diff --git a/.github/workflows/check-valid-milestone.yml b/.github/workflows/check-valid-milestone.yml new file mode 100644 index 000000000..0e9357050 --- /dev/null +++ b/.github/workflows/check-valid-milestone.yml @@ -0,0 +1,100 @@ +name: PR Milestone Check + +on: + pull_request: + types: [opened, reopened, edited, synchronize, milestoned, demilestoned] + branches: + - "main" + - "release-v*.*.*" + +jobs: + check-milestone: + name: Check PR Milestone + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Validate PR milestone + uses: actions/github-script@v6 + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + script: | + // Get PR number directly from the event payload + const prNumber = context.payload.pull_request.number; + + // Get PR details + const { data: prData } = await github.rest.pulls.get({ + owner: 'meilisearch', + repo: 'meilisearch', + pull_number: prNumber + }); + + // Get base branch name + const baseBranch = prData.base.ref; + console.log(`Base branch: ${baseBranch}`); + + // Get PR milestone + const prMilestone = prData.milestone; + if (!prMilestone) { + core.setFailed('PR must have a milestone assigned'); + return; + } + console.log(`PR milestone: ${prMilestone.title}`); + + // Validate milestone format: vx.y.z + const milestoneRegex = /^v\d+\.\d+\.\d+$/; + if (!milestoneRegex.test(prMilestone.title)) { + core.setFailed(`Milestone "${prMilestone.title}" does not follow the required format vx.y.z`); + return; + } + + // For main branch PRs, check if the milestone is the highest one + if (baseBranch === 'main') { + // Get all milestones + const { data: milestones } = await github.rest.issues.listMilestones({ + owner: 'meilisearch', + repo: 'meilisearch', + state: 'open', + sort: 'due_on', + direction: 'desc' + }); + + // Sort milestones by version number (vx.y.z) + const sortedMilestones = milestones + .filter(m => milestoneRegex.test(m.title)) + .sort((a, b) => { + const versionA = 
a.title.substring(1).split('.').map(Number); + const versionB = b.title.substring(1).split('.').map(Number); + + // Compare major version + if (versionA[0] !== versionB[0]) return versionB[0] - versionA[0]; + // Compare minor version + if (versionA[1] !== versionB[1]) return versionB[1] - versionA[1]; + // Compare patch version + return versionB[2] - versionA[2]; + }); + + if (sortedMilestones.length === 0) { + core.setFailed('No valid milestones found in the repository. Please create at least one milestone with the format vx.y.z'); + return; + } + + const highestMilestone = sortedMilestones[0]; + console.log(`Highest milestone: ${highestMilestone.title}`); + + if (prMilestone.title !== highestMilestone.title) { + core.setFailed(`PRs targeting the main branch must use the highest milestone (${highestMilestone.title}), but this PR uses ${prMilestone.title}`); + return; + } + } else { + // For release branches, the milestone should match the branch version + const branchVersion = baseBranch.substring(8); // remove 'release-' + if (prMilestone.title !== branchVersion) { + core.setFailed(`PRs targeting release branch "${baseBranch}" must use the matching milestone "${branchVersion}", but this PR uses "${prMilestone.title}"`); + return; + } + } + + console.log('PR milestone validation passed!'); diff --git a/.github/workflows/test-suite.yml b/.github/workflows/test-suite.yml index 81f7228fd..feb95d8ad 100644 --- a/.github/workflows/test-suite.yml +++ b/.github/workflows/test-suite.yml @@ -25,7 +25,7 @@ jobs: # Use ubuntu-22.04 to compile with glibc 2.35 image: ubuntu:22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install needed dependencies run: | apt-get update && apt-get install -y curl diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 092eb10e9..26d5b74b4 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -95,6 +95,11 @@ Meilisearch follows the [cargo xtask](https://github.com/matklad/cargo-xtask) wo Run `cargo xtask --help` 
from the root of the repository to find out what is available. +#### Update the openAPI file if the API changed + +To update the openAPI file in the code, see [sprint_issue.md](https://github.com/meilisearch/meilisearch/blob/main/.github/ISSUE_TEMPLATE/sprint_issue.md#reminders-when-modifying-the-api). +If you want to update the openAPI file on the [open-api repository](https://github.com/meilisearch/open-api), see [update-openapi-issue.md](https://github.com/meilisearch/engine-team/blob/main/issue-templates/update-openapi-issue.md). + ### Logging Meilisearch uses [`tracing`](https://lib.rs/crates/tracing) for logging purposes. Tracing logs are structured and can be displayed as JSON to the end user, so prefer passing arguments as fields rather than interpolating them in the message. diff --git a/Cargo.lock b/Cargo.lock index 480dc782e..aa0020617 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -503,7 +503,7 @@ source = "git+https://github.com/meilisearch/bbqueue#cbb87cc707b5af415ef203bdaf2 [[package]] name = "benchmarks" -version = "1.13.1" +version = "1.13.3" dependencies = [ "anyhow", "bumpalo", @@ -694,7 +694,7 @@ dependencies = [ [[package]] name = "build-info" -version = "1.13.1" +version = "1.13.3" dependencies = [ "anyhow", "time", @@ -1671,7 +1671,7 @@ dependencies = [ [[package]] name = "dump" -version = "1.13.1" +version = "1.13.3" dependencies = [ "anyhow", "big_s", @@ -1873,7 +1873,7 @@ checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] name = "file-store" -version = "1.13.1" +version = "1.13.3" dependencies = [ "tempfile", "thiserror 2.0.9", @@ -1895,7 +1895,7 @@ dependencies = [ [[package]] name = "filter-parser" -version = "1.13.1" +version = "1.13.3" dependencies = [ "insta", "nom", @@ -1915,7 +1915,7 @@ dependencies = [ [[package]] name = "flatten-serde-json" -version = "1.13.1" +version = "1.13.3" dependencies = [ "criterion", "serde_json", @@ -2054,7 +2054,7 @@ dependencies = [ [[package]] name = "fuzzers" 
-version = "1.13.1" +version = "1.13.3" dependencies = [ "arbitrary", "bumpalo", @@ -2743,7 +2743,7 @@ checksum = "206ca75c9c03ba3d4ace2460e57b189f39f43de612c2f85836e65c929701bb2d" [[package]] name = "index-scheduler" -version = "1.13.1" +version = "1.13.3" dependencies = [ "anyhow", "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2950,7 +2950,7 @@ dependencies = [ [[package]] name = "json-depth-checker" -version = "1.13.1" +version = "1.13.3" dependencies = [ "criterion", "serde_json", @@ -3569,7 +3569,7 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "meili-snap" -version = "1.13.1" +version = "1.13.3" dependencies = [ "insta", "md5", @@ -3578,7 +3578,7 @@ dependencies = [ [[package]] name = "meilisearch" -version = "1.13.1" +version = "1.13.3" dependencies = [ "actix-cors", "actix-http", @@ -3670,7 +3670,7 @@ dependencies = [ [[package]] name = "meilisearch-auth" -version = "1.13.1" +version = "1.13.3" dependencies = [ "base64 0.22.1", "enum-iterator", @@ -3689,7 +3689,7 @@ dependencies = [ [[package]] name = "meilisearch-types" -version = "1.13.1" +version = "1.13.3" dependencies = [ "actix-web", "anyhow", @@ -3723,7 +3723,7 @@ dependencies = [ [[package]] name = "meilitool" -version = "1.13.1" +version = "1.13.3" dependencies = [ "anyhow", "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?tag=DO-NOT-DELETE-upgrade-v04-to-v05)", @@ -3758,7 +3758,7 @@ dependencies = [ [[package]] name = "milli" -version = "1.13.1" +version = "1.13.3" dependencies = [ "allocator-api2", "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -4270,7 +4270,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "permissive-json-pointer" -version = "1.13.1" +version = "1.13.3" dependencies = [ "big_s", "serde_json", @@ -6847,7 +6847,7 @@ dependencies = [ [[package]] name = "xtask" -version = "1.13.1" +version = "1.13.3" dependencies = [ 
"anyhow", "build-info", diff --git a/Cargo.toml b/Cargo.toml index ce1e119e1..0a16810af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,7 +22,7 @@ members = [ ] [workspace.package] -version = "1.13.1" +version = "1.13.3" authors = [ "Quentin de Quelen ", "Clément Renault ", diff --git a/bors.toml b/bors.toml index 71a8748b8..3d04b834c 100644 --- a/bors.toml +++ b/bors.toml @@ -6,6 +6,5 @@ status = [ 'Run Rustfmt', 'Run tests in debug', ] -pr_status = ['Milestone Check'] # 3 hours timeout timeout-sec = 10800 diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index 48e29508f..32cfa94ad 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -102,6 +102,10 @@ pub struct IndexStats { /// Stats of the documents database. #[serde(default)] pub documents_database_stats: DatabaseStats, + + #[serde(default, skip_serializing)] + pub number_of_documents: Option, + /// Size taken up by the index' DB, in bytes. 
/// /// This includes the size taken by both the used and free pages of the DB, and as the free pages @@ -143,6 +147,7 @@ impl IndexStats { number_of_embeddings: Some(arroy_stats.number_of_embeddings), number_of_embedded_documents: Some(arroy_stats.documents.len()), documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(), + number_of_documents: None, database_size: index.on_disk_size()?, used_database_size: index.used_size()?, primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()), diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap index fb2a9de43..d9d8b0724 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: 
NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap index f503e2a56..ca8a3e137 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap +++ 
b/crates/index-scheduler/src/scheduler/snapshots/test.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: 
NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: 
Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(4), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap index fcbab1a07..74cdb9bc1 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: 
Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, 
document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: 
"doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, 
indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap index d6a677999..16858361e 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: 
Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, 
batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, 
typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel 
succeeds.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap index 2dc23b3b4..8daa10244 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, 
prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: 
NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), 
response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap index 818cdd474..87a9ec11c 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), 
searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), 
dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: 
Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, 
localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap index 172f80633..35bd9dee9 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, 
url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: 
NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: 
WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap index 1635614e8..40e8f63e9 100644 --- 
a/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_embedders.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_embedders.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: 
WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), 
model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, pooling: NotSet, api_key: Set("My super secret"), dimensions: Set(384), binary_quantized: NotSet, document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet }), "B_small_hf": 
Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), pooling: NotSet, api_key: NotSet, dimensions: NotSet, binary_quantized: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, search_embedder: NotSet, indexing_embedder: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_processing_everything.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_processing_everything.snap index e7b50dfea..f7527e04c 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_processing_everything.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/after_processing_everything.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", 
primary_key: Some("bone") }} 3 {uid: 3, batch_uid: 3, status: failed, error: ResponseError { code: 200, message: "Index `doggo` already exists.", error_code: "index_already_exists", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#index_already_exists" }, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} @@ -57,7 +57,7 @@ girafo: { number_of_documents: 0, field_distribution: {} } [timestamp] [4,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } +0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } 1 {uid: 1, details: {"primaryKey":"mouse"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"catto":1}}, } 2 {uid: 2, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, } 3 {uid: 3, details: {"primaryKey":"bone"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"indexCreation":1},"indexUids":{"doggo":1}}, } diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/register_automatic_upgrade_task.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/register_automatic_upgrade_task.snap index 1bd70062e..427b782cc 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/register_automatic_upgrade_task.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/register_automatic_upgrade_task.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/scheduler/test_failure.rs -snapshot_kind: text --- ### 
Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/registered_a_task_while_the_upgrade_task_is_enqueued.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/registered_a_task_while_the_upgrade_task_is_enqueued.snap index ece9ba67b..7d951b451 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/registered_a_task_while_the_upgrade_task_is_enqueued.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/registered_a_task_while_the_upgrade_task_is_enqueued.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/scheduler/test_failure.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, status: enqueued, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed.snap 
b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed.snap index 6414ed9be..e55646e9e 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} ---------------------------------------------------------------------- ### Status: @@ -37,7 +37,7 @@ catto [1,] [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } +0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git 
a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed_again.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed_again.snap index 1da68c7c9..badca4d41 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed_again.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_failed_again.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, batch_uid: 0, status: failed, error: ResponseError { code: 200, message: "Planned failure for tests.", error_code: "internal", error_type: "internal", error_link: "https://docs.meilisearch.com/errors#internal" }, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} ---------------------------------------------------------------------- @@ -40,7 +40,7 @@ doggo [2,] [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.1"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } +0 {uid: 0, details: 
{"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: {"totalNbTasks":1,"status":{"failed":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_succeeded.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_succeeded.snap index fbb38c597..f1ecb40dc 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_succeeded.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/upgrade_failure/upgrade_task_succeeded.snap @@ -6,7 +6,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 1) }, kind: UpgradeDatabase { from: (1, 12, 0) }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { from: (1, 12, 0), to: (1, 13, 3) }, kind: UpgradeDatabase { from: (1, 12, 0) }} 1 {uid: 1, status: enqueued, details: { primary_key: Some("mouse") }, kind: IndexCreation { index_uid: "catto", primary_key: Some("mouse") }} 2 {uid: 2, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} 3 {uid: 3, status: enqueued, details: { primary_key: Some("bone") }, kind: IndexCreation { index_uid: "doggo", primary_key: Some("bone") }} @@ -43,7 +43,7 @@ doggo [2,3,] [timestamp] [0,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.1"}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } +0 {uid: 0, details: {"upgradeFrom":"v1.12.0","upgradeTo":"v1.13.3"}, stats: 
{"totalNbTasks":1,"status":{"succeeded":1},"types":{"upgradeDatabase":1},"indexUids":{}}, } ---------------------------------------------------------------------- ### Batch to tasks mapping: 0 [0,] diff --git a/crates/index-scheduler/src/scheduler/test_embedders.rs b/crates/index-scheduler/src/scheduler/test_embedders.rs index b1c619441..05929b651 100644 --- a/crates/index-scheduler/src/scheduler/test_embedders.rs +++ b/crates/index-scheduler/src/scheduler/test_embedders.rs @@ -104,9 +104,10 @@ fn import_vectors() { let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap(); let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap(); - let beagle_embed = hf_embedder.embed_one(S("Intel the beagle best doggo"), None).unwrap(); - let lab_embed = hf_embedder.embed_one(S("Max the lab best doggo"), None).unwrap(); - let patou_embed = hf_embedder.embed_one(S("kefir the patou best doggo"), None).unwrap(); + let beagle_embed = + hf_embedder.embed_search(S("Intel the beagle best doggo"), None).unwrap(); + let lab_embed = hf_embedder.embed_search(S("Max the lab best doggo"), None).unwrap(); + let patou_embed = hf_embedder.embed_search(S("kefir the patou best doggo"), None).unwrap(); (fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed) }; diff --git a/crates/index-scheduler/src/upgrade/mod.rs b/crates/index-scheduler/src/upgrade/mod.rs index cfc351b09..017685198 100644 --- a/crates/index-scheduler/src/upgrade/mod.rs +++ b/crates/index-scheduler/src/upgrade/mod.rs @@ -24,10 +24,11 @@ pub fn upgrade_index_scheduler( let current_minor = to.1; let current_patch = to.2; - let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[&V1_12_ToCurrent {}]; + let upgrade_functions: &[&dyn UpgradeIndexScheduler] = &[&ToCurrentNoOp {}]; let start = match from { (1, 12, _) => 0, + (1, 13, _) => 0, (major, minor, patch) => { if major > current_major || (major == current_major && minor > current_minor) @@ -85,9 +86,9 @@ pub fn upgrade_index_scheduler( 
} #[allow(non_camel_case_types)] -struct V1_12_ToCurrent {} +struct ToCurrentNoOp {} -impl UpgradeIndexScheduler for V1_12_ToCurrent { +impl UpgradeIndexScheduler for ToCurrentNoOp { fn upgrade( &self, _env: &Env, diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index 7db4f9d9a..c7f375eff 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -429,9 +429,10 @@ impl ErrorCode for milli::Error { | UserError::InvalidUrl { .. } | UserError::InvalidSettingsDocumentTemplateMaxBytes { .. } | UserError::InvalidPrompt(_) - | UserError::InvalidDisableBinaryQuantization { .. } => { - Code::InvalidSettingsEmbedders - } + | UserError::InvalidDisableBinaryQuantization { .. } + | UserError::InvalidSourceForNested { .. } + | UserError::MissingSourceForNested { .. } + | UserError::InvalidSettingsEmbedder { .. } => Code::InvalidSettingsEmbedders, UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders, UserError::InvalidPromptForEmbeddings(..) 
=> Code::InvalidSettingsEmbedders, UserError::NoPrimaryKeyCandidateFound => Code::IndexPrimaryKeyNoCandidateFound, diff --git a/crates/meilisearch/Cargo.toml b/crates/meilisearch/Cargo.toml index a917ab00f..e25fd9400 100644 --- a/crates/meilisearch/Cargo.toml +++ b/crates/meilisearch/Cargo.toml @@ -170,5 +170,5 @@ german = ["meilisearch-types/german"] turkish = ["meilisearch-types/turkish"] [package.metadata.mini-dashboard] -assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.17/build.zip" -sha1 = "29e92ce25f306208a9c86f013279c736bdc1e034" +assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.18/build.zip" +sha1 = "b408a30dcb6e20cddb0c153c23385bcac4c8e912" diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index e22b6dff3..948d1148b 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -364,7 +364,7 @@ fn check_version( let (bin_major, bin_minor, bin_patch) = binary_version; let (db_major, db_minor, db_patch) = get_version(&opt.db_path)?; - if db_major != bin_major || db_minor != bin_minor || db_patch > bin_patch { + if db_major != bin_major || db_minor != bin_minor || db_patch != bin_patch { if opt.experimental_dumpless_upgrade { update_version_file_for_dumpless_upgrade( opt, diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs index bbcb3674b..5aebf5cac 100644 --- a/crates/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -514,7 +514,10 @@ pub struct IndexStats { impl From for IndexStats { fn from(stats: index_scheduler::IndexStats) -> Self { IndexStats { - number_of_documents: stats.inner_stats.documents_database_stats.number_of_entries(), + number_of_documents: stats + .inner_stats + .number_of_documents + .unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()), raw_document_db_size: 
stats.inner_stats.documents_database_stats.total_value_size(), avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(), is_indexing: stats.is_indexing, diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs index 627f9103e..cb5983f02 100644 --- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -523,6 +523,7 @@ impl EmbeddersAnalytics { EmbedderSource::UserProvided => sources.insert("userProvided".to_string()), EmbedderSource::Ollama => sources.insert("ollama".to_string()), EmbedderSource::Rest => sources.insert("rest".to_string()), + EmbedderSource::Composite => sources.insert("composite".to_string()), }; } }; diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index a16f4eb6a..58a181d3b 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -916,7 +916,7 @@ fn prepare_search<'t>( let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10); embedder - .embed_one(query.q.clone().unwrap(), Some(deadline)) + .embed_search(query.q.clone().unwrap(), Some(deadline)) .map_err(milli::vector::Error::from) .map_err(milli::Error::from)? } diff --git a/crates/meilisearch/tests/common/mod.rs b/crates/meilisearch/tests/common/mod.rs index 52aa3b32d..4d57a6163 100644 --- a/crates/meilisearch/tests/common/mod.rs +++ b/crates/meilisearch/tests/common/mod.rs @@ -34,6 +34,10 @@ impl Value { } } + pub fn has_uid(&self) -> bool { + self["uid"].as_u64().is_some() || self["taskUid"].as_u64().is_some() + } + /// Return `true` if the `status` field is set to `succeeded`. /// Panic if the `status` field doesn't exists. 
#[track_caller] diff --git a/crates/meilisearch/tests/settings/mod.rs b/crates/meilisearch/tests/settings/mod.rs index 67df4068a..6b61e6be0 100644 --- a/crates/meilisearch/tests/settings/mod.rs +++ b/crates/meilisearch/tests/settings/mod.rs @@ -4,3 +4,4 @@ mod get_settings; mod prefix_search_settings; mod proximity_settings; mod tokenizer_customization; +mod vectors; diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_result.snap new file mode 100644 index 000000000..3a9b5bfb8 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-apiKey-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `apiKey` unavailable for source `huggingFace`.\n - note: `apiKey` is available for sources: `openAi`, `ollama`, `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-task_result.snap new file mode 100644 index 000000000..8f0a4edfa --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-binaryQuantized-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "binaryQuantized": false + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_result.snap new file mode 100644 index 000000000..f5dc3b48f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-dimensions-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `dimensions` unavailable for source `huggingFace`.\n - note: `dimensions` is available for sources: `openAi`, `ollama`, `userProvided`, `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- 
+202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-task_result.snap new file mode 100644 index 000000000..757a7b89f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-model-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: 
crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-task_result.snap new file mode 100644 index 000000000..12d199767 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-pooling-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "pooling": "forceMean" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_code.snap @@ -0,0 +1,4 
@@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-task_result.snap new file mode 100644 index 000000000..78d4c44cc --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/huggingFace-revision-task_result.snap @@ -0,0 +1,29 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" + } + } + }, + "error": { + "message": "Index `test`: Error while generating embeddings: error: fetching file from HG_HUB failed:\n - request error: https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/e4ce9877abf3edfe10b0d82785e83bdcb973e22e/config.json: status code 404", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": 
"[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-task_result.snap new file mode 100644 index 000000000..ac3780eb1 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-apiKey-task_result.snap @@ -0,0 +1,26 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "apiKey": "XXX...", + "dimensions": 768 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": 
"[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-task_result.snap new file mode 100644 index 000000000..b9ae269bb --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-binaryQuantized-task_result.snap @@ -0,0 +1,26 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "binaryQuantized": false + } + } + }, + "error": null, + "duration": 
"[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-task_result.snap new file mode 100644 index 000000000..aef2ba2b0 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-dimensions-task_result.snap @@ -0,0 +1,25 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768 + } + } + }, + "error": null, + 
"duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-task_result.snap new file mode 100644 index 000000000..aef2ba2b0 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-model-task_result.snap @@ -0,0 +1,25 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768 + } + } + }, + "error": null, + "duration": "[duration]", + 
"enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_result.snap new file mode 100644 index 000000000..110555f8b --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-pooling-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `pooling` unavailable for source `ollama`.\n - note: `pooling` is available for sources: `huggingFace`\n - note: available fields for source `ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: 
crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_result.snap new file mode 100644 index 000000000..a220caa82 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/ollama-revision-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `revision` unavailable for source `ollama`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": 
"[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-task_result.snap new file mode 100644 index 000000000..0cca31fb7 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-apiKey-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "apiKey": "XXX..." + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + 
"taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-task_result.snap new file mode 100644 index 000000000..329e88cac --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-binaryQuantized-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "binaryQuantized": false + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs 
+--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-task_result.snap new file mode 100644 index 000000000..b63a458ca --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-dimensions-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "dimensions": 768 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + 
"indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-task_result.snap new file mode 100644 index 000000000..daa87d395 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-model-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "model": "text-embedding-3-small" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_result.snap new file mode 100644 index 000000000..958b5184a --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-pooling-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `pooling` 
unavailable for source `openAi`.\n - note: `pooling` is available for sources: `huggingFace`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_result.snap new file mode 100644 index 000000000..acfdeac87 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/openAi-revision-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `revision` unavailable for source `openAi`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-task_result.snap new file mode 100644 index 000000000..ed8a6b2ea --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-apiKey-task_result.snap @@ -0,0 +1,32 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "apiKey": "XXX...", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-task_result.snap new file mode 100644 index 000000000..12fd314f5 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-binaryQuantized-task_result.snap @@ -0,0 +1,32 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "binaryQuantized": false, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } 
+ } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-task_result.snap new file mode 100644 index 000000000..4f1bbf136 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-dimensions-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", 
+ "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_result.snap new file mode 100644 index 000000000..8ac20a01c --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-model-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `model` unavailable for source `rest`.\n - note: `model` is available for sources: `openAi`, `huggingFace`, `ollama`\n - note: available fields for source `rest`: `source`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `request`, `response`, `headers`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_result.snap new file mode 100644 index 000000000..31a2a7d15 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-pooling-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `pooling` unavailable for source `rest`.\n - note: `pooling` is available for sources: `huggingFace`\n - note: available fields for source `rest`: `source`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `request`, `response`, `headers`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_result.snap new file mode 100644 index 000000000..d732ac50c --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/rest-revision-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `revision` unavailable for source `rest`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `rest`: `source`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `request`, `response`, `headers`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_result.snap new file mode 100644 index 000000000..e47bd1e7f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-apiKey-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `apiKey` unavailable for source `userProvided`.\n - note: `apiKey` is available for sources: `openAi`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + 
"type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-task_result.snap new file mode 100644 index 000000000..93102fbe5 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-binaryQuantized-task_result.snap @@ -0,0 +1,25 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + 
"source": "userProvided", + "dimensions": 768, + "binaryQuantized": false + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-task_result.snap new file mode 100644 index 000000000..e095014fd --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-dimensions-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": 
"settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "userProvided", + "dimensions": 768 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_result.snap new file mode 100644 index 000000000..acb26f215 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-model-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `model` unavailable for source `userProvided`.\n - note: `model` is available for sources: `openAi`, `huggingFace`, `ollama`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null 
+++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_result.snap new file mode 100644 index 000000000..466826779 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-pooling-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `pooling` unavailable for source `userProvided`.\n - note: `pooling` is available for sources: `huggingFace`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_result.snap new file mode 100644 index 000000000..821d9550d --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters/userProvided-revision-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `revision` unavailable for source `userProvided`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-task_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-task_result.snap new file mode 100644 index 000000000..0c60b1c6e --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-distribution-task_result.snap @@ -0,0 +1,27 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" 
+} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-task_result.snap new file mode 100644 index 000000000..b7f10fd11 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplate-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "documentTemplate": "toto" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: 
crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-task_result.snap new file mode 100644 index 000000000..93401b927 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-documentTemplateMaxBytes-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "huggingFace", + "documentTemplateMaxBytes": 200 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_result.snap new file mode 100644 index 000000000..38f95e6cb --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-headers-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `headers` unavailable for source `huggingFace`.\n - note: `headers` is available for sources: `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_result.snap new file mode 100644 index 000000000..83fc14a3f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-request-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `request` unavailable for source `huggingFace`.\n - note: `request` is available for sources: `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + 
"code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_result.snap new file mode 100644 index 000000000..f4e2f4a6f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-response-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `response` unavailable for source `huggingFace`.\n - note: `response` is available for sources: `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_result.snap new file mode 100644 index 000000000..3f18f89bd --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/huggingFace-url-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `url` unavailable for source `huggingFace`.\n - note: `url` is available for sources: `openAi`, `ollama`, `rest`\n - note: available fields for source `huggingFace`: `source`, `model`, `revision`, `pooling`, `documentTemplate`, `documentTemplateMaxBytes`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-task_result.snap new file mode 100644 index 000000000..5b0056604 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-distribution-task_result.snap @@ -0,0 +1,29 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-task_result.snap new file mode 100644 index 000000000..1b42db77b --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplate-task_result.snap @@ -0,0 +1,26 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "documentTemplate": "toto" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 
Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-task_result.snap new file mode 100644 index 000000000..a2e8024a6 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-documentTemplateMaxBytes-task_result.snap @@ -0,0 +1,26 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "documentTemplateMaxBytes": 200 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_result.snap new file mode 100644 index 000000000..600e8271d --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-headers-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `headers` unavailable for source `ollama`.\n - note: `headers` is available for sources: `rest`\n - note: available fields for source `ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_result.snap new file mode 100644 index 000000000..b257b474e --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-request-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `request` unavailable for source `ollama`.\n - note: `request` is available for sources: `rest`\n - note: available fields for source `ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_result.snap new file mode 100644 index 000000000..de06524f1 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-response-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `response` unavailable for source `ollama`.\n - note: `response` is available for sources: `rest`\n - note: available fields for source `ollama`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": 
"invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-task_result.snap new file mode 100644 index 000000000..4eaf0ba2f --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/ollama-url-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "ollama", + "model": "all-minilm", + "dimensions": 768, + "url": "http://rest.example/" + } + } + }, + 
"error": { + "message": "Index `test`: Error while generating embeddings: user error: unsupported Ollama URL.\n - For `ollama` sources, the URL must end with `/api/embed` or `/api/embeddings`\n - Got `http://rest.example/`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-task_result.snap new file mode 100644 index 000000000..eb6eaf59d --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-distribution-task_result.snap @@ -0,0 +1,27 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-task_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-task_result.snap new file mode 100644 index 000000000..d1ad94953 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplate-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "documentTemplate": "toto" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git 
a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-task_result.snap new file mode 100644 index 000000000..dca04b8c2 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-documentTemplateMaxBytes-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "documentTemplateMaxBytes": 200 + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_result.snap new file mode 100644 index 000000000..117268660 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-headers-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `headers` unavailable for source `openAi`.\n - note: `headers` 
is available for sources: `rest`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_result.snap new file mode 100644 index 000000000..dcf8000eb --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-request-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `request` unavailable for source `openAi`.\n - note: `request` is available for sources: `rest`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_result.snap new file mode 100644 index 000000000..d834bc900 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-response-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `response` unavailable for source `openAi`.\n - note: `response` is available for sources: `rest`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-task_result.snap new file mode 100644 index 000000000..78d2b853e --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/openAi-url-task_result.snap @@ -0,0 +1,24 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "openAi", + "url": "http://rest.example/" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-task_result.snap new file mode 100644 index 000000000..96841efcc --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-distribution-task_result.snap @@ -0,0 +1,35 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + }, + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- 
+source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-task_result.snap new file mode 100644 index 000000000..f9bb045ad --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplate-task_result.snap @@ -0,0 +1,32 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "documentTemplate": "toto", + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-task_result.snap new file mode 100644 index 000000000..5085ab19e --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-documentTemplateMaxBytes-task_result.snap @@ -0,0 +1,32 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "documentTemplateMaxBytes": 200, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + 
"error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-task_result.snap new file mode 100644 index 000000000..db6434f0e --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-headers-task_result.snap @@ -0,0 +1,34 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": 
"{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + }, + "headers": { + "custom": "value" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-task_result.snap new file mode 100644 index 000000000..4f1bbf136 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-request-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + 
"embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-task_result.snap new file mode 100644 index 000000000..4f1bbf136 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-response-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", 
+ "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-task_result.snap new file mode 100644 index 000000000..4f1bbf136 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/rest-url-task_result.snap @@ -0,0 +1,31 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + 
"batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "rest", + "dimensions": 768, + "url": "http://rest.example/", + "request": { + "text": "{{text}}" + }, + "response": { + "embedding": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_code.snap new file mode 100644 index 000000000..ef52a4a70 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +202 Accepted diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_result.snap new file mode 100644 index 000000000..d868ef060 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-sending_result.snap @@ -0,0 +1,10 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "taskUid": "[taskUid]", + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[enqueuedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-task_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-task_result.snap new file mode 100644 index 000000000..be731d19f --- /dev/null +++ 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-distribution-task_result.snap @@ -0,0 +1,28 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "uid": "[uid]", + "batchUid": "[batchUid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "userProvided", + "dimensions": 768, + "distribution": { + "mean": 0.4, + "sigma": 0.1 + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[enqueuedAt]", + "startedAt": "[startedAt]", + "finishedAt": "[finishedAt]" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_result.snap new file mode 100644 index 000000000..4922d21cc --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplate-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `documentTemplate` unavailable for source `userProvided`.\n - note: `documentTemplate` is available for sources: `openAi`, `huggingFace`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, 
`distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_result.snap new file mode 100644 index 000000000..1899cc0a8 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-documentTemplateMaxBytes-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `documentTemplateMaxBytes` unavailable for source `userProvided`.\n - note: `documentTemplateMaxBytes` is available for sources: `openAi`, `huggingFace`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_code.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_result.snap new file mode 100644 index 000000000..1cd308942 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-headers-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `headers` unavailable for source `userProvided`.\n - note: `headers` is available for sources: `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_result.snap 
b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_result.snap new file mode 100644 index 000000000..48f8ca1eb --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-request-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `request` unavailable for source `userProvided`.\n - note: `request` is available for sources: `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_result.snap new file mode 100644 index 000000000..76c1c8f68 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-response-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `response` unavailable for source `userProvided`.\n - note: `response` is available for sources: `rest`\n - note: available fields for source 
`userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_code.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_code.snap new file mode 100644 index 000000000..ef5454296 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_code.snap @@ -0,0 +1,4 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +400 Bad Request diff --git a/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_result.snap b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_result.snap new file mode 100644 index 000000000..7469b3943 --- /dev/null +++ b/crates/meilisearch/tests/settings/snapshots/vectors.rs/bad_parameters_2/userProvided-url-sending_result.snap @@ -0,0 +1,9 @@ +--- +source: crates/meilisearch/tests/settings/vectors.rs +--- +{ + "message": "`.embedders.test`: Field `url` unavailable for source `userProvided`.\n - note: `url` is available for sources: `openAi`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" +} diff --git a/crates/meilisearch/tests/settings/vectors.rs b/crates/meilisearch/tests/settings/vectors.rs new file mode 100644 index 000000000..fb7c6dbf9 --- /dev/null +++ b/crates/meilisearch/tests/settings/vectors.rs @@ -0,0 +1,269 @@ +use meili_snap::{json_string, snapshot}; + +use crate::common::{Server, Value}; + +macro_rules! 
parameter_test { + ($server:ident, $source:tt, $param:tt) => { + let source = stringify!($source); + let param = stringify!($param); + let index = $server.index("test"); + + let (response, _code) = index + .update_settings(crate::json!({ + "embedders": { + "test": null, + } + })) + .await; + $server.wait_task(response.uid()).await.succeeded(); + + let mut value = base_for_source(source); + value[param] = valid_parameter(source, param).0; + let (response, code) = index + .update_settings(crate::json!({ + "embedders": { + "test": value + } + })) + .await; + snapshot!(code, name: concat!(stringify!($source), "-", stringify!($param), "-sending_code")); + snapshot!(json_string!(response, {".enqueuedAt" => "[enqueuedAt]", ".taskUid" => "[taskUid]"}), name: concat!(stringify!($source), "-", stringify!($param), "-sending_result")); + + if response.has_uid() { + let response = $server.wait_task(response.uid()).await; + snapshot!(json_string!(response, {".enqueuedAt" => "[enqueuedAt]", + ".uid" => "[uid]", ".batchUid" => "[batchUid]", + ".duration" => "[duration]", + ".startedAt" => "[startedAt]", + ".finishedAt" => "[finishedAt]"}), name: concat!(stringify!($source), "-", stringify!($param), "-task_result")); + } + + }; +} + +#[actix_rt::test] +async fn bad_parameters() { + let server = Server::new().await; + + // for each source, check which parameters are allowed/disallowed + // model + // - openai + parameter_test!(server, openAi, model); + // - huggingFace + parameter_test!(server, huggingFace, model); + // - userProvided + parameter_test!(server, userProvided, model); + // - ollama + parameter_test!(server, ollama, model); + // - rest + parameter_test!(server, rest, model); + // == + + // revision + // - openai + parameter_test!(server, openAi, revision); + // - huggingFace + parameter_test!(server, huggingFace, revision); + // - userProvided + parameter_test!(server, userProvided, revision); + // - ollama + parameter_test!(server, ollama, revision); + // - rest + 
parameter_test!(server, rest, revision); + // == + + // pooling + // - openai + parameter_test!(server, openAi, pooling); + // - huggingFace + parameter_test!(server, huggingFace, pooling); + // - userProvided + parameter_test!(server, userProvided, pooling); + // - ollama + parameter_test!(server, ollama, pooling); + // - rest + parameter_test!(server, rest, pooling); + // == + + // apiKey + // - openai + parameter_test!(server, openAi, apiKey); + // - huggingFace + parameter_test!(server, huggingFace, apiKey); + // - userProvided + parameter_test!(server, userProvided, apiKey); + // - ollama + parameter_test!(server, ollama, apiKey); + // - rest + parameter_test!(server, rest, apiKey); + // == + + // dimensions + // - openai + parameter_test!(server, openAi, dimensions); + // - huggingFace + parameter_test!(server, huggingFace, dimensions); + // - userProvided + parameter_test!(server, userProvided, dimensions); + // - ollama + parameter_test!(server, ollama, dimensions); + // - rest + parameter_test!(server, rest, dimensions); + // == + + // binaryQuantized + // - openai + parameter_test!(server, openAi, binaryQuantized); + // - huggingFace + parameter_test!(server, huggingFace, binaryQuantized); + // - userProvided + parameter_test!(server, userProvided, binaryQuantized); + // - ollama + parameter_test!(server, ollama, binaryQuantized); + // - rest + parameter_test!(server, rest, binaryQuantized); + // == + + // for each source, check that removing mandatory parameters is a failure +} + +#[actix_rt::test] +async fn bad_parameters_2() { + let server = Server::new().await; + + // documentTemplate + // - openai + parameter_test!(server, openAi, documentTemplate); + // - huggingFace + parameter_test!(server, huggingFace, documentTemplate); + // - userProvided + parameter_test!(server, userProvided, documentTemplate); + // - ollama + parameter_test!(server, ollama, documentTemplate); + // - rest + parameter_test!(server, rest, documentTemplate); + // == + + // 
documentTemplateMaxBytes + // - openai + parameter_test!(server, openAi, documentTemplateMaxBytes); + // - huggingFace + parameter_test!(server, huggingFace, documentTemplateMaxBytes); + // - userProvided + parameter_test!(server, userProvided, documentTemplateMaxBytes); + // - ollama + parameter_test!(server, ollama, documentTemplateMaxBytes); + // - rest + parameter_test!(server, rest, documentTemplateMaxBytes); + // == + + // url + // - openai + parameter_test!(server, openAi, url); + // - huggingFace + parameter_test!(server, huggingFace, url); + // - userProvided + parameter_test!(server, userProvided, url); + // - ollama + parameter_test!(server, ollama, url); + // - rest + parameter_test!(server, rest, url); + // == + + // request + // - openai + parameter_test!(server, openAi, request); + // - huggingFace + parameter_test!(server, huggingFace, request); + // - userProvided + parameter_test!(server, userProvided, request); + // - ollama + parameter_test!(server, ollama, request); + // - rest + parameter_test!(server, rest, request); + // == + + // response + // - openai + parameter_test!(server, openAi, response); + // - huggingFace + parameter_test!(server, huggingFace, response); + // - userProvided + parameter_test!(server, userProvided, response); + // - ollama + parameter_test!(server, ollama, response); + // - rest + parameter_test!(server, rest, response); + // == + + // headers + // - openai + parameter_test!(server, openAi, headers); + // - huggingFace + parameter_test!(server, huggingFace, headers); + // - userProvided + parameter_test!(server, userProvided, headers); + // - ollama + parameter_test!(server, ollama, headers); + // - rest + parameter_test!(server, rest, headers); + // == + + // distribution + // - openai + parameter_test!(server, openAi, distribution); + // - huggingFace + parameter_test!(server, huggingFace, distribution); + // - userProvided + parameter_test!(server, userProvided, distribution); + // - ollama + 
parameter_test!(server, ollama, distribution); + // - rest + parameter_test!(server, rest, distribution); + // == +} + +fn base_for_source(source: &'static str) -> Value { + let base_parameters = maplit::btreemap! { + "openAi" => vec![], + "huggingFace" => vec![], + "userProvided" => vec!["dimensions"], + "ollama" => vec!["model", + // add dimensions to avoid actually fetching the model from ollama + "dimensions"], + "rest" => vec!["url", "request", "response", + // add dimensions to avoid actually fetching the model from ollama + "dimensions"], + }; + + let mut value = crate::json!({ + "source": source + }); + + let mandatory_parameters = base_parameters.get(source).unwrap(); + for mandatory_parameter in mandatory_parameters { + value[mandatory_parameter] = valid_parameter(source, mandatory_parameter).0; + } + value +} + +fn valid_parameter(source: &'static str, parameter: &'static str) -> Value { + match (source, parameter) { + ("openAi", "model") => crate::json!("text-embedding-3-small"), + ("huggingFace", "model") => crate::json!("sentence-transformers/all-MiniLM-L6-v2"), + (_, "model") => crate::json!("all-minilm"), + (_, "revision") => crate::json!("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), + (_, "pooling") => crate::json!("forceMean"), + (_, "apiKey") => crate::json!("foo"), + (_, "dimensions") => crate::json!(768), + (_, "binaryQuantized") => crate::json!(false), + (_, "documentTemplate") => crate::json!("toto"), + (_, "documentTemplateMaxBytes") => crate::json!(200), + (_, "url") => crate::json!("http://rest.example/"), + (_, "request") => crate::json!({"text": "{{text}}"}), + (_, "response") => crate::json!({"embedding": "{{embedding}}"}), + (_, "headers") => crate::json!({"custom": "value"}), + (_, "distribution") => crate::json!({"mean": 0.4, "sigma": 0.1}), + _ => panic!("unknown parameter"), + } +} diff --git a/crates/meilisearch/tests/snapshot/mod.rs b/crates/meilisearch/tests/snapshot/mod.rs index 0f3417cdf..a8f93f467 100644 --- 
a/crates/meilisearch/tests/snapshot/mod.rs +++ b/crates/meilisearch/tests/snapshot/mod.rs @@ -111,6 +111,7 @@ async fn perform_snapshot() { } #[actix_rt::test] +#[cfg_attr(target_os = "windows", ignore)] async fn perform_on_demand_snapshot() { let temp = tempfile::tempdir().unwrap(); let snapshot_dir = tempfile::tempdir().unwrap(); diff --git a/crates/meilisearch/tests/upgrade/mod.rs b/crates/meilisearch/tests/upgrade/mod.rs index ca5cf0987..4b0cb6330 100644 --- a/crates/meilisearch/tests/upgrade/mod.rs +++ b/crates/meilisearch/tests/upgrade/mod.rs @@ -43,7 +43,7 @@ async fn version_too_old() { std::fs::write(db_path.join("VERSION"), "1.11.9999").unwrap(); let options = Opt { experimental_dumpless_upgrade: true, ..default_settings }; let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err(); - snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.13.1"); + snapshot!(err, @"Database version 1.11.9999 is too old for the experimental dumpless upgrade feature. Please generate a dump using the v1.11.9999 and import it in the v1.13.3"); } #[actix_rt::test] @@ -58,7 +58,7 @@ async fn version_requires_downgrade() { std::fs::write(db_path.join("VERSION"), format!("{major}.{minor}.{patch}")).unwrap(); let options = Opt { experimental_dumpless_upgrade: true, ..default_settings }; let err = Server::new_with_options(options).await.map(|_| ()).unwrap_err(); - snapshot!(err, @"Database version 1.13.2 is higher than the Meilisearch version 1.13.1. Downgrade is not supported"); + snapshot!(err, @"Database version 1.13.4 is higher than the Meilisearch version 1.13.3. 
Downgrade is not supported"); } #[actix_rt::test] diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap index 9f41a3055..e0691b2f9 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap @@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "progress": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.13.1" + "upgradeTo": "v1.13.3" }, "stats": { "totalNbTasks": 1, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap index 9f41a3055..e0691b2f9 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap @@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "progress": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.13.1" + "upgradeTo": "v1.13.3" }, "stats": { "totalNbTasks": 1, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap 
b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap index 9f41a3055..e0691b2f9 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/batches_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap @@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "progress": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.13.1" + "upgradeTo": "v1.13.3" }, "stats": { "totalNbTasks": 1, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap index 790118967..d7bf555c7 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterEnqueuedAt_equal_2025-01-16T16_47_41.snap @@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "canceledBy": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.13.1" + "upgradeTo": "v1.13.3" }, "error": null, "duration": "[duration]", diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap index 790118967..d7bf555c7 100644 --- 
a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterFinishedAt_equal_2025-01-16T16_47_41.snap @@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "canceledBy": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.13.1" + "upgradeTo": "v1.13.3" }, "error": null, "duration": "[duration]", diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap index 790118967..d7bf555c7 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/tasks_filter_afterStartedAt_equal_2025-01-16T16_47_41.snap @@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "canceledBy": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.13.1" + "upgradeTo": "v1.13.3" }, "error": null, "duration": "[duration]", diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_batch_queue_once_everything_has_been_processed.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_batch_queue_once_everything_has_been_processed.snap index 55891e133..d08a4b2d7 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_batch_queue_once_everything_has_been_processed.snap +++ 
b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_batch_queue_once_everything_has_been_processed.snap @@ -8,7 +8,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "progress": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.13.1" + "upgradeTo": "v1.13.3" }, "stats": { "totalNbTasks": 1, diff --git a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_task_queue_once_everything_has_been_processed.snap b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_task_queue_once_everything_has_been_processed.snap index 665dc07fd..927c539d1 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_task_queue_once_everything_has_been_processed.snap +++ b/crates/meilisearch/tests/upgrade/v1_12/snapshots/v1_12_0.rs/check_the_index_scheduler/the_whole_task_queue_once_everything_has_been_processed.snap @@ -12,7 +12,7 @@ source: crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs "canceledBy": null, "details": { "upgradeFrom": "v1.12.0", - "upgradeTo": "v1.13.1" + "upgradeTo": "v1.13.3" }, "error": null, "duration": "[duration]", diff --git a/crates/meilisearch/tests/vector/rest.rs b/crates/meilisearch/tests/vector/rest.rs index bf6876fbe..82fc71b26 100644 --- a/crates/meilisearch/tests/vector/rest.rs +++ b/crates/meilisearch/tests/vector/rest.rs @@ -916,7 +916,7 @@ async fn bad_settings() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "`.embedders.rest`: Missing field `request` (note: this field is mandatory for source rest)", + "message": "`.embedders.rest`: Missing field `request` (note: this field is mandatory for source `rest`)", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" @@ -933,7 +933,7 @@ async fn bad_settings() { snapshot!(code, @"400 
Bad Request"); snapshot!(response, @r###" { - "message": "`.embedders.rest`: Missing field `response` (note: this field is mandatory for source rest)", + "message": "`.embedders.rest`: Missing field `response` (note: this field is mandatory for source `rest`)", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" diff --git a/crates/meilisearch/tests/vector/settings.rs b/crates/meilisearch/tests/vector/settings.rs index 97fa496b4..9fed808b0 100644 --- a/crates/meilisearch/tests/vector/settings.rs +++ b/crates/meilisearch/tests/vector/settings.rs @@ -11,13 +11,13 @@ async fn field_unavailable_for_source() { let (response, code) = index .update_settings(json!({ - "embedders": { "manual": {"source": "userProvided", "documentTemplate": "{{doc.documentTemplate}}"}}, + "embedders": { "manual": {"source": "userProvided", "dimensions": 128, "documentTemplate": "{{doc.documentTemplate}}"}}, })) .await; snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "`.embedders.manual`: Field `documentTemplate` unavailable for source `userProvided` (only available for sources: `huggingFace`, `openAi`, `ollama`, `rest`). 
Available fields: `source`, `dimensions`, `distribution`, `binaryQuantized`", + "message": "`.embedders.manual`: Field `documentTemplate` unavailable for source `userProvided`.\n - note: `documentTemplate` is available for sources: `openAi`, `huggingFace`, `ollama`, `rest`\n - note: available fields for source `userProvided`: `source`, `dimensions`, `distribution`, `binaryQuantized`", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" @@ -32,7 +32,7 @@ async fn field_unavailable_for_source() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "`.embedders.default`: Field `revision` unavailable for source `openAi` (only available for sources: `huggingFace`). Available fields: `source`, `model`, `apiKey`, `documentTemplate`, `documentTemplateMaxBytes`, `dimensions`, `distribution`, `url`, `binaryQuantized`", + "message": "`.embedders.default`: Field `revision` unavailable for source `openAi`.\n - note: `revision` is available for sources: `huggingFace`\n - note: available fields for source `openAi`: `source`, `model`, `apiKey`, `dimensions`, `documentTemplate`, `documentTemplateMaxBytes`, `url`, `distribution`, `binaryQuantized`", "code": "invalid_settings_embedders", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" @@ -407,3 +407,551 @@ async fn ollama_url_checks() { } "###); } + +#[actix_rt::test] +async fn composite_checks() { + let server = Server::new().await; + let index = server.index("test"); + // inner distribution + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": 
"e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "distribution": { + "mean": 0.5, + "sigma": 0.2, + } + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Field `distribution` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `distribution` is available when source `huggingFace` is not for the search embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // manual source + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "userProvided", + "dimensions": 42, + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder.source`: Source `userProvided` is not available in a nested embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // composite source + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + 
.update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + } + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder.source`: Source `composite` is not available in a nested embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no source in indexing + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + "indexingEmbedder": {}, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.indexingEmbedder`: Missing field `source`.\n - note: this field is mandatory for nested embedders", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no source in search + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + 
.await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": {}, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Missing field `source`.\n - note: this field is mandatory for nested embedders", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no indexing + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test`: Missing field `indexingEmbedder` (note: this field is mandatory for source `composite`)", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // no search + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": 
"e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test`: Missing field `searchEmbedder` (note: this field is mandatory for source `composite`)", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // inner quantized + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "binaryQuantized": true, + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "binaryQuantized": false, + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Field `binaryQuantized` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `binaryQuantized` is available when source `huggingFace` is not for the search embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // prompt in search + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + 
"searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "documentTemplate": "toto", + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.test.searchEmbedder`: Field `documentTemplate` unavailable for source `huggingFace` for the search embedder.\n - note: available fields for source `huggingFace` for the search embedder: `source`, `model`, `revision`, `pooling`\n - note: `documentTemplate` is available when source `huggingFace` is not for the search embedder", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + // dimensions don't match + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "ollama", + "dimensions": 0x42, + "model": "does-not-exist", + }, + "indexingEmbedder": { + "source": "ollama", + "dimensions": 42, + "model": "does-not-exist", + }, + } + } + })) + .await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response.uid()).await; + snapshot!(response, @r###" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "ollama", + "model": "does-not-exist", + "dimensions": 66 + }, + "indexingEmbedder": { + "source": "ollama", + "model": "does-not-exist", + 
"dimensions": 42 + } + } + } + }, + "error": { + "message": "Index `test`: Error while generating embeddings: user error: error while generating test embeddings.\n - the dimensions of embeddings produced at search time and at indexing time don't match.\n - Search time dimensions: 66\n - Indexing time dimensions: 42\n - Note: Dimensions of embeddings produced by both embedders are required to match.", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + // pooling don't match + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceMean" + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceCls" + }, + } + } + })) + .await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response.uid()).await; + snapshot!(response, @r###" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "test", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceMean" + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + 
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + "pooling": "forceCls" + } + } + } + }, + "error": { + "message": "Index `test`: Error while generating embeddings: user error: error while generating test embeddings.\n - the embeddings produced at search time and indexing time are not similar enough.\n - angular distance 0.25\n - Meilisearch requires a maximum distance of 0.01.\n - Note: check that both embedders produce similar embeddings.\n - Make sure the `model`, `revision` and `pooling` of both embedders match.", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // ok + let (response, _code) = index + .update_settings(json!({ + "embedders": { + "test": null + } + })) + .await; + server.wait_task(response.uid()).await; + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e", + }, + } + } + })) + .await; + snapshot!(code, @"202 Accepted"); + let response = server.wait_task(response.uid()).await; + snapshot!(response, @r###" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "test", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "test": { + "source": "composite", + "searchEmbedder": { + "source": "huggingFace", + "model": "sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" + }, + "indexingEmbedder": { + "source": "huggingFace", + "model": 
"sentence-transformers/all-MiniLM-L6-v2", + "revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 77017a3fd..f0972de75 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -13,6 +13,7 @@ use thiserror::Error; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::documents::{self, DocumentsBatchCursorError}; use crate::thread_pool_no_abort::PanicCatched; +use crate::vector::settings::EmbeddingSettings; use crate::{CriterionError, DocumentId, FieldId, Object, SortError}; pub fn is_reserved_keyword(keyword: &str) -> bool { @@ -240,28 +241,52 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidSimilarEmbedder(String), #[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")] TooManyVectors(String, usize), - #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). 
Available fields: {}", - allowed_sources_for_field - .iter() - .map(|accepted| format!("`{}`", accepted)) - .collect::>() - .join(", "), - allowed_fields_for_source - .iter() - .map(|accepted| format!("`{}`", accepted)) - .collect::>() - .join(", ") + #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}`{for_context}.{available_sources}{available_fields}{available_contexts}", + field=field.name(), + for_context={ + context.in_context() + }, + available_sources={ + let allowed_sources_for_field = EmbeddingSettings::allowed_sources_for_field(*field, *context); + if allowed_sources_for_field.is_empty() { + String::new() + } else { + format!("\n - note: `{}` is available for sources: {}", + field.name(), + allowed_sources_for_field + .iter() + .map(|accepted| format!("`{}`", accepted)) + .collect::>() + .join(", "), + ) + } + }, + available_fields={ + let allowed_fields_for_source = EmbeddingSettings::allowed_fields_for_source(*source_, *context); + format!("\n - note: available fields for source `{source_}`{}: {}",context.in_context(), allowed_fields_for_source + .iter() + .map(|accepted| format!("`{}`", accepted)) + .collect::>() + .join(", "),) + }, + available_contexts={ + let available_not_nested = !matches!(EmbeddingSettings::field_status(*source_, *field, crate::vector::settings::NestingContext::NotNested), crate::vector::settings::FieldStatus::Disallowed); + if available_not_nested { + format!("\n - note: `{}` is available when source `{source_}` is not{}", field.name(), context.in_context()) + } else { + String::new() + } + } )] InvalidFieldForSource { embedder_name: String, source_: crate::vector::settings::EmbedderSource, - field: &'static str, - allowed_fields_for_source: &'static [&'static str], - allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource], + context: crate::vector::settings::NestingContext, + field: crate::vector::settings::MetaEmbeddingSetting, }, 
#[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())] InvalidOpenAiModel { embedder_name: String, model: String }, - #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")] + #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source `{source_}`)")] MissingFieldForSource { field: &'static str, source_: crate::vector::settings::EmbedderSource, @@ -281,6 +306,15 @@ and can not be more than 511 bytes.", .document_id.to_string() dimensions: usize, max_dimensions: usize, }, + #[error("`.embedders.{embedder_name}.source`: Source `{source_}` is not available in a nested embedder")] + InvalidSourceForNested { + embedder_name: String, + source_: crate::vector::settings::EmbedderSource, + }, + #[error("`.embedders.{embedder_name}`: Missing field `source`.\n - note: this field is mandatory for nested embedders")] + MissingSourceForNested { embedder_name: String }, + #[error("`.embedders.{embedder_name}`: {message}")] + InvalidSettingsEmbedder { embedder_name: String, message: String }, #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")] InvalidSettingsDimensions { embedder_name: String }, #[error( diff --git a/crates/milli/src/search/hybrid.rs b/crates/milli/src/search/hybrid.rs index 368d61833..a1c8b71da 100644 --- a/crates/milli/src/search/hybrid.rs +++ b/crates/milli/src/search/hybrid.rs @@ -203,7 +203,7 @@ impl<'a> Search<'a> { let deadline = std::time::Instant::now() + std::time::Duration::from_secs(3); - match embedder.embed_one(query, Some(deadline)) { + match embedder.embed_search(query, Some(deadline)) { Ok(embedding) => embedding, Err(error) => { tracing::error!(error=%error, "Embedding failed"); diff --git a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs 
b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs index 560b73834..cb8c121ce 100644 --- a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -786,7 +786,7 @@ fn embed_chunks( unused_vectors_distribution: &UnusedVectorsDistribution, request_threads: &ThreadPoolNoAbort, ) -> Result>> { - match embedder.embed_chunks(text_chunks, request_threads) { + match embedder.embed_index(text_chunks, request_threads) { Ok(chunks) => Ok(chunks), Err(error) => { if let FaultSource::Bug = error.fault { diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 2ae3fa4dd..ae082284a 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -2772,6 +2772,8 @@ mod tests { response: Setting::NotSet, distribution: Setting::NotSet, headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, binary_quantized: Setting::NotSet, }), ); diff --git a/crates/milli/src/update/new/extract/vectors/mod.rs b/crates/milli/src/update/new/extract/vectors/mod.rs index b268647c2..6820ee67b 100644 --- a/crates/milli/src/update/new/extract/vectors/mod.rs +++ b/crates/milli/src/update/new/extract/vectors/mod.rs @@ -416,7 +416,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> { return Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg))); } - let res = match embedder.embed_chunks_ref(texts.as_slice(), threads) { + let res = match embedder.embed_index_ref(texts.as_slice(), threads) { Ok(embeddings) => { for (docid, embedding) in ids.into_iter().zip(embeddings) { sender.set_vector(*docid, embedder_id, embedding).unwrap(); diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index 42f38ea0a..571ffe1c6 100644 --- a/crates/milli/src/update/settings.rs +++ 
b/crates/milli/src/update/settings.rs @@ -30,8 +30,8 @@ use crate::proximity::ProximityPrecision; use crate::update::index_documents::IndexDocumentsMethod; use crate::update::{IndexDocuments, UpdateIndexingStep}; use crate::vector::settings::{ - check_set, check_unset, EmbedderAction, EmbedderSource, EmbeddingSettings, ReindexAction, - WriteBackToDocuments, + EmbedderAction, EmbedderSource, EmbeddingSettings, NestingContext, ReindexAction, + SubEmbeddingSettings, WriteBackToDocuments, }; use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result}; @@ -1641,26 +1641,12 @@ fn embedders(embedding_configs: Vec) -> Result, -) -> Result> { - match new { - Setting::Set(EmbeddingSettings { - source, - model, - revision, - pooling, - api_key, - dimensions, - document_template: Setting::Set(template), - document_template_max_bytes, - url, - request, - response, - distribution, - headers, - binary_quantized: binary_quantize, - }) => { - let max_bytes = match document_template_max_bytes.set() { + new_prompt: Setting, + max_bytes: Setting, +) -> Result> { + match new_prompt { + Setting::Set(template) => { + let max_bytes = match max_bytes.set() { Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| { crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes { embedder_name: name.to_owned(), @@ -1678,22 +1664,7 @@ fn validate_prompt( .map(|prompt| crate::prompt::PromptData::from(prompt).template) .map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?; - Ok(Setting::Set(EmbeddingSettings { - source, - model, - revision, - pooling, - api_key, - dimensions, - document_template: Setting::Set(template), - document_template_max_bytes, - url, - request, - response, - distribution, - headers, - binary_quantized: binary_quantize, - })) + Ok(Setting::Set(template)) } new => Ok(new), } @@ -1703,7 +1674,6 @@ pub fn validate_embedding_settings( settings: 
Setting, name: &str, ) -> Result> { - let settings = validate_prompt(name, settings)?; let Setting::Set(settings) = settings else { return Ok(settings) }; let EmbeddingSettings { source, @@ -1717,11 +1687,15 @@ pub fn validate_embedding_settings( url, request, response, + search_embedder, + mut indexing_embedder, distribution, headers, binary_quantized: binary_quantize, } = settings; + let document_template = validate_prompt(name, document_template, document_template_max_bytes)?; + if let Some(0) = dimensions.set() { return Err(crate::error::UserError::InvalidSettingsDimensions { embedder_name: name.to_owned(), @@ -1747,6 +1721,7 @@ pub fn validate_embedding_settings( } let Some(inferred_source) = source.set() else { + // we are validating the fused settings, so we always have a source return Ok(Setting::Set(EmbeddingSettings { source, model, @@ -1759,20 +1734,35 @@ pub fn validate_embedding_settings( url, request, response, + search_embedder, + indexing_embedder, distribution, headers, binary_quantized: binary_quantize, })); }; + EmbeddingSettings::check_settings( + name, + inferred_source, + NestingContext::NotNested, + &model, + &revision, + &pooling, + &dimensions, + &api_key, + &url, + &request, + &response, + &document_template, + &document_template_max_bytes, + &headers, + &search_embedder, + &indexing_embedder, + &binary_quantize, + &distribution, + )?; match inferred_source { EmbedderSource::OpenAi => { - check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; - - check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; - check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; - if let Setting::Set(model) = &model { let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str()) .ok_or(crate::error::UserError::InvalidOpenAiModel { @@ 
-1803,55 +1793,117 @@ pub fn validate_embedding_settings( } } } - EmbedderSource::Ollama => { - check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?; - check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; + EmbedderSource::Ollama + | EmbedderSource::HuggingFace + | EmbedderSource::UserProvided + | EmbedderSource::Rest => {} + EmbedderSource::Composite => { + if let Setting::Set(embedder) = &search_embedder { + if let Some(source) = embedder.source.set() { + let search_embedder = match embedder.search_embedder.clone() { + Setting::Set(search_embedder) => Setting::Set(deserialize_sub_embedder( + search_embedder, + name, + NestingContext::Search, + )?), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + }; + let indexing_embedder = match embedder.indexing_embedder.clone() { + Setting::Set(indexing_embedder) => Setting::Set(deserialize_sub_embedder( + indexing_embedder, + name, + NestingContext::Search, + )?), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + }; + EmbeddingSettings::check_nested_source(name, source, NestingContext::Search)?; + EmbeddingSettings::check_settings( + name, + source, + NestingContext::Search, + &embedder.model, + &embedder.revision, + &embedder.pooling, + &embedder.dimensions, + &embedder.api_key, + &embedder.url, + &embedder.request, + &embedder.response, + &embedder.document_template, + &embedder.document_template_max_bytes, + &embedder.headers, + &search_embedder, + &indexing_embedder, + &embedder.binary_quantized, + &embedder.distribution, + )?; + } else { + return Err(UserError::MissingSourceForNested { + embedder_name: NestingContext::Search.embedder_name_with_context(name), + } + .into()); + } + } - check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; - 
check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; - } - EmbedderSource::HuggingFace => { - check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; - check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; + indexing_embedder = if let Setting::Set(mut embedder) = indexing_embedder { + embedder.document_template = validate_prompt( + name, + embedder.document_template, + embedder.document_template_max_bytes, + )?; - check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; - check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; - check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; - } - EmbedderSource::UserProvided => { - check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; - check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&pooling, EmbeddingSettings::POOLING, inferred_source, name)?; - check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; - check_unset( - &document_template, - EmbeddingSettings::DOCUMENT_TEMPLATE, - inferred_source, - name, - )?; - check_unset( - &document_template_max_bytes, - EmbeddingSettings::DOCUMENT_TEMPLATE_MAX_BYTES, - inferred_source, - name, - )?; - check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; - - check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; - check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; - check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; - } - EmbedderSource::Rest => { - check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; - check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&pooling, EmbeddingSettings::POOLING, 
inferred_source, name)?; - check_set(&url, EmbeddingSettings::URL, inferred_source, name)?; - check_set(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; - check_set(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; + if let Some(source) = embedder.source.set() { + let search_embedder = match embedder.search_embedder.clone() { + Setting::Set(search_embedder) => Setting::Set(deserialize_sub_embedder( + search_embedder, + name, + NestingContext::Indexing, + )?), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + }; + let indexing_embedder = match embedder.indexing_embedder.clone() { + Setting::Set(indexing_embedder) => Setting::Set(deserialize_sub_embedder( + indexing_embedder, + name, + NestingContext::Indexing, + )?), + Setting::Reset => Setting::Reset, + Setting::NotSet => Setting::NotSet, + }; + EmbeddingSettings::check_nested_source(name, source, NestingContext::Indexing)?; + EmbeddingSettings::check_settings( + name, + source, + NestingContext::Indexing, + &embedder.model, + &embedder.revision, + &embedder.pooling, + &embedder.dimensions, + &embedder.api_key, + &embedder.url, + &embedder.request, + &embedder.response, + &embedder.document_template, + &embedder.document_template_max_bytes, + &embedder.headers, + &search_embedder, + &indexing_embedder, + &embedder.binary_quantized, + &embedder.distribution, + )?; + } else { + return Err(UserError::MissingSourceForNested { + embedder_name: NestingContext::Indexing.embedder_name_with_context(name), + } + .into()); + } + Setting::Set(embedder) + } else { + indexing_embedder + }; } } Ok(Setting::Set(EmbeddingSettings { @@ -1866,992 +1918,31 @@ pub fn validate_embedding_settings( url, request, response, + search_embedder, + indexing_embedder, distribution, headers, binary_quantized: binary_quantize, })) } -#[cfg(test)] -mod tests { - use big_s::S; - use heed::types::Bytes; - use maplit::{btreemap, btreeset}; - use meili_snap::snapshot; - - use super::*; - use 
crate::error::Error; - use crate::index::tests::TempIndex; - use crate::update::ClearDocuments; - use crate::{db_snap, Criterion, Filter, SearchResult}; - - #[test] - fn set_and_reset_searchable_fields() { - let index = TempIndex::new(); - - // First we send 3 documents with ids from 1 to 3. - let mut wtxn = index.write_txn().unwrap(); - - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 1, "name": "kevin", "age": 23 }, - { "id": 2, "name": "kevina", "age": 21}, - { "id": 3, "name": "benoit", "age": 34 } - ]), - ) - .unwrap(); - - // We change the searchable fields to be the "name" field only. - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_searchable_fields(vec!["name".into()]); +fn deserialize_sub_embedder( + sub_embedder: serde_json::Value, + embedder_name: &str, + context: NestingContext, +) -> std::result::Result { + match deserr::deserialize::<_, _, deserr::errors::JsonError>(sub_embedder) { + Ok(sub_embedder) => Ok(sub_embedder), + Err(error) => { + let message = format!("{error}{}", context.nesting_embedders()); + Err(UserError::InvalidSettingsEmbedder { + embedder_name: context.embedder_name_with_context(embedder_name), + message, }) - .unwrap(); - - wtxn.commit().unwrap(); - - db_snap!(index, fields_ids_map, @r###" - 0 id | - 1 name | - 2 age | - "###); - db_snap!(index, searchable_fields, @r###"["name"]"###); - db_snap!(index, fieldids_weights_map, @r###" - fid weight - 1 0 | - "###); - - // Check that the searchable field is correctly set to "name" only. - let rtxn = index.read_txn().unwrap(); - // When we search for something that is not in - // the searchable fields it must not return any document. - let result = index.search(&rtxn).query("23").execute().unwrap(); - assert_eq!(result.documents_ids, Vec::::new()); - - // When we search for something that is in the searchable fields - // we must find the appropriate document. 
- let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap(); - let documents = index.documents(&rtxn, result.documents_ids).unwrap(); - let fid_map = index.fields_ids_map(&rtxn).unwrap(); - assert_eq!(documents.len(), 1); - assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); - drop(rtxn); - - // We change the searchable fields to be the "name" field only. - index - .update_settings(|settings| { - settings.reset_searchable_fields(); - }) - .unwrap(); - - db_snap!(index, fields_ids_map, @r###" - 0 id | - 1 name | - 2 age | - "###); - db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###); - db_snap!(index, fieldids_weights_map, @r###" - fid weight - 0 0 | - 1 0 | - 2 0 | - "###); - - // Check that the searchable field have been reset and documents are found now. - let rtxn = index.read_txn().unwrap(); - let fid_map = index.fields_ids_map(&rtxn).unwrap(); - let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap(); - snapshot!(format!("{user_defined_searchable_fields:?}"), @"None"); - // the searchable fields should contain all the fields - let searchable_fields = index.searchable_fields(&rtxn).unwrap(); - snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###); - let result = index.search(&rtxn).query("23").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); - let documents = index.documents(&rtxn, result.documents_ids).unwrap(); - assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); - } - - #[test] - fn mixup_searchable_with_displayed_fields() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // First we send 3 documents with ids from 1 to 3. 
- index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ]), - ) - .unwrap(); - - // In the same transaction we change the displayed fields to be only the "age". - // We also change the searchable fields to be the "name" field only. - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_displayed_fields(vec!["age".into()]); - settings.set_searchable_fields(vec!["name".into()]); - }) - .unwrap(); - wtxn.commit().unwrap(); - - // Check that the displayed fields are correctly set to `None` (default value). - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids.unwrap(), (&["age"][..])); - drop(rtxn); - - // We change the searchable fields to be the "name" field only. - index - .update_settings(|settings| { - settings.reset_searchable_fields(); - }) - .unwrap(); - - // Check that the displayed fields always contains only the "age" field. - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids.unwrap(), &["age"][..]); - } - - #[test] - fn default_displayed_fields() { - let index = TempIndex::new(); - - // First we send 3 documents with ids from 1 to 3. - index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ])) - .unwrap(); - - // Check that the displayed fields are correctly set to `None` (default value). 
- let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, None); - } - - #[test] - fn set_and_reset_displayed_field() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ]), - ) - .unwrap(); - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_displayed_fields(vec!["age".into()]); - }) - .unwrap(); - wtxn.commit().unwrap(); - - // Check that the displayed fields are correctly set to only the "age" field. - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids.unwrap(), &["age"][..]); - drop(rtxn); - - // We reset the fields ids to become `None`, the default value. - index - .update_settings(|settings| { - settings.reset_displayed_fields(); - }) - .unwrap(); - - // Check that the displayed fields are correctly set to `None` (default value). - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.displayed_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, None); - } - - #[test] - fn set_filterable_fields() { - let index = TempIndex::new(); - - // Set the filterable fields to be the age. - index - .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesRule::Field( - "age".to_string(), - )]); - }) - .unwrap(); - - // Then index some documents. - index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ])) - .unwrap(); - - // Check that the displayed fields are correctly set. 
- let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap(); - assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("age".to_string(),)]); - // Only count the field_id 0 and level 0 facet values. - // TODO we must support typed CSVs for numbers to be understood. - let fidmap = index.fields_ids_map(&rtxn).unwrap(); - for document in index.all_documents(&rtxn).unwrap() { - let document = document.unwrap(); - let json = crate::obkv_to_json(&fidmap.ids().collect::>(), &fidmap, document.1) - .unwrap(); - println!("json: {:?}", json); } - let count = index - .facet_id_f64_docids - .remap_key_type::() - // The faceted field id is 2u16 - .prefix_iter(&rtxn, &[0, 2, 0]) - .unwrap() - .count(); - assert_eq!(count, 3); - drop(rtxn); - - // Index a little more documents with new and current facets values. - index - .add_documents(documents!([ - { "id": 3, "name": "kevin2", "age": 23}, - { "id": 4, "name": "kevina2", "age": 21 }, - { "id": 5, "name": "benoit", "age": 35 } - ])) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 0 and level 0 facet values. - let count = index - .facet_id_f64_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 2, 0]) - .unwrap() - .count(); - assert_eq!(count, 4); - - // Set the filterable fields to be the age and the name. - index - .update_settings(|settings| { - settings.set_filterable_fields(vec![ - FilterableAttributesRule::Field("age".to_string()), - FilterableAttributesRule::Field("name".to_string()), - ]); - }) - .unwrap(); - - // Check that the displayed fields are correctly set. - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap(); - assert_eq!( - fields_ids, - vec![ - FilterableAttributesRule::Field("age".to_string()), - FilterableAttributesRule::Field("name".to_string()), - ] - ); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 2 and level 0 facet values. 
- let count = index - .facet_id_f64_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 2, 0]) - .unwrap() - .count(); - assert_eq!(count, 4); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 1 and level 0 facet values. - let count = index - .facet_id_string_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 1]) - .unwrap() - .count(); - assert_eq!(count, 5); - - // Remove the age from the filterable fields. - index - .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesRule::Field( - "name".to_string(), - )]); - }) - .unwrap(); - - // Check that the displayed fields are correctly set. - let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap(); - assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("name".to_string())]); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 2 and level 0 facet values. - let count = index - .facet_id_f64_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 2, 0]) - .unwrap() - .count(); - assert_eq!(count, 0); - - let rtxn = index.read_txn().unwrap(); - // Only count the field_id 1 and level 0 facet values. - let count = index - .facet_id_string_docids - .remap_key_type::() - .prefix_iter(&rtxn, &[0, 1]) - .unwrap() - .count(); - assert_eq!(count, 5); - } - - #[test] - fn set_asc_desc_field() { - let index = TempIndex::new(); - - // Set the filterable fields to be the age. - index - .update_settings(|settings| { - settings.set_displayed_fields(vec![S("name")]); - settings.set_criteria(vec![Criterion::Asc("age".to_owned())]); - }) - .unwrap(); - - // Then index some documents. - index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ])) - .unwrap(); - - // Run an empty query just to ensure that the search results are ordered. 
- let rtxn = index.read_txn().unwrap(); - let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); - let documents = index.documents(&rtxn, documents_ids).unwrap(); - - // Fetch the documents "age" field in the ordre in which the documents appear. - let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap(); - let iter = documents.into_iter().map(|(_, doc)| { - let bytes = doc.get(age_field_id).unwrap(); - let string = std::str::from_utf8(bytes).unwrap(); - string.parse::().unwrap() - }); - - assert_eq!(iter.collect::>(), vec![21, 23, 34]); - } - - #[test] - fn set_distinct_field() { - let index = TempIndex::new(); - - // Set the filterable fields to be the age. - index - .update_settings(|settings| { - // Don't display the generated `id` field. - settings.set_displayed_fields(vec![S("name"), S("age")]); - settings.set_distinct_field(S("age")); - }) - .unwrap(); - - // Then index some documents. - index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23 }, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 }, - { "id": 3, "name": "bernard", "age": 34 }, - { "id": 4, "name": "bertrand", "age": 34 }, - { "id": 5, "name": "bernie", "age": 34 }, - { "id": 6, "name": "ben", "age": 34 } - ])) - .unwrap(); - - // Run an empty query just to ensure that the search results are ordered. - let rtxn = index.read_txn().unwrap(); - let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); - - // There must be at least one document with a 34 as the age. - assert_eq!(documents_ids.len(), 3); - } - - #[test] - fn set_nested_distinct_field() { - let index = TempIndex::new(); - - // Set the filterable fields to be the age. - index - .update_settings(|settings| { - // Don't display the generated `id` field. - settings.set_displayed_fields(vec![S("person")]); - settings.set_distinct_field(S("person.age")); - }) - .unwrap(); - - // Then index some documents. 
- index - .add_documents(documents!([ - { "id": 0, "person": { "name": "kevin", "age": 23 }}, - { "id": 1, "person": { "name": "kevina", "age": 21 }}, - { "id": 2, "person": { "name": "benoit", "age": 34 }}, - { "id": 3, "person": { "name": "bernard", "age": 34 }}, - { "id": 4, "person": { "name": "bertrand", "age": 34 }}, - { "id": 5, "person": { "name": "bernie", "age": 34 }}, - { "id": 6, "person": { "name": "ben", "age": 34 }} - ])) - .unwrap(); - - // Run an empty query just to ensure that the search results are ordered. - let rtxn = index.read_txn().unwrap(); - let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); - - // There must be at least one document with a 34 as the age. - assert_eq!(documents_ids.len(), 3); - } - - #[test] - fn default_stop_words() { - let index = TempIndex::new(); - - // First we send 3 documents with ids from 1 to 3. - index - .add_documents(documents!([ - { "id": 0, "name": "kevin", "age": 23}, - { "id": 1, "name": "kevina", "age": 21 }, - { "id": 2, "name": "benoit", "age": 34 } - ])) - .unwrap(); - - // Ensure there is no stop_words by default - let rtxn = index.read_txn().unwrap(); - let stop_words = index.stop_words(&rtxn).unwrap(); - assert!(stop_words.is_none()); - } - - #[test] - fn set_and_reset_stop_words() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // First we send 3 documents with ids from 1 to 3. - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "kevin", "age": 23, "maxim": "I love dogs" }, - { "id": 1, "name": "kevina", "age": 21, "maxim": "Doggos are the best" }, - { "id": 2, "name": "benoit", "age": 34, "maxim": "The crepes are really good" }, - ]), - ) - .unwrap(); - - // In the same transaction we provide some stop_words - let set = btreeset! 
{ "i".to_string(), "the".to_string(), "are".to_string() }; - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_stop_words(set.clone()); - }) - .unwrap(); - - wtxn.commit().unwrap(); - - // Ensure stop_words are effectively stored - let rtxn = index.read_txn().unwrap(); - let stop_words = index.stop_words(&rtxn).unwrap(); - assert!(stop_words.is_some()); // at this point the index should return something - - let stop_words = stop_words.unwrap(); - let expected = fst::Set::from_iter(&set).unwrap(); - assert_eq!(stop_words.as_fst().as_bytes(), expected.as_fst().as_bytes()); - - // when we search for something that is a non prefix stop_words it should be ignored - // thus we should get a placeholder search (all the results = 3) - let result = index.search(&rtxn).query("the ").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 3); - let result = index.search(&rtxn).query("i ").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 3); - let result = index.search(&rtxn).query("are ").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 3); - - let result = index.search(&rtxn).query("dog").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos - let result = index.search(&rtxn).query("benoît").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data - - // now we'll reset the stop_words and ensure it's None - index - .update_settings(|settings| { - settings.reset_stop_words(); - }) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - let stop_words = index.stop_words(&rtxn).unwrap(); - assert!(stop_words.is_none()); - - // now we can search for the stop words - let result = index.search(&rtxn).query("the").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); - let result = index.search(&rtxn).query("i").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); - let result = 
index.search(&rtxn).query("are").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); - - // the rest of the search is still not impacted - let result = index.search(&rtxn).query("dog").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos - let result = index.search(&rtxn).query("benoît").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data - } - - #[test] - fn set_and_reset_synonyms() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // Send 3 documents with ids from 1 to 3. - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "kevin", "age": 23, "maxim": "I love dogs"}, - { "id": 1, "name": "kevina", "age": 21, "maxim": "Doggos are the best"}, - { "id": 2, "name": "benoit", "age": 34, "maxim": "The crepes are really good"}, - ]), - ) - .unwrap(); - - // In the same transaction provide some synonyms - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_synonyms(btreemap! 
{ - "blini".to_string() => vec!["crepes".to_string()], - "super like".to_string() => vec!["love".to_string()], - "puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()] - }); - }) - .unwrap(); - wtxn.commit().unwrap(); - - // Ensure synonyms are effectively stored - let rtxn = index.read_txn().unwrap(); - let synonyms = index.synonyms(&rtxn).unwrap(); - assert!(!synonyms.is_empty()); // at this point the index should return something - - // Check that we can use synonyms - let result = index.search(&rtxn).query("blini").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); - let result = index.search(&rtxn).query("super like").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 1); - let result = index.search(&rtxn).query("puppies").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); - - // Reset the synonyms - index - .update_settings(|settings| { - settings.reset_synonyms(); - }) - .unwrap(); - - // Ensure synonyms are reset - let rtxn = index.read_txn().unwrap(); - let synonyms = index.synonyms(&rtxn).unwrap(); - assert!(synonyms.is_empty()); - - // Check that synonyms are no longer work - let result = index.search(&rtxn).query("blini").execute().unwrap(); - assert!(result.documents_ids.is_empty()); - let result = index.search(&rtxn).query("super like").execute().unwrap(); - assert!(result.documents_ids.is_empty()); - let result = index.search(&rtxn).query("puppies").execute().unwrap(); - assert!(result.documents_ids.is_empty()); - } - - #[test] - fn thai_synonyms() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // Send 3 documents with ids from 1 to 3. - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "id": 0, "name": "ยี่ปุ่น" }, - { "id": 1, "name": "ญี่ปุ่น" }, - ]), - ) - .unwrap(); - - // In the same transaction provide some synonyms - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_synonyms(btreemap! 
{ - "japanese".to_string() => vec![S("ญี่ปุ่น"), S("ยี่ปุ่น")], - }); - }) - .unwrap(); - wtxn.commit().unwrap(); - - // Ensure synonyms are effectively stored - let rtxn = index.read_txn().unwrap(); - let synonyms = index.synonyms(&rtxn).unwrap(); - assert!(!synonyms.is_empty()); // at this point the index should return something - - // Check that we can use synonyms - let result = index.search(&rtxn).query("japanese").execute().unwrap(); - assert_eq!(result.documents_ids.len(), 2); - } - - #[test] - fn setting_searchable_recomputes_other_settings() { - let index = TempIndex::new(); - - // Set all the settings except searchable - index - .update_settings(|settings| { - settings.set_displayed_fields(vec!["hello".to_string()]); - settings.set_filterable_fields(vec![ - FilterableAttributesRule::Field("age".to_string()), - FilterableAttributesRule::Field("toto".to_string()), - ]); - settings.set_criteria(vec![Criterion::Asc(S("toto"))]); - }) - .unwrap(); - - // check the output - let rtxn = index.read_txn().unwrap(); - assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); - // since no documents have been pushed the primary key is still unset - assert!(index.primary_key(&rtxn).unwrap().is_none()); - assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); - drop(rtxn); - - // We set toto and age as searchable to force reordering of the fields - index - .update_settings(|settings| { - settings.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]); - }) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); - assert!(index.primary_key(&rtxn).unwrap().is_none()); - assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); - } - - #[test] - fn setting_not_filterable_cant_filter() { - let index = TempIndex::new(); - - // Set all the settings except searchable - index - .update_settings(|settings| { - 
settings.set_displayed_fields(vec!["hello".to_string()]); - // It is only Asc(toto), there is a facet database but it is denied to filter with toto. - settings.set_criteria(vec![Criterion::Asc(S("toto"))]); - }) - .unwrap(); - - let rtxn = index.read_txn().unwrap(); - let filter = Filter::from_str("toto = 32").unwrap().unwrap(); - let _ = filter.evaluate(&rtxn, &index).unwrap_err(); - } - - #[test] - fn setting_primary_key() { - let index = TempIndex::new(); - - let mut wtxn = index.write_txn().unwrap(); - // Set the primary key settings - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_primary_key(S("mykey")); - }) - .unwrap(); - wtxn.commit().unwrap(); - let mut wtxn = index.write_txn().unwrap(); - assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); - - // Then index some documents with the "mykey" primary key. - index - .add_documents_using_wtxn( - &mut wtxn, - documents!([ - { "mykey": 1, "name": "kevin", "age": 23 }, - { "mykey": 2, "name": "kevina", "age": 21 }, - { "mykey": 3, "name": "benoit", "age": 34 }, - { "mykey": 4, "name": "bernard", "age": 34 }, - { "mykey": 5, "name": "bertrand", "age": 34 }, - { "mykey": 6, "name": "bernie", "age": 34 }, - { "mykey": 7, "name": "ben", "age": 34 } - ]), - ) - .unwrap(); - wtxn.commit().unwrap(); - - // Updating settings with the same primary key should do nothing - let mut wtxn = index.write_txn().unwrap(); - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_primary_key(S("mykey")); - }) - .unwrap(); - assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); - wtxn.commit().unwrap(); - - // Updating the settings with a different (or no) primary key causes an error - let mut wtxn = index.write_txn().unwrap(); - let error = index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.reset_primary_key(); - }) - .unwrap_err(); - assert!(matches!(error, Error::UserError(UserError::PrimaryKeyCannotBeChanged(_)))); - wtxn.abort(); - - // But if 
we clear the database... - let mut wtxn = index.write_txn().unwrap(); - let builder = ClearDocuments::new(&mut wtxn, &index); - builder.execute().unwrap(); - wtxn.commit().unwrap(); - - // ...we can change the primary key - index - .update_settings(|settings| { - settings.set_primary_key(S("myid")); - }) - .unwrap(); - } - - #[test] - fn setting_impact_relevancy() { - let index = TempIndex::new(); - - // Set the genres setting - index - .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesRule::Field( - "genres".to_string(), - )]); - }) - .unwrap(); - - index.add_documents(documents!([ - { - "id": 11, - "title": "Star Wars", - "overview": - "Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.", - "genres": ["Adventure", "Action", "Science Fiction"], - "poster": "https://image.tmdb.org/t/p/w500/6FfCtAuVAW8XJjZ7eWeLibRLWTw.jpg", - "release_date": 233366400 - }, - { - "id": 30, - "title": "Magnetic Rose", - "overview": "", - "genres": ["Animation", "Science Fiction"], - "poster": "https://image.tmdb.org/t/p/w500/gSuHDeWemA1menrwfMRChnSmMVN.jpg", - "release_date": 819676800 - } - ])).unwrap(); - - let rtxn = index.read_txn().unwrap(); - let SearchResult { documents_ids, .. 
} = index.search(&rtxn).query("S").execute().unwrap(); - let first_id = documents_ids[0]; - let documents = index.documents(&rtxn, documents_ids).unwrap(); - let (_, content) = documents.iter().find(|(id, _)| *id == first_id).unwrap(); - - let fid = index.fields_ids_map(&rtxn).unwrap().id("title").unwrap(); - let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap(); - assert_eq!(line, r#""Star Wars""#); - } - - #[test] - fn test_disable_typo() { - let index = TempIndex::new(); - - let mut txn = index.write_txn().unwrap(); - assert!(index.authorize_typos(&txn).unwrap()); - - index - .update_settings_using_wtxn(&mut txn, |settings| { - settings.set_autorize_typos(false); - }) - .unwrap(); - - assert!(!index.authorize_typos(&txn).unwrap()); - } - - #[test] - fn update_min_word_len_for_typo() { - let index = TempIndex::new(); - - // Set the genres setting - index - .update_settings(|settings| { - settings.set_min_word_len_one_typo(8); - settings.set_min_word_len_two_typos(8); - }) - .unwrap(); - - let txn = index.read_txn().unwrap(); - assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 8); - assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 8); - - index - .update_settings(|settings| { - settings.reset_min_word_len_one_typo(); - settings.reset_min_word_len_two_typos(); - }) - .unwrap(); - - let txn = index.read_txn().unwrap(); - assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_ONE_TYPO); - assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_TWO_TYPOS); - } - - #[test] - fn update_invalid_min_word_len_for_typo() { - let index = TempIndex::new(); - - // Set the genres setting - index - .update_settings(|settings| { - settings.set_min_word_len_one_typo(10); - settings.set_min_word_len_two_typos(7); - }) - .unwrap_err(); - } - - #[test] - fn update_exact_words_normalization() { - let index = TempIndex::new(); - - let mut txn = index.write_txn().unwrap(); - // Set the genres setting - index - 
.update_settings_using_wtxn(&mut txn, |settings| { - let words = btreeset! { S("Ab"), S("ac") }; - settings.set_exact_words(words); - }) - .unwrap(); - - let exact_words = index.exact_words(&txn).unwrap().unwrap(); - for word in exact_words.into_fst().stream().into_str_vec().unwrap() { - assert!(word.0 == "ac" || word.0 == "ab"); - } - } - - #[test] - fn test_correct_settings_init() { - let index = TempIndex::new(); - - index - .update_settings(|settings| { - // we don't actually update the settings, just check their content - let Settings { - wtxn: _, - index: _, - indexer_config: _, - searchable_fields, - displayed_fields, - filterable_fields, - sortable_fields, - criteria, - stop_words, - non_separator_tokens, - separator_tokens, - dictionary, - distinct_field, - synonyms, - primary_key, - authorize_typos, - min_word_len_two_typos, - min_word_len_one_typo, - exact_words, - exact_attributes, - max_values_per_facet, - sort_facet_values_by, - pagination_max_total_hits, - proximity_precision, - embedder_settings, - search_cutoff, - localized_attributes_rules, - prefix_search, - facet_search, - } = settings; - assert!(matches!(searchable_fields, Setting::NotSet)); - assert!(matches!(displayed_fields, Setting::NotSet)); - assert!(matches!(filterable_fields, Setting::NotSet)); - assert!(matches!(sortable_fields, Setting::NotSet)); - assert!(matches!(criteria, Setting::NotSet)); - assert!(matches!(stop_words, Setting::NotSet)); - assert!(matches!(non_separator_tokens, Setting::NotSet)); - assert!(matches!(separator_tokens, Setting::NotSet)); - assert!(matches!(dictionary, Setting::NotSet)); - assert!(matches!(distinct_field, Setting::NotSet)); - assert!(matches!(synonyms, Setting::NotSet)); - assert!(matches!(primary_key, Setting::NotSet)); - assert!(matches!(authorize_typos, Setting::NotSet)); - assert!(matches!(min_word_len_two_typos, Setting::NotSet)); - assert!(matches!(min_word_len_one_typo, Setting::NotSet)); - assert!(matches!(exact_words, Setting::NotSet)); - 
assert!(matches!(exact_attributes, Setting::NotSet)); - assert!(matches!(max_values_per_facet, Setting::NotSet)); - assert!(matches!(sort_facet_values_by, Setting::NotSet)); - assert!(matches!(pagination_max_total_hits, Setting::NotSet)); - assert!(matches!(proximity_precision, Setting::NotSet)); - assert!(matches!(embedder_settings, Setting::NotSet)); - assert!(matches!(search_cutoff, Setting::NotSet)); - assert!(matches!(localized_attributes_rules, Setting::NotSet)); - assert!(matches!(prefix_search, Setting::NotSet)); - assert!(matches!(facet_search, Setting::NotSet)); - }) - .unwrap(); - } - - #[test] - fn settings_must_ignore_soft_deleted() { - use serde_json::json; - - let index = TempIndex::new(); - - let mut docs = vec![]; - for i in 0..10 { - docs.push(json!({ "id": i, "title": format!("{:x}", i) })); - } - index.add_documents(documents! { docs }).unwrap(); - - index.delete_documents((0..5).map(|id| id.to_string()).collect()); - - let mut wtxn = index.write_txn().unwrap(); - index - .update_settings_using_wtxn(&mut wtxn, |settings| { - settings.set_searchable_fields(vec!["id".to_string()]); - }) - .unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.write_txn().unwrap(); - let docs: StdResult, _> = index.all_documents(&rtxn).unwrap().collect(); - let docs = docs.unwrap(); - assert_eq!(docs.len(), 5); } } + +#[cfg(test)] +#[path = "test_settings.rs"] +mod tests; diff --git a/crates/milli/src/update/test_settings.rs b/crates/milli/src/update/test_settings.rs new file mode 100644 index 000000000..00be0476a --- /dev/null +++ b/crates/milli/src/update/test_settings.rs @@ -0,0 +1,956 @@ +use big_s::S; +use heed::types::Bytes; +use maplit::{btreemap, btreeset}; +use meili_snap::snapshot; + +use super::*; +use crate::error::Error; +use crate::index::tests::TempIndex; +use crate::update::ClearDocuments; +use crate::{db_snap, Criterion, Filter, SearchResult}; + +#[test] +fn set_and_reset_searchable_fields() { + let index = TempIndex::new(); + + // First we send 
3 documents with ids from 1 to 3. + let mut wtxn = index.write_txn().unwrap(); + + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 1, "name": "kevin", "age": 23 }, + { "id": 2, "name": "kevina", "age": 21}, + { "id": 3, "name": "benoit", "age": 34 } + ]), + ) + .unwrap(); + + // We change the searchable fields to be the "name" field only. + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_searchable_fields(vec!["name".into()]); + }) + .unwrap(); + + wtxn.commit().unwrap(); + + db_snap!(index, fields_ids_map, @r###" + 0 id | + 1 name | + 2 age | + "###); + db_snap!(index, searchable_fields, @r###"["name"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 1 0 | + "###); + + // Check that the searchable field is correctly set to "name" only. + let rtxn = index.read_txn().unwrap(); + // When we search for something that is not in + // the searchable fields it must not return any document. + let result = index.search(&rtxn).query("23").execute().unwrap(); + assert_eq!(result.documents_ids, Vec::::new()); + + // When we search for something that is in the searchable fields + // we must find the appropriate document. + let result = index.search(&rtxn).query(r#""kevin""#).execute().unwrap(); + let documents = index.documents(&rtxn, result.documents_ids).unwrap(); + let fid_map = index.fields_ids_map(&rtxn).unwrap(); + assert_eq!(documents.len(), 1); + assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); + drop(rtxn); + + // We change the searchable fields to be the "name" field only. 
+ index + .update_settings(|settings| { + settings.reset_searchable_fields(); + }) + .unwrap(); + + db_snap!(index, fields_ids_map, @r###" + 0 id | + 1 name | + 2 age | + "###); + db_snap!(index, searchable_fields, @r###"["id", "name", "age"]"###); + db_snap!(index, fieldids_weights_map, @r###" + fid weight + 0 0 | + 1 0 | + 2 0 | + "###); + + // Check that the searchable field have been reset and documents are found now. + let rtxn = index.read_txn().unwrap(); + let fid_map = index.fields_ids_map(&rtxn).unwrap(); + let user_defined_searchable_fields = index.user_defined_searchable_fields(&rtxn).unwrap(); + snapshot!(format!("{user_defined_searchable_fields:?}"), @"None"); + // the searchable fields should contain all the fields + let searchable_fields = index.searchable_fields(&rtxn).unwrap(); + snapshot!(format!("{searchable_fields:?}"), @r###"["id", "name", "age"]"###); + let result = index.search(&rtxn).query("23").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + let documents = index.documents(&rtxn, result.documents_ids).unwrap(); + assert_eq!(documents[0].1.get(fid_map.id("name").unwrap()), Some(&br#""kevin""#[..])); +} + +#[test] +fn mixup_searchable_with_displayed_fields() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // First we send 3 documents with ids from 1 to 3. + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ]), + ) + .unwrap(); + + // In the same transaction we change the displayed fields to be only the "age". + // We also change the searchable fields to be the "name" field only. 
+ index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_displayed_fields(vec!["age".into()]); + settings.set_searchable_fields(vec!["name".into()]); + }) + .unwrap(); + wtxn.commit().unwrap(); + + // Check that the displayed fields are correctly set to only the "age" field. + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids.unwrap(), (&["age"][..])); + drop(rtxn); + + // We reset the searchable fields to their default value. + index + .update_settings(|settings| { + settings.reset_searchable_fields(); + }) + .unwrap(); + + // Check that the displayed fields still contain only the "age" field. + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids.unwrap(), &["age"][..]); +} + +#[test] +fn default_displayed_fields() { + let index = TempIndex::new(); + + // First we send 3 documents with ids from 0 to 2. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ])) + .unwrap(); + + // Check that the displayed fields are correctly set to `None` (default value). + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids, None); +} + +#[test] +fn set_and_reset_displayed_field() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ]), + ) + .unwrap(); + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_displayed_fields(vec!["age".into()]); + }) + .unwrap(); + wtxn.commit().unwrap(); + + // Check that the displayed fields are correctly set to only the "age" field. 
+ let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids.unwrap(), &["age"][..]); + drop(rtxn); + + // We reset the fields ids to become `None`, the default value. + index + .update_settings(|settings| { + settings.reset_displayed_fields(); + }) + .unwrap(); + + // Check that the displayed fields are correctly set to `None` (default value). + let rtxn = index.read_txn().unwrap(); + let fields_ids = index.displayed_fields(&rtxn).unwrap(); + assert_eq!(fields_ids, None); +} + +#[test] +fn set_filterable_fields() { + let index = TempIndex::new(); + + // Set the filterable fields to be the age. + index + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("age"))]); + }) + .unwrap(); + + // Then index some documents. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ])) + .unwrap(); + + // Check that the displayed fields are correctly set. + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 0 and level 0 facet values. + // TODO we must support typed CSVs for numbers to be understood. + let fidmap = index.fields_ids_map(&rtxn).unwrap(); + for document in index.all_documents(&rtxn).unwrap() { + let document = document.unwrap(); + let json = + crate::obkv_to_json(&fidmap.ids().collect::>(), &fidmap, document.1).unwrap(); + println!("json: {:?}", json); + } + let count = index + .facet_id_f64_docids + .remap_key_type::() + // The faceted field id is 2u16 + .prefix_iter(&rtxn, &[0, 2, 0]) + .unwrap() + .count(); + assert_eq!(count, 3); + drop(rtxn); + + // Index a little more documents with new and current facets values. 
+ index + .add_documents(documents!([ + { "id": 3, "name": "kevin2", "age": 23}, + { "id": 4, "name": "kevina2", "age": 21 }, + { "id": 5, "name": "benoit", "age": 35 } + ])) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 0 and level 0 facet values. + let count = index + .facet_id_f64_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 2, 0]) + .unwrap() + .count(); + assert_eq!(count, 4); + + // Set the filterable fields to be the age and the name. + index + .update_settings(|settings| { + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S("age")), + FilterableAttributesRule::Field(S("name")), + ]); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 2 and level 0 facet values. + let count = index + .facet_id_f64_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 2, 0]) + .unwrap() + .count(); + assert_eq!(count, 4); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 1 and level 0 facet values. + let count = index + .facet_id_string_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 1]) + .unwrap() + .count(); + assert_eq!(count, 5); + + // Remove the age from the filterable fields. + index + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("name"))]); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 2 and level 0 facet values. + let count = index + .facet_id_f64_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 2, 0]) + .unwrap() + .count(); + assert_eq!(count, 0); + + let rtxn = index.read_txn().unwrap(); + // Only count the field_id 1 and level 0 facet values. + let count = index + .facet_id_string_docids + .remap_key_type::() + .prefix_iter(&rtxn, &[0, 1]) + .unwrap() + .count(); + assert_eq!(count, 5); +} + +#[test] +fn set_asc_desc_field() { + let index = TempIndex::new(); + + // Set the filterable fields to be the age. 
+ index + .update_settings(|settings| { + settings.set_displayed_fields(vec![S("name")]); + settings.set_criteria(vec![Criterion::Asc("age".to_owned())]); + }) + .unwrap(); + + // Then index some documents. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ])) + .unwrap(); + + // Run an empty query just to ensure that the search results are ordered. + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); + let documents = index.documents(&rtxn, documents_ids).unwrap(); + + // Fetch the documents "age" field in the order in which the documents appear. + let age_field_id = index.fields_ids_map(&rtxn).unwrap().id("age").unwrap(); + let iter = documents.into_iter().map(|(_, doc)| { + let bytes = doc.get(age_field_id).unwrap(); + let string = std::str::from_utf8(bytes).unwrap(); + string.parse::().unwrap() + }); + + assert_eq!(iter.collect::>(), vec![21, 23, 34]); +} + +#[test] +fn set_distinct_field() { + let index = TempIndex::new(); + + // Set the distinct field to be the age. + index + .update_settings(|settings| { + // Don't display the generated `id` field. + settings.set_displayed_fields(vec![S("name"), S("age")]); + settings.set_distinct_field(S("age")); + }) + .unwrap(); + + // Then index some documents. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23 }, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 }, + { "id": 3, "name": "bernard", "age": 34 }, + { "id": 4, "name": "bertrand", "age": 34 }, + { "id": 5, "name": "bernie", "age": 34 }, + { "id": 6, "name": "ben", "age": 34 } + ])) + .unwrap(); + + // Run an empty query just to ensure that the search results are ordered. + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. 
} = index.search(&rtxn).execute().unwrap(); + + // Only one document per distinct age must be returned (23, 21 and 34). + assert_eq!(documents_ids.len(), 3); +} + +#[test] +fn set_nested_distinct_field() { + let index = TempIndex::new(); + + // Set the distinct field to be the nested age. + index + .update_settings(|settings| { + // Don't display the generated `id` field. + settings.set_displayed_fields(vec![S("person")]); + settings.set_distinct_field(S("person.age")); + }) + .unwrap(); + + // Then index some documents. + index + .add_documents(documents!([ + { "id": 0, "person": { "name": "kevin", "age": 23 }}, + { "id": 1, "person": { "name": "kevina", "age": 21 }}, + { "id": 2, "person": { "name": "benoit", "age": 34 }}, + { "id": 3, "person": { "name": "bernard", "age": 34 }}, + { "id": 4, "person": { "name": "bertrand", "age": 34 }}, + { "id": 5, "person": { "name": "bernie", "age": 34 }}, + { "id": 6, "person": { "name": "ben", "age": 34 }} + ])) + .unwrap(); + + // Run an empty query just to ensure that the search results are ordered. + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. } = index.search(&rtxn).execute().unwrap(); + + // Only one document per distinct age must be returned (23, 21 and 34). + assert_eq!(documents_ids.len(), 3); +} + +#[test] +fn default_stop_words() { + let index = TempIndex::new(); + + // First we send 3 documents with ids from 0 to 2. + index + .add_documents(documents!([ + { "id": 0, "name": "kevin", "age": 23}, + { "id": 1, "name": "kevina", "age": 21 }, + { "id": 2, "name": "benoit", "age": 34 } + ])) + .unwrap(); + + // Ensure there is no stop_words by default + let rtxn = index.read_txn().unwrap(); + let stop_words = index.stop_words(&rtxn).unwrap(); + assert!(stop_words.is_none()); +} + +#[test] +fn set_and_reset_stop_words() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // First we send 3 documents with ids from 0 to 2. 
+ index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "kevin", "age": 23, "maxim": "I love dogs" }, + { "id": 1, "name": "kevina", "age": 21, "maxim": "Doggos are the best" }, + { "id": 2, "name": "benoit", "age": 34, "maxim": "The crepes are really good" }, + ]), + ) + .unwrap(); + + // In the same transaction we provide some stop_words + let set = btreeset! { "i".to_string(), "the".to_string(), "are".to_string() }; + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_stop_words(set.clone()); + }) + .unwrap(); + + wtxn.commit().unwrap(); + + // Ensure stop_words are effectively stored + let rtxn = index.read_txn().unwrap(); + let stop_words = index.stop_words(&rtxn).unwrap(); + assert!(stop_words.is_some()); // at this point the index should return something + + let stop_words = stop_words.unwrap(); + let expected = fst::Set::from_iter(&set).unwrap(); + assert_eq!(stop_words.as_fst().as_bytes(), expected.as_fst().as_bytes()); + + // when we search for something that is a non prefix stop_words it should be ignored + // thus we should get a placeholder search (all the results = 3) + let result = index.search(&rtxn).query("the ").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 3); + let result = index.search(&rtxn).query("i ").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 3); + let result = index.search(&rtxn).query("are ").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 3); + + let result = index.search(&rtxn).query("dog").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos + let result = index.search(&rtxn).query("benoît").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data + + // now we'll reset the stop_words and ensure it's None + index + .update_settings(|settings| { + settings.reset_stop_words(); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + let 
stop_words = index.stop_words(&rtxn).unwrap(); + assert!(stop_words.is_none()); + + // now we can search for the stop words + let result = index.search(&rtxn).query("the").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); + let result = index.search(&rtxn).query("i").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + let result = index.search(&rtxn).query("are").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); + + // the rest of the search is still not impacted + let result = index.search(&rtxn).query("dog").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); // we have two maxims talking about doggos + let result = index.search(&rtxn).query("benoît").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); // there is one benoit in our data +} + +#[test] +fn set_and_reset_synonyms() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // Send 3 documents with ids from 1 to 3. + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "kevin", "age": 23, "maxim": "I love dogs"}, + { "id": 1, "name": "kevina", "age": 21, "maxim": "Doggos are the best"}, + { "id": 2, "name": "benoit", "age": 34, "maxim": "The crepes are really good"}, + ]), + ) + .unwrap(); + + // In the same transaction provide some synonyms + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_synonyms(btreemap! 
{ + "blini".to_string() => vec!["crepes".to_string()], + "super like".to_string() => vec!["love".to_string()], + "puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()] + }); + }) + .unwrap(); + wtxn.commit().unwrap(); + + // Ensure synonyms are effectively stored + let rtxn = index.read_txn().unwrap(); + let synonyms = index.synonyms(&rtxn).unwrap(); + assert!(!synonyms.is_empty()); // at this point the index should return something + + // Check that we can use synonyms + let result = index.search(&rtxn).query("blini").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + let result = index.search(&rtxn).query("super like").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 1); + let result = index.search(&rtxn).query("puppies").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); + + // Reset the synonyms + index + .update_settings(|settings| { + settings.reset_synonyms(); + }) + .unwrap(); + + // Ensure synonyms are reset + let rtxn = index.read_txn().unwrap(); + let synonyms = index.synonyms(&rtxn).unwrap(); + assert!(synonyms.is_empty()); + + // Check that synonyms no longer work + let result = index.search(&rtxn).query("blini").execute().unwrap(); + assert!(result.documents_ids.is_empty()); + let result = index.search(&rtxn).query("super like").execute().unwrap(); + assert!(result.documents_ids.is_empty()); + let result = index.search(&rtxn).query("puppies").execute().unwrap(); + assert!(result.documents_ids.is_empty()); +} + +#[test] +fn thai_synonyms() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // Send 2 documents with ids 0 and 1. + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "id": 0, "name": "ยี่ปุ่น" }, + { "id": 1, "name": "ญี่ปุ่น" }, + ]), + ) + .unwrap(); + + // In the same transaction provide some synonyms + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_synonyms(btreemap!
{ + "japanese".to_string() => vec![S("ญี่ปุ่น"), S("ยี่ปุ่น")], + }); + }) + .unwrap(); + wtxn.commit().unwrap(); + + // Ensure synonyms are effectively stored + let rtxn = index.read_txn().unwrap(); + let synonyms = index.synonyms(&rtxn).unwrap(); + assert!(!synonyms.is_empty()); // at this point the index should return something + + // Check that we can use synonyms + let result = index.search(&rtxn).query("japanese").execute().unwrap(); + assert_eq!(result.documents_ids.len(), 2); +} + +#[test] +fn setting_searchable_recomputes_other_settings() { + let index = TempIndex::new(); + + // Set all the settings except searchable + index + .update_settings(|settings| { + settings.set_displayed_fields(vec!["hello".to_string()]); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S("age")), + FilterableAttributesRule::Field(S("toto")), + ]); + settings.set_criteria(vec![Criterion::Asc(S("toto"))]); + }) + .unwrap(); + + // check the output + let rtxn = index.read_txn().unwrap(); + assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); + // since no documents have been pushed the primary key is still unset + assert!(index.primary_key(&rtxn).unwrap().is_none()); + assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); + drop(rtxn); + + // We set toto and age as searchable to force reordering of the fields + index + .update_settings(|settings| { + settings.set_searchable_fields(vec!["toto".to_string(), "age".to_string()]); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + assert_eq!(&["hello"][..], index.displayed_fields(&rtxn).unwrap().unwrap()); + assert!(index.primary_key(&rtxn).unwrap().is_none()); + assert_eq!(vec![Criterion::Asc("toto".to_string())], index.criteria(&rtxn).unwrap()); +} + +#[test] +fn setting_not_filterable_cant_filter() { + let index = TempIndex::new(); + + // Set all the settings except searchable + index + .update_settings(|settings| { + 
settings.set_displayed_fields(vec!["hello".to_string()]); + // It is only Asc(toto), there is a facet database but it is denied to filter with toto. + settings.set_criteria(vec![Criterion::Asc(S("toto"))]); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + let filter = Filter::from_str("toto = 32").unwrap().unwrap(); + let _ = filter.evaluate(&rtxn, &index).unwrap_err(); +} + +#[test] +fn setting_primary_key() { + let index = TempIndex::new(); + + let mut wtxn = index.write_txn().unwrap(); + // Set the primary key settings + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_primary_key(S("mykey")); + }) + .unwrap(); + wtxn.commit().unwrap(); + let mut wtxn = index.write_txn().unwrap(); + assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); + + // Then index some documents with the "mykey" primary key. + index + .add_documents_using_wtxn( + &mut wtxn, + documents!([ + { "mykey": 1, "name": "kevin", "age": 23 }, + { "mykey": 2, "name": "kevina", "age": 21 }, + { "mykey": 3, "name": "benoit", "age": 34 }, + { "mykey": 4, "name": "bernard", "age": 34 }, + { "mykey": 5, "name": "bertrand", "age": 34 }, + { "mykey": 6, "name": "bernie", "age": 34 }, + { "mykey": 7, "name": "ben", "age": 34 } + ]), + ) + .unwrap(); + wtxn.commit().unwrap(); + + // Updating settings with the same primary key should do nothing + let mut wtxn = index.write_txn().unwrap(); + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_primary_key(S("mykey")); + }) + .unwrap(); + assert_eq!(index.primary_key(&wtxn).unwrap(), Some("mykey")); + wtxn.commit().unwrap(); + + // Updating the settings with a different (or no) primary key causes an error + let mut wtxn = index.write_txn().unwrap(); + let error = index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.reset_primary_key(); + }) + .unwrap_err(); + assert!(matches!(error, Error::UserError(UserError::PrimaryKeyCannotBeChanged(_)))); + wtxn.abort(); + + // But if we 
clear the database... + let mut wtxn = index.write_txn().unwrap(); + let builder = ClearDocuments::new(&mut wtxn, &index); + builder.execute().unwrap(); + wtxn.commit().unwrap(); + + // ...we can change the primary key + index + .update_settings(|settings| { + settings.set_primary_key(S("myid")); + }) + .unwrap(); +} + +#[test] +fn setting_impact_relevancy() { + let index = TempIndex::new(); + + // Set the genres setting + index + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("genres"))]); + }) + .unwrap(); + + index.add_documents(documents!([ + { + "id": 11, + "title": "Star Wars", + "overview": + "Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.", + "genres": ["Adventure", "Action", "Science Fiction"], + "poster": "https://image.tmdb.org/t/p/w500/6FfCtAuVAW8XJjZ7eWeLibRLWTw.jpg", + "release_date": 233366400 + }, + { + "id": 30, + "title": "Magnetic Rose", + "overview": "", + "genres": ["Animation", "Science Fiction"], + "poster": "https://image.tmdb.org/t/p/w500/gSuHDeWemA1menrwfMRChnSmMVN.jpg", + "release_date": 819676800 + } + ])).unwrap(); + + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. 
} = index.search(&rtxn).query("S").execute().unwrap(); + let first_id = documents_ids[0]; + let documents = index.documents(&rtxn, documents_ids).unwrap(); + let (_, content) = documents.iter().find(|(id, _)| *id == first_id).unwrap(); + + let fid = index.fields_ids_map(&rtxn).unwrap().id("title").unwrap(); + let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap(); + assert_eq!(line, r#""Star Wars""#); +} + +#[test] +fn test_disable_typo() { + let index = TempIndex::new(); + + let mut txn = index.write_txn().unwrap(); + assert!(index.authorize_typos(&txn).unwrap()); + + index + .update_settings_using_wtxn(&mut txn, |settings| { + settings.set_autorize_typos(false); + }) + .unwrap(); + + assert!(!index.authorize_typos(&txn).unwrap()); +} + +#[test] +fn update_min_word_len_for_typo() { + let index = TempIndex::new(); + + // Set the minimum word lengths for one and two typos + index + .update_settings(|settings| { + settings.set_min_word_len_one_typo(8); + settings.set_min_word_len_two_typos(8); + }) + .unwrap(); + + let txn = index.read_txn().unwrap(); + assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 8); + assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 8); + + index + .update_settings(|settings| { + settings.reset_min_word_len_one_typo(); + settings.reset_min_word_len_two_typos(); + }) + .unwrap(); + + let txn = index.read_txn().unwrap(); + assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_ONE_TYPO); + assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_TWO_TYPOS); +} + +#[test] +fn update_invalid_min_word_len_for_typo() { + let index = TempIndex::new(); + + // Set the minimum word lengths to 10 (one typo) and 7 (two typos); this update is expected to fail + index + .update_settings(|settings| { + settings.set_min_word_len_one_typo(10); + settings.set_min_word_len_two_typos(7); + }) + .unwrap_err(); +} + +#[test] +fn update_exact_words_normalization() { + let index = TempIndex::new(); + + let mut txn = index.write_txn().unwrap(); + // Set the exact words + index +
.update_settings_using_wtxn(&mut txn, |settings| { + let words = btreeset! { S("Ab"), S("ac") }; + settings.set_exact_words(words); + }) + .unwrap(); + + let exact_words = index.exact_words(&txn).unwrap().unwrap(); + for word in exact_words.into_fst().stream().into_str_vec().unwrap() { + assert!(word.0 == "ac" || word.0 == "ab"); + } +} + +#[test] +fn test_correct_settings_init() { + let index = TempIndex::new(); + + index + .update_settings(|settings| { + // we don't actually update the settings, just check their content + let Settings { + wtxn: _, + index: _, + indexer_config: _, + searchable_fields, + displayed_fields, + filterable_fields, + sortable_fields, + criteria, + stop_words, + non_separator_tokens, + separator_tokens, + dictionary, + distinct_field, + synonyms, + primary_key, + authorize_typos, + min_word_len_two_typos, + min_word_len_one_typo, + exact_words, + exact_attributes, + max_values_per_facet, + sort_facet_values_by, + pagination_max_total_hits, + proximity_precision, + embedder_settings, + search_cutoff, + localized_attributes_rules, + prefix_search, + facet_search, + } = settings; + assert!(matches!(searchable_fields, Setting::NotSet)); + assert!(matches!(displayed_fields, Setting::NotSet)); + assert!(matches!(filterable_fields, Setting::NotSet)); + assert!(matches!(sortable_fields, Setting::NotSet)); + assert!(matches!(criteria, Setting::NotSet)); + assert!(matches!(stop_words, Setting::NotSet)); + assert!(matches!(non_separator_tokens, Setting::NotSet)); + assert!(matches!(separator_tokens, Setting::NotSet)); + assert!(matches!(dictionary, Setting::NotSet)); + assert!(matches!(distinct_field, Setting::NotSet)); + assert!(matches!(synonyms, Setting::NotSet)); + assert!(matches!(primary_key, Setting::NotSet)); + assert!(matches!(authorize_typos, Setting::NotSet)); + assert!(matches!(min_word_len_two_typos, Setting::NotSet)); + assert!(matches!(min_word_len_one_typo, Setting::NotSet)); + assert!(matches!(exact_words, Setting::NotSet)); + 
assert!(matches!(exact_attributes, Setting::NotSet)); + assert!(matches!(max_values_per_facet, Setting::NotSet)); + assert!(matches!(sort_facet_values_by, Setting::NotSet)); + assert!(matches!(pagination_max_total_hits, Setting::NotSet)); + assert!(matches!(proximity_precision, Setting::NotSet)); + assert!(matches!(embedder_settings, Setting::NotSet)); + assert!(matches!(search_cutoff, Setting::NotSet)); + assert!(matches!(localized_attributes_rules, Setting::NotSet)); + assert!(matches!(prefix_search, Setting::NotSet)); + assert!(matches!(facet_search, Setting::NotSet)); + }) + .unwrap(); +} + +#[test] +fn settings_must_ignore_soft_deleted() { + use serde_json::json; + + let index = TempIndex::new(); + + let mut docs = vec![]; + for i in 0..10 { + docs.push(json!({ "id": i, "title": format!("{:x}", i) })); + } + index.add_documents(documents! { docs }).unwrap(); + + index.delete_documents((0..5).map(|id| id.to_string()).collect()); + + let mut wtxn = index.write_txn().unwrap(); + index + .update_settings_using_wtxn(&mut wtxn, |settings| { + settings.set_searchable_fields(vec!["id".to_string()]); + }) + .unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.write_txn().unwrap(); + let docs: StdResult, _> = index.all_documents(&rtxn).unwrap().collect(); + let docs = docs.unwrap(); + assert_eq!(docs.len(), 5); +} diff --git a/crates/milli/src/update/upgrade/mod.rs b/crates/milli/src/update/upgrade/mod.rs index 0ed67f2cb..98cad3dad 100644 --- a/crates/milli/src/update/upgrade/mod.rs +++ b/crates/milli/src/update/upgrade/mod.rs @@ -39,9 +39,8 @@ pub fn upgrade( (1, 12, 0..=2) => 0, (1, 12, 3..) => 1, (1, 13, 0) => 2, - (1, 13, 1) => 3, // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other. 
- (1, 13, _) => return Ok(false), + (1, 13, _) => 3, (major, minor, patch) => { return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into()) } diff --git a/crates/milli/src/vector/composite.rs b/crates/milli/src/vector/composite.rs new file mode 100644 index 000000000..d174232bf --- /dev/null +++ b/crates/milli/src/vector/composite.rs @@ -0,0 +1,280 @@ +use std::time::Instant; + +use arroy::Distance; + +use super::error::CompositeEmbedderContainsHuggingFace; +use super::{ + hf, manual, ollama, openai, rest, DistributionShift, EmbedError, Embedding, NewEmbedderError, +}; +use crate::ThreadPoolNoAbort; + +#[derive(Debug)] +pub enum SubEmbedder { + /// An embedder based on running local models, fetched from the Hugging Face Hub. + HuggingFace(hf::Embedder), + /// An embedder based on making embedding queries against the OpenAI API. + OpenAi(openai::Embedder), + /// An embedder based on the user providing the embeddings in the documents and queries. + UserProvided(manual::Embedder), + /// An embedder based on making embedding queries against an embedding server. + Ollama(ollama::Embedder), + /// An embedder based on making embedding queries against a generic JSON/REST embedding server. 
+ Rest(rest::Embedder), +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +pub enum SubEmbedderOptions { + HuggingFace(hf::EmbedderOptions), + OpenAi(openai::EmbedderOptions), + Ollama(ollama::EmbedderOptions), + UserProvided(manual::EmbedderOptions), + Rest(rest::EmbedderOptions), +} + +impl SubEmbedderOptions { + pub fn distribution(&self) -> Option { + match self { + SubEmbedderOptions::HuggingFace(embedder_options) => embedder_options.distribution, + SubEmbedderOptions::OpenAi(embedder_options) => embedder_options.distribution, + SubEmbedderOptions::Ollama(embedder_options) => embedder_options.distribution, + SubEmbedderOptions::UserProvided(embedder_options) => embedder_options.distribution, + SubEmbedderOptions::Rest(embedder_options) => embedder_options.distribution, + } + } +} + +#[derive(Debug)] +pub struct Embedder { + pub(super) search: SubEmbedder, + pub(super) index: SubEmbedder, +} + +#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] +pub struct EmbedderOptions { + pub search: SubEmbedderOptions, + pub index: SubEmbedderOptions, +} + +impl Embedder { + pub fn new( + EmbedderOptions { search, index }: EmbedderOptions, + ) -> Result { + let search = SubEmbedder::new(search)?; + let index = SubEmbedder::new(index)?; + + // check dimensions + if search.dimensions() != index.dimensions() { + return Err(NewEmbedderError::composite_dimensions_mismatch( + search.dimensions(), + index.dimensions(), + )); + } + // check similarity + let search_embeddings = search + .embed( + vec![ + "test".into(), + "a brave dog".into(), + "This is a sample text. It is meant to compare similarity.".into(), + ], + None, + ) + .map_err(|error| NewEmbedderError::composite_test_embedding_failed(error, "search"))?; + + let index_embeddings = index + .embed( + vec![ + "test".into(), + "a brave dog".into(), + "This is a sample text. 
It is meant to compare similarity.".into(), + ], + None, + ) + .map_err(|error| { + NewEmbedderError::composite_test_embedding_failed(error, "indexing") + })?; + + let hint = configuration_hint(&search, &index); + + check_similarity(search_embeddings, index_embeddings, hint)?; + + Ok(Self { search, index }) + } + + /// Indicates the dimensions of a single embedding produced by the embedder. + pub fn dimensions(&self) -> usize { + // can use the dimensions of any embedder since they should match + self.index.dimensions() + } + + /// An optional distribution used to apply an affine transformation to the similarity score of a document. + pub fn distribution(&self) -> Option { + // 3 cases here: + // 1. distribution provided by user => use that one, which was stored in search + // 2. no user-provided distribution, distribution in search embedder => use that one + // 3. no user-provided distribution, no distribution in search embedder => use the distribution in indexing embedder + self.search.distribution().or_else(|| self.index.distribution()) + } +} + +impl SubEmbedder { + pub fn new(options: SubEmbedderOptions) -> std::result::Result { + Ok(match options { + SubEmbedderOptions::HuggingFace(options) => { + Self::HuggingFace(hf::Embedder::new(options)?) + } + SubEmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?), + SubEmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?), + SubEmbedderOptions::UserProvided(options) => { + Self::UserProvided(manual::Embedder::new(options)) + } + SubEmbedderOptions::Rest(options) => { + Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?)
+ } + }) + } + + pub fn embed( + &self, + texts: Vec, + deadline: Option, + ) -> std::result::Result, EmbedError> { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.embed(texts), + SubEmbedder::OpenAi(embedder) => embedder.embed(&texts, deadline), + SubEmbedder::Ollama(embedder) => embedder.embed(&texts, deadline), + SubEmbedder::UserProvided(embedder) => embedder.embed(&texts), + SubEmbedder::Rest(embedder) => embedder.embed(texts, deadline), + } + } + + /// Embed multiple chunks of texts. + /// + /// Each chunk is composed of one or multiple texts. + pub fn embed_index( + &self, + text_chunks: Vec>, + threads: &ThreadPoolNoAbort, + ) -> std::result::Result>, EmbedError> { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.embed_index(text_chunks), + SubEmbedder::OpenAi(embedder) => embedder.embed_index(text_chunks, threads), + SubEmbedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads), + SubEmbedder::UserProvided(embedder) => embedder.embed_index(text_chunks), + SubEmbedder::Rest(embedder) => embedder.embed_index(text_chunks, threads), + } + } + + /// Non-owning variant of [`Self::embed_index`]. 
+ pub fn embed_index_ref( + &self, + texts: &[&str], + threads: &ThreadPoolNoAbort, + ) -> std::result::Result, EmbedError> { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.embed_index_ref(texts), + SubEmbedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads), + SubEmbedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads), + SubEmbedder::UserProvided(embedder) => embedder.embed_index_ref(texts), + SubEmbedder::Rest(embedder) => embedder.embed_index_ref(texts, threads), + } + } + + /// Indicates the preferred number of chunks to pass to [`Self::embed_index`] + pub fn chunk_count_hint(&self) -> usize { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.chunk_count_hint(), + SubEmbedder::OpenAi(embedder) => embedder.chunk_count_hint(), + SubEmbedder::Ollama(embedder) => embedder.chunk_count_hint(), + SubEmbedder::UserProvided(_) => 100, + SubEmbedder::Rest(embedder) => embedder.chunk_count_hint(), + } + } + + /// Indicates the preferred number of texts in a single chunk passed to [`Self::embed`] + pub fn prompt_count_in_chunk_hint(&self) -> usize { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(), + SubEmbedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(), + SubEmbedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(), + SubEmbedder::UserProvided(_) => 1, + SubEmbedder::Rest(embedder) => embedder.prompt_count_in_chunk_hint(), + } + } + + pub fn uses_document_template(&self) -> bool { + match self { + SubEmbedder::HuggingFace(_) + | SubEmbedder::OpenAi(_) + | SubEmbedder::Ollama(_) + | SubEmbedder::Rest(_) => true, + SubEmbedder::UserProvided(_) => false, + } + } + + /// Indicates the dimensions of a single embedding produced by the embedder.
+ pub fn dimensions(&self) -> usize { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.dimensions(), + SubEmbedder::OpenAi(embedder) => embedder.dimensions(), + SubEmbedder::Ollama(embedder) => embedder.dimensions(), + SubEmbedder::UserProvided(embedder) => embedder.dimensions(), + SubEmbedder::Rest(embedder) => embedder.dimensions(), + } + } + + /// An optional distribution used to apply an affine transformation to the similarity score of a document. + pub fn distribution(&self) -> Option { + match self { + SubEmbedder::HuggingFace(embedder) => embedder.distribution(), + SubEmbedder::OpenAi(embedder) => embedder.distribution(), + SubEmbedder::Ollama(embedder) => embedder.distribution(), + SubEmbedder::UserProvided(embedder) => embedder.distribution(), + SubEmbedder::Rest(embedder) => embedder.distribution(), + } + } +} + +fn check_similarity( + left: Vec, + right: Vec, + hint: CompositeEmbedderContainsHuggingFace, +) -> Result<(), NewEmbedderError> { + if left.len() != right.len() { + return Err(NewEmbedderError::composite_embedding_count_mismatch(left.len(), right.len())); + } + + for (left, right) in left.into_iter().zip(right) { + let left = arroy::internals::UnalignedVector::from_slice(&left); + let right = arroy::internals::UnalignedVector::from_slice(&right); + let left = arroy::internals::Leaf { + header: arroy::distances::Cosine::new_header(&left), + vector: left, + }; + let right = arroy::internals::Leaf { + header: arroy::distances::Cosine::new_header(&right), + vector: right, + }; + + let distance = arroy::distances::Cosine::built_distance(&left, &right); + + if distance > super::MAX_COMPOSITE_DISTANCE { + return Err(NewEmbedderError::composite_embedding_value_mismatch(distance, hint)); + } + } + Ok(()) +} + +fn configuration_hint( + search: &SubEmbedder, + index: &SubEmbedder, +) -> CompositeEmbedderContainsHuggingFace { + match (search, index) { + (SubEmbedder::HuggingFace(_), SubEmbedder::HuggingFace(_)) => { + 
CompositeEmbedderContainsHuggingFace::Both + } + (SubEmbedder::HuggingFace(_), _) => CompositeEmbedderContainsHuggingFace::Search, + (_, SubEmbedder::HuggingFace(_)) => CompositeEmbedderContainsHuggingFace::Indexing, + _ => CompositeEmbedderContainsHuggingFace::None, + } +} diff --git a/crates/milli/src/vector/error.rs b/crates/milli/src/vector/error.rs index 650249bff..685022de8 100644 --- a/crates/milli/src/vector/error.rs +++ b/crates/milli/src/vector/error.rs @@ -6,6 +6,7 @@ use hf_hub::api::sync::ApiError; use super::parsed_vectors::ParsedVectorsDiff; use super::rest::ConfigurationSource; +use super::MAX_COMPOSITE_DISTANCE; use crate::error::FaultSource; use crate::update::new::vector_document::VectorDocument; use crate::{FieldDistribution, PanicCatched}; @@ -335,6 +336,77 @@ impl NewEmbedderError { pub(crate) fn ollama_unsupported_url(url: String) -> NewEmbedderError { Self { kind: NewEmbedderErrorKind::OllamaUnsupportedUrl(url), fault: FaultSource::User } } + + pub(crate) fn composite_dimensions_mismatch( + search_dimensions: usize, + index_dimensions: usize, + ) -> NewEmbedderError { + Self { + kind: NewEmbedderErrorKind::CompositeDimensionsMismatch { + search_dimensions, + index_dimensions, + }, + fault: FaultSource::User, + } + } + + pub(crate) fn composite_test_embedding_failed( + inner: EmbedError, + failing_embedder: &'static str, + ) -> NewEmbedderError { + Self { + kind: NewEmbedderErrorKind::CompositeTestEmbeddingFailed { inner, failing_embedder }, + fault: FaultSource::Runtime, + } + } + + pub(crate) fn composite_embedding_count_mismatch( + search_count: usize, + index_count: usize, + ) -> NewEmbedderError { + Self { + kind: NewEmbedderErrorKind::CompositeEmbeddingCountMismatch { + search_count, + index_count, + }, + fault: FaultSource::Runtime, + } + } + + pub(crate) fn composite_embedding_value_mismatch( + distance: f32, + hint: CompositeEmbedderContainsHuggingFace, + ) -> NewEmbedderError { + Self { + kind: 
NewEmbedderErrorKind::CompositeEmbeddingValueMismatch { distance, hint }, + fault: FaultSource::User, + } + } +} + +#[derive(Debug, Clone, Copy)] +pub enum CompositeEmbedderContainsHuggingFace { + Both, + Search, + Indexing, + None, +} + +impl std::fmt::Display for CompositeEmbedderContainsHuggingFace { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + CompositeEmbedderContainsHuggingFace::Both => f.write_str( + "\n - Make sure the `model`, `revision` and `pooling` of both embedders match.", + ), + CompositeEmbedderContainsHuggingFace::Search => f.write_str( + "\n - Consider trying a different `pooling` method for the search embedder.", + ), + CompositeEmbedderContainsHuggingFace::Indexing => f.write_str( + "\n - Consider trying a different `pooling` method for the indexing embedder.", + ), + CompositeEmbedderContainsHuggingFace::None => Ok(()), + } + } } #[derive(Debug, thiserror::Error)] @@ -419,6 +491,14 @@ pub enum NewEmbedderErrorKind { CouldNotParseTemplate(String), #[error("unsupported Ollama URL.\n - For `ollama` sources, the URL must end with `/api/embed` or `/api/embeddings`\n - Got `{0}`")] OllamaUnsupportedUrl(String), + #[error("error while generating test embeddings.\n - the dimensions of embeddings produced at search time and at indexing time don't match.\n - Search time dimensions: {search_dimensions}\n - Indexing time dimensions: {index_dimensions}\n - Note: Dimensions of embeddings produced by both embedders are required to match.")] + CompositeDimensionsMismatch { search_dimensions: usize, index_dimensions: usize }, + #[error("error while generating test embeddings.\n - could not generate test embedding with embedder at {failing_embedder} time.\n - Embedding failed with {inner}")] + CompositeTestEmbeddingFailed { inner: EmbedError, failing_embedder: &'static str }, + #[error("error while generating test embeddings.\n - the number of generated embeddings differs.\n - {search_count} embeddings for the search 
time embedder.\n - {index_count} embeddings for the indexing time embedder.")] + CompositeEmbeddingCountMismatch { search_count: usize, index_count: usize }, + #[error("error while generating test embeddings.\n - the embeddings produced at search time and indexing time are not similar enough.\n - angular distance {distance:.2}\n - Meilisearch requires a maximum distance of {MAX_COMPOSITE_DISTANCE}.\n - Note: check that both embedders produce similar embeddings.{hint}")] + CompositeEmbeddingValueMismatch { distance: f32, hint: CompositeEmbedderContainsHuggingFace }, } pub struct PossibleEmbeddingMistakes { diff --git a/crates/milli/src/vector/hf.rs b/crates/milli/src/vector/hf.rs index b01a66255..60e40e367 100644 --- a/crates/milli/src/vector/hf.rs +++ b/crates/milli/src/vector/hf.rs @@ -255,34 +255,8 @@ impl Embedder { Ok(this) } - pub fn embed(&self, mut texts: Vec) -> std::result::Result, EmbedError> { - let tokens = match texts.len() { - 1 => vec![self - .tokenizer - .encode(texts.pop().unwrap(), true) - .map_err(EmbedError::tokenize)?], - _ => self.tokenizer.encode_batch(texts, true).map_err(EmbedError::tokenize)?, - }; - let token_ids = tokens - .iter() - .map(|tokens| { - let mut tokens = tokens.get_ids().to_vec(); - tokens.truncate(512); - Tensor::new(tokens.as_slice(), &self.model.device).map_err(EmbedError::tensor_shape) - }) - .collect::, EmbedError>>()?; - - let token_ids = Tensor::stack(&token_ids, 0).map_err(EmbedError::tensor_shape)?; - let token_type_ids = token_ids.zeros_like().map_err(EmbedError::tensor_shape)?; - let embeddings = self - .model - .forward(&token_ids, &token_type_ids, None) - .map_err(EmbedError::model_forward)?; - - let embeddings = Self::pooling(embeddings, self.pooling)?; - - let embeddings: Vec = embeddings.to_vec2().map_err(EmbedError::tensor_shape)?; - Ok(embeddings) + pub fn embed(&self, texts: Vec) -> std::result::Result, EmbedError> { + texts.into_iter().map(|text| self.embed_one(&text)).collect() } fn pooling(embeddings: 
Tensor, pooling: Pooling) -> Result { @@ -346,7 +320,7 @@ impl Embedder { Ok(embedding) } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, ) -> std::result::Result>, EmbedError> { @@ -378,7 +352,7 @@ impl Embedder { }) } - pub(crate) fn embed_chunks_ref(&self, texts: &[&str]) -> Result, EmbedError> { + pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result, EmbedError> { texts.iter().map(|text| self.embed_one(text)).collect() } } diff --git a/crates/milli/src/vector/manual.rs b/crates/milli/src/vector/manual.rs index 8c2ef97b2..b95bf0ea2 100644 --- a/crates/milli/src/vector/manual.rs +++ b/crates/milli/src/vector/manual.rs @@ -30,7 +30,7 @@ impl Embedder { self.dimensions } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, ) -> Result>, EmbedError> { @@ -41,7 +41,7 @@ impl Embedder { self.distribution } - pub(crate) fn embed_chunks_ref(&self, texts: &[&str]) -> Result, EmbedError> { + pub(crate) fn embed_index_ref(&self, texts: &[&str]) -> Result, EmbedError> { texts.iter().map(|text| self.embed_one(text)).collect() } } diff --git a/crates/milli/src/vector/mod.rs b/crates/milli/src/vector/mod.rs index 74b52b1fe..f67912b89 100644 --- a/crates/milli/src/vector/mod.rs +++ b/crates/milli/src/vector/mod.rs @@ -15,6 +15,7 @@ use self::error::{EmbedError, NewEmbedderError}; use crate::prompt::{Prompt, PromptData}; use crate::ThreadPoolNoAbort; +pub mod composite; pub mod error; pub mod hf; pub mod json_template; @@ -31,6 +32,7 @@ pub use self::error::Error; pub type Embedding = Vec; pub const REQUEST_PARALLELISM: usize = 40; +pub const MAX_COMPOSITE_DISTANCE: f32 = 0.01; pub struct ArroyWrapper { quantized: bool, @@ -536,6 +538,8 @@ pub enum Embedder { Ollama(ollama::Embedder), /// An embedder based on making embedding queries against a generic JSON/REST embedding server. Rest(rest::Embedder), + /// An embedder composed of an embedder at search time and an embedder at indexing time. 
+ Composite(composite::Embedder), } /// Configuration for an embedder. @@ -605,6 +609,7 @@ pub enum EmbedderOptions { Ollama(ollama::EmbedderOptions), UserProvided(manual::EmbedderOptions), Rest(rest::EmbedderOptions), + Composite(composite::EmbedderOptions), } impl Default for EmbedderOptions { @@ -626,33 +631,29 @@ impl Embedder { EmbedderOptions::Rest(options) => { Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?) } + EmbedderOptions::Composite(options) => { + Self::Composite(composite::Embedder::new(options)?) + } }) } - /// Embed one or multiple texts. - /// - /// Each text can be embedded as one or multiple embeddings. - pub fn embed( + /// Embed in search context + + #[tracing::instrument(level = "debug", skip_all, target = "search")] + pub fn embed_search( &self, - texts: Vec, + text: String, deadline: Option, - ) -> std::result::Result, EmbedError> { - match self { + ) -> std::result::Result { + let texts = vec![text]; + let mut embedding = match self { Embedder::HuggingFace(embedder) => embedder.embed(texts), Embedder::OpenAi(embedder) => embedder.embed(&texts, deadline), Embedder::Ollama(embedder) => embedder.embed(&texts, deadline), Embedder::UserProvided(embedder) => embedder.embed(&texts), Embedder::Rest(embedder) => embedder.embed(texts, deadline), - } - } - - #[tracing::instrument(level = "debug", skip_all, target = "search")] - pub fn embed_one( - &self, - text: String, - deadline: Option, - ) -> std::result::Result { - let mut embedding = self.embed(vec![text], deadline)?; + Embedder::Composite(embedder) => embedder.search.embed(texts, deadline), + }?; let embedding = embedding.pop().ok_or_else(EmbedError::missing_embedding)?; Ok(embedding) } @@ -660,31 +661,34 @@ impl Embedder { /// Embed multiple chunks of texts. /// /// Each chunk is composed of one or multiple texts. 
- pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, threads: &ThreadPoolNoAbort, ) -> std::result::Result>, EmbedError> { match self { - Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks), - Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks, threads), - Embedder::Ollama(embedder) => embedder.embed_chunks(text_chunks, threads), - Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks), - Embedder::Rest(embedder) => embedder.embed_chunks(text_chunks, threads), + Embedder::HuggingFace(embedder) => embedder.embed_index(text_chunks), + Embedder::OpenAi(embedder) => embedder.embed_index(text_chunks, threads), + Embedder::Ollama(embedder) => embedder.embed_index(text_chunks, threads), + Embedder::UserProvided(embedder) => embedder.embed_index(text_chunks), + Embedder::Rest(embedder) => embedder.embed_index(text_chunks, threads), + Embedder::Composite(embedder) => embedder.index.embed_index(text_chunks, threads), } } - pub fn embed_chunks_ref( + /// Non-owning variant of [`Self::embed_index`]. 
+ pub fn embed_index_ref( &self, texts: &[&str], threads: &ThreadPoolNoAbort, ) -> std::result::Result, EmbedError> { match self { - Embedder::HuggingFace(embedder) => embedder.embed_chunks_ref(texts), - Embedder::OpenAi(embedder) => embedder.embed_chunks_ref(texts, threads), - Embedder::Ollama(embedder) => embedder.embed_chunks_ref(texts, threads), - Embedder::UserProvided(embedder) => embedder.embed_chunks_ref(texts), - Embedder::Rest(embedder) => embedder.embed_chunks_ref(texts, threads), + Embedder::HuggingFace(embedder) => embedder.embed_index_ref(texts), + Embedder::OpenAi(embedder) => embedder.embed_index_ref(texts, threads), + Embedder::Ollama(embedder) => embedder.embed_index_ref(texts, threads), + Embedder::UserProvided(embedder) => embedder.embed_index_ref(texts), + Embedder::Rest(embedder) => embedder.embed_index_ref(texts, threads), + Embedder::Composite(embedder) => embedder.index.embed_index_ref(texts, threads), } } @@ -696,6 +700,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.chunk_count_hint(), Embedder::UserProvided(_) => 100, Embedder::Rest(embedder) => embedder.chunk_count_hint(), + Embedder::Composite(embedder) => embedder.index.chunk_count_hint(), } } @@ -707,6 +712,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(), Embedder::UserProvided(_) => 1, Embedder::Rest(embedder) => embedder.prompt_count_in_chunk_hint(), + Embedder::Composite(embedder) => embedder.index.prompt_count_in_chunk_hint(), } } @@ -718,6 +724,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.dimensions(), Embedder::UserProvided(embedder) => embedder.dimensions(), Embedder::Rest(embedder) => embedder.dimensions(), + Embedder::Composite(embedder) => embedder.dimensions(), } } @@ -729,6 +736,7 @@ impl Embedder { Embedder::Ollama(embedder) => embedder.distribution(), Embedder::UserProvided(embedder) => embedder.distribution(), Embedder::Rest(embedder) => embedder.distribution(), + Embedder::Composite(embedder) => 
embedder.distribution(), } } @@ -739,6 +747,7 @@ impl Embedder { | Embedder::Ollama(_) | Embedder::Rest(_) => true, Embedder::UserProvided(_) => false, + Embedder::Composite(embedder) => embedder.index.uses_document_template(), } } } diff --git a/crates/milli/src/vector/ollama.rs b/crates/milli/src/vector/ollama.rs index d2a80d6b5..130e90cee 100644 --- a/crates/milli/src/vector/ollama.rs +++ b/crates/milli/src/vector/ollama.rs @@ -113,7 +113,7 @@ impl Embedder { } } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, threads: &ThreadPoolNoAbort, @@ -134,7 +134,7 @@ impl Embedder { } } - pub(crate) fn embed_chunks_ref( + pub(crate) fn embed_index_ref( &self, texts: &[&str], threads: &ThreadPoolNoAbort, diff --git a/crates/milli/src/vector/openai.rs b/crates/milli/src/vector/openai.rs index c7aec5d93..8a5e6266a 100644 --- a/crates/milli/src/vector/openai.rs +++ b/crates/milli/src/vector/openai.rs @@ -250,7 +250,7 @@ impl Embedder { Ok(all_embeddings) } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, threads: &ThreadPoolNoAbort, @@ -271,7 +271,7 @@ impl Embedder { } } - pub(crate) fn embed_chunks_ref( + pub(crate) fn embed_index_ref( &self, texts: &[&str], threads: &ThreadPoolNoAbort, diff --git a/crates/milli/src/vector/rest.rs b/crates/milli/src/vector/rest.rs index 467169d9c..a31bc5d2f 100644 --- a/crates/milli/src/vector/rest.rs +++ b/crates/milli/src/vector/rest.rs @@ -184,7 +184,7 @@ impl Embedder { Ok(embeddings.pop().unwrap()) } - pub fn embed_chunks( + pub fn embed_index( &self, text_chunks: Vec>, threads: &ThreadPoolNoAbort, @@ -205,7 +205,7 @@ impl Embedder { } } - pub(crate) fn embed_chunks_ref( + pub(crate) fn embed_index_ref( &self, texts: &[&str], threads: &ThreadPoolNoAbort, diff --git a/crates/milli/src/vector/settings.rs b/crates/milli/src/vector/settings.rs index f10407e42..3948ad4d8 100644 --- a/crates/milli/src/vector/settings.rs +++ b/crates/milli/src/vector/settings.rs @@ -6,8 +6,9 @@ use 
roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; use utoipa::ToSchema; +use super::composite::SubEmbedderOptions; use super::hf::OverridePooling; -use super::{ollama, openai, DistributionShift}; +use super::{ollama, openai, DistributionShift, EmbedderOptions}; use crate::prompt::{default_max_bytes, PromptData}; use crate::update::Setting; use crate::vector::EmbeddingConfig; @@ -20,78 +21,525 @@ pub struct EmbeddingSettings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The source used to provide the embeddings. + /// + /// Which embedder parameters are available and mandatory is determined by the value of this setting. + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings. + /// + /// # Defaults + /// + /// - Defaults to `openAi` pub source: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The name of the model to use. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `ollama` + /// + /// # Availability + /// + /// - This parameter is available for sources `openAi`, `huggingFace`, `ollama` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings. + /// + /// # Defaults + /// + /// - For source `openAi`, defaults to `text-embedding-3-small` + /// - For source `huggingFace`, defaults to `BAAI/bge-base-en-v1.5` pub model: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The revision (commit SHA1) of the model to use. + /// + /// If unspecified, Meilisearch picks the latest revision of the model. 
+ /// + /// # Availability + /// + /// - This parameter is available for source `huggingFace` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + /// + /// # Defaults + /// + /// - When `model` is set to default, defaults to `617ca489d9e86b49b8167676d8220688b99db36e` + /// - Otherwise, defaults to `null` pub revision: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The pooling method to use. + /// + /// # Availability + /// + /// - This parameter is available for source `huggingFace` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + /// + /// # Defaults + /// + /// - Defaults to `useModel` + /// + /// # Compatibility Note + /// + /// - Embedders created before this parameter was available default to `forceMean` to preserve the existing behavior. pub pooling: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The API key to pass to the remote embedder while making requests. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama`, `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings + /// + /// # Defaults + /// + /// - For source `openAi`, the key is read from `OPENAI_API_KEY`, then `MEILI_OPENAI_API_KEY`. + /// - For other sources, no bearer token is sent if this parameter is not set. + /// + /// # Note + /// + /// - This setting is partially hidden when returned by the settings pub api_key: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// The expected dimensions of the embeddings produced by this embedder. 
+ /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `userProvided` + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama`, `rest`, `userProvided` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When the source is `openAi`, changing the value of this parameter always regenerates embeddings + /// - 🌱 For other sources, changing the value of this parameter never regenerates embeddings + /// + /// # Defaults + /// + /// - For source `openAi`, the dimensions are the maximum allowed by the model. + /// - For sources `ollama` and `rest`, the dimensions are inferred by embedding a sample text. pub dimensions: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Whether to binary quantize the embeddings of this embedder. + /// + /// Binary quantized embeddings are smaller than regular embeddings, which improves + /// disk usage and retrieval speed, at the cost of relevancy. + /// + /// # Availability + /// + /// - This parameter is available for all embedders + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When set to `true`, embeddings are not regenerated, but they are binary quantized, which takes time. + /// + /// # Defaults + /// + /// - Defaults to `false` + /// + /// # Note + /// + /// As binary quantization is a destructive operation, it is not possible to disable this setting again after + /// first enabling it. If you are unsure of whether the performance-relevancy tradeoff is right for you, + /// we recommend using this parameter on a test index first. pub binary_quantized: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// A liquid template used to render documents to a text that can be embedded. + /// + /// Meilisearch interpolates the template for each document and sends the resulting text to the embedder.
+ /// The embedder then generates document vectors based on this text. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text. pub document_template: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Rendered texts are truncated to this size. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When increased, embeddings are regenerated for documents whose rendering through the template produces a different text. + /// - 🌱 When decreased, embeddings are never regenerated + /// + /// # Default + /// + /// - Defaults to 400 pub document_template_max_bytes: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// URL to reach the remote embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 When modified for source `openAi`, embeddings are never regenerated + /// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated pub url: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Template request to send to the remote embedder.
+ /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings pub request: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Template response indicating how to find the embeddings in the response from the remote embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings pub response: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option>)] + /// Additional headers to send to the remote embedder. + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings pub headers: Setting>, + + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + pub search_embedder: Setting, + + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + pub indexing_embedder: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] #[schema(value_type = Option)] + /// Affine transformation applied to the semantic score to make it more comparable to the ranking score. 
+ /// + /// # Availability + /// + /// - This parameter is available for all embedders + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings pub distribution: Setting, } -pub fn check_unset( - key: &Setting, - field: &'static str, - source: EmbedderSource, - embedder_name: &str, -) -> Result<(), UserError> { - if matches!(key, Setting::NotSet) { - Ok(()) - } else { - Err(UserError::InvalidFieldForSource { - embedder_name: embedder_name.to_owned(), - source_: source, - field, - allowed_fields_for_source: EmbeddingSettings::allowed_fields_for_source(source), - allowed_sources_for_field: EmbeddingSettings::allowed_sources_for_field(field), - }) - } +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +#[deserr(rename_all = camelCase, deny_unknown_fields)] +pub struct SubEmbeddingSettings { + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The source used to provide the embeddings. + /// + /// Which embedder parameters are available and mandatory is determined by the value of this setting. + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings. + /// + /// # Defaults + /// + /// - Defaults to `openAi` + pub source: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The name of the model to use. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `ollama` + /// + /// # Availability + /// + /// - This parameter is available for sources `openAi`, `huggingFace`, `ollama` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings. 
+ /// + /// # Defaults + /// + /// - For source `openAi`, defaults to `text-embedding-3-small` + /// - For source `huggingFace`, defaults to `BAAI/bge-base-en-v1.5` + pub model: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The revision (commit SHA1) of the model to use. + /// + /// If unspecified, Meilisearch picks the latest revision of the model. + /// + /// # Availability + /// + /// - This parameter is available for source `huggingFace` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + /// + /// # Defaults + /// + /// - When `model` is set to default, defaults to `617ca489d9e86b49b8167676d8220688b99db36e` + /// - Otherwise, defaults to `null` + pub revision: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The pooling method to use. + /// + /// # Availability + /// + /// - This parameter is available for source `huggingFace` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + /// + /// # Defaults + /// + /// - Defaults to `useModel` + /// + /// # Compatibility Note + /// + /// - Embedders created before this parameter was available default to `forceMean` to preserve the existing behavior. + pub pooling: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The API key to pass to the remote embedder while making requests. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama`, `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings + /// + /// # Defaults + /// + /// - For source `openAi`, the key is read from `OPENAI_API_KEY`, then `MEILI_OPENAI_API_KEY`. 
+ /// - For other sources, no bearer token is sent if this parameter is not set. + /// + /// # Note + /// + /// - This setting is partially hidden when returned by the settings + pub api_key: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// The expected dimensions of the embeddings produced by this embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `userProvided` + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama`, `rest`, `userProvided` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When the source is `openAi`, changing the value of this parameter always regenerates embeddings + /// - 🌱 For other sources, changing the value of this parameter never regenerates embeddings + /// + /// # Defaults + /// + /// - For source `openAi`, the dimensions are the maximum allowed by the model. + /// - For sources `ollama` and `rest`, the dimensions are inferred by embedding a sample text. + pub dimensions: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// A liquid template used to render documents to a text that can be embedded. + /// + /// Meilisearch interpolates the template for each document and sends the resulting text to the embedder. + /// The embedder then generates document vectors based on this text. + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When modified, embeddings are regenerated for documents whose rendering through the template produces a different text. + pub document_template: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// Rendered texts are truncated to this size.
+ /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `huggingFace`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ When increased, embeddings are regenerated for documents whose rendering through the template produces a different text. + /// - 🌱 When decreased, embeddings are never regenerated + /// + /// # Default + /// + /// - Defaults to 400 + pub document_template_max_bytes: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// URL to reach the remote embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `openAi`, `ollama` and `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 When modified for source `openAi`, embeddings are never regenerated + /// - 🏗️ When modified for sources `ollama` and `rest`, embeddings are always regenerated + pub url: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// Template request to send to the remote embedder. + /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + pub request: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option)] + /// Template response indicating how to find the embeddings in the response from the remote embedder. 
+ /// + /// # Mandatory + /// + /// - This parameter is mandatory for source `rest` + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🏗️ Changing the value of this parameter always regenerates embeddings + pub response: Setting, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] + #[schema(value_type = Option>)] + /// Additional headers to send to the remote embedder. + /// + /// # Availability + /// + /// - This parameter is available for source `rest` + /// + /// # 🔄 Reindexing + /// + /// - 🌱 Changing the value of this parameter never regenerates embeddings + pub headers: Setting>, + + // The following fields are provided for the sake of improving error handling + // They should always be set to `NotSet`, otherwise an error will be returned + #[serde(default, skip_serializing)] + #[deserr(default)] + #[schema(ignore)] + pub distribution: Setting, + + #[serde(default, skip_serializing)] + #[deserr(default)] + #[schema(ignore)] + pub binary_quantized: Setting, + + #[serde(default, skip_serializing)] + #[deserr(default)] + #[schema(ignore)] + pub search_embedder: Setting, + + #[serde(default, skip_serializing)] + #[deserr(default)] + #[schema(ignore)] + pub indexing_embedder: Setting, } /// Indicates what action should take place during a reindexing operation for an embedder @@ -176,6 +624,8 @@ impl SettingsDiff { mut url, mut request, mut response, + mut search_embedder, + mut indexing_embedder, mut distribution, mut headers, mut document_template_max_bytes, @@ -193,6 +643,8 @@ impl SettingsDiff { url: new_url, request: new_request, response: new_response, + search_embedder: new_search_embedder, + indexing_embedder: new_indexing_embedder, distribution: new_distribution, headers: new_headers, document_template_max_bytes: new_document_template_max_bytes, @@ -209,93 +661,45 @@ impl SettingsDiff { let mut reindex_action = None; - // **Warning**: do not use 
short-circuiting || here, we want all these operations applied - if source.apply(new_source) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - // when the source changes, we need to reapply the default settings for the new source - apply_default_for_source( - &source, - &mut model, - &mut revision, - &mut pooling, - &mut dimensions, - &mut url, - &mut request, - &mut response, - &mut document_template, - &mut document_template_max_bytes, - &mut headers, - ) - } - if model.apply(new_model) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if revision.apply(new_revision) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if pooling.apply(new_pooling) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if dimensions.apply(new_dimensions) { - match source { - // regenerate on dimensions change in OpenAI since truncation is supported - Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { - ReindexAction::push_action( - &mut reindex_action, - ReindexAction::FullReindex, - ); - } - // for all other embedders, the parameter is a hint that should not be able to change the result - // and so won't cause a reindex by itself. 
- _ => {} - } - } + Self::apply_and_diff( + &mut reindex_action, + &mut source, + &mut model, + &mut revision, + &mut pooling, + &mut api_key, + &mut dimensions, + &mut document_template, + &mut document_template_max_bytes, + &mut url, + &mut request, + &mut response, + &mut headers, + new_source, + new_model, + new_revision, + new_pooling, + new_api_key, + new_dimensions, + new_document_template, + new_document_template_max_bytes, + new_url, + new_request, + new_response, + new_headers, + ); + let binary_quantize_changed = binary_quantize.apply(new_binary_quantize); - if url.apply(new_url) { - match source { - // do not regenerate on an url change in OpenAI - Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {} - _ => { - ReindexAction::push_action( - &mut reindex_action, - ReindexAction::FullReindex, - ); - } - } - } - if request.apply(new_request) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if response.apply(new_response) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); - } - if document_template.apply(new_document_template) { - ReindexAction::push_action( - &mut reindex_action, - ReindexAction::RegeneratePrompts, - ); - } - if document_template_max_bytes.apply(new_document_template_max_bytes) { - let previous_document_template_max_bytes = - document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); - let new_document_template_max_bytes = - new_document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); - - // only reindex if the size increased. 
Reasoning: - // - size decrease is a performance optimization, so we don't reindex and we keep the more accurate vectors - // - size increase is an accuracy optimization, so we want to reindex - if new_document_template_max_bytes > previous_document_template_max_bytes { - ReindexAction::push_action( - &mut reindex_action, - ReindexAction::RegeneratePrompts, - ) - } - } + // changes to the *search* embedder never triggers any reindexing + search_embedder.apply(new_search_embedder); + indexing_embedder = Self::from_sub_settings( + indexing_embedder, + new_indexing_embedder, + &mut reindex_action, + )?; distribution.apply(new_distribution); - api_key.apply(new_api_key); - headers.apply(new_headers); let updated_settings = EmbeddingSettings { source, @@ -308,6 +712,8 @@ impl SettingsDiff { url, request, response, + search_embedder, + indexing_embedder, distribution, headers, document_template_max_bytes, @@ -333,6 +739,223 @@ impl SettingsDiff { }; Ok(ret) } + + fn from_sub_settings( + sub_embedder: Setting, + new_sub_embedder: Setting, + reindex_action: &mut Option, + ) -> Result, UserError> { + let ret = match new_sub_embedder { + Setting::Set(new_sub_embedder) => { + let Setting::Set(SubEmbeddingSettings { + mut source, + mut model, + mut revision, + mut pooling, + mut api_key, + mut dimensions, + mut document_template, + mut document_template_max_bytes, + mut url, + mut request, + mut response, + mut headers, + // phony settings + mut distribution, + mut binary_quantized, + mut search_embedder, + mut indexing_embedder, + }) = sub_embedder + else { + // return the new_indexing_embedder if the indexing_embedder was not set + // this should happen only when changing the source, so the decision to reindex is already taken. 
+ return Ok(Setting::Set(new_sub_embedder)); + }; + + let SubEmbeddingSettings { + source: new_source, + model: new_model, + revision: new_revision, + pooling: new_pooling, + api_key: new_api_key, + dimensions: new_dimensions, + document_template: new_document_template, + document_template_max_bytes: new_document_template_max_bytes, + url: new_url, + request: new_request, + response: new_response, + headers: new_headers, + distribution: new_distribution, + binary_quantized: new_binary_quantized, + search_embedder: new_search_embedder, + indexing_embedder: new_indexing_embedder, + } = new_sub_embedder; + + Self::apply_and_diff( + reindex_action, + &mut source, + &mut model, + &mut revision, + &mut pooling, + &mut api_key, + &mut dimensions, + &mut document_template, + &mut document_template_max_bytes, + &mut url, + &mut request, + &mut response, + &mut headers, + new_source, + new_model, + new_revision, + new_pooling, + new_api_key, + new_dimensions, + new_document_template, + new_document_template_max_bytes, + new_url, + new_request, + new_response, + new_headers, + ); + + // update phony settings, it is always an error to have them set. 
+ distribution.apply(new_distribution); + binary_quantized.apply(new_binary_quantized); + search_embedder.apply(new_search_embedder); + indexing_embedder.apply(new_indexing_embedder); + + let updated_settings = SubEmbeddingSettings { + source, + model, + revision, + pooling, + api_key, + dimensions, + document_template, + url, + request, + response, + headers, + document_template_max_bytes, + distribution, + binary_quantized, + search_embedder, + indexing_embedder, + }; + Setting::Set(updated_settings) + } + // handled during validation of the settings + Setting::Reset | Setting::NotSet => sub_embedder, + }; + Ok(ret) + } + + #[allow(clippy::too_many_arguments)] + fn apply_and_diff( + reindex_action: &mut Option, + source: &mut Setting, + model: &mut Setting, + revision: &mut Setting, + pooling: &mut Setting, + api_key: &mut Setting, + dimensions: &mut Setting, + document_template: &mut Setting, + document_template_max_bytes: &mut Setting, + url: &mut Setting, + request: &mut Setting, + response: &mut Setting, + headers: &mut Setting>, + new_source: Setting, + new_model: Setting, + new_revision: Setting, + new_pooling: Setting, + new_api_key: Setting, + new_dimensions: Setting, + new_document_template: Setting, + new_document_template_max_bytes: Setting, + new_url: Setting, + new_request: Setting, + new_response: Setting, + new_headers: Setting>, + ) { + // **Warning**: do not use short-circuiting || here, we want all these operations applied + if source.apply(new_source) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + // when the source changes, we need to reapply the default settings for the new source + apply_default_for_source( + &*source, + model, + revision, + pooling, + dimensions, + url, + request, + response, + document_template, + document_template_max_bytes, + headers, + // send dummy values, the source cannot recursively be composite + &mut Setting::NotSet, + &mut Setting::NotSet, + ) + } + if model.apply(new_model) { + 
ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if revision.apply(new_revision) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if pooling.apply(new_pooling) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if dimensions.apply(new_dimensions) { + match *source { + // regenerate on dimensions change in OpenAI since truncation is supported + Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + // for all other embedders, the parameter is a hint that should not be able to change the result + // and so won't cause a reindex by itself. + _ => {} + } + } + if url.apply(new_url) { + match *source { + // do not regenerate on an url change in OpenAI + Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {} + _ => { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + } + } + if request.apply(new_request) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if response.apply(new_response) { + ReindexAction::push_action(reindex_action, ReindexAction::FullReindex); + } + if document_template.apply(new_document_template) { + ReindexAction::push_action(reindex_action, ReindexAction::RegeneratePrompts); + } + + if document_template_max_bytes.apply(new_document_template_max_bytes) { + let previous_document_template_max_bytes = + document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); + let new_document_template_max_bytes = + new_document_template_max_bytes.set().unwrap_or(default_max_bytes().get()); + + // only reindex if the size increased. 
Reasoning: + // - size decrease is a performance optimization, so we don't reindex and we keep the more accurate vectors + // - size increase is an accuracy optimization, so we want to reindex + if new_document_template_max_bytes > previous_document_template_max_bytes { + ReindexAction::push_action(reindex_action, ReindexAction::RegeneratePrompts) + } + } + + api_key.apply(new_api_key); + headers.apply(new_headers); + } } impl ReindexAction { @@ -358,6 +981,8 @@ fn apply_default_for_source( document_template: &mut Setting, document_template_max_bytes: &mut Setting, headers: &mut Setting>, + search_embedder: &mut Setting, + indexing_embedder: &mut Setting, ) { match source { Setting::Set(EmbedderSource::HuggingFace) => { @@ -369,6 +994,8 @@ fn apply_default_for_source( *request = Setting::NotSet; *response = Setting::NotSet; *headers = Setting::NotSet; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; } Setting::Set(EmbedderSource::Ollama) => { *model = Setting::Reset; @@ -379,6 +1006,8 @@ fn apply_default_for_source( *request = Setting::NotSet; *response = Setting::NotSet; *headers = Setting::NotSet; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; } Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { *model = Setting::Reset; @@ -389,6 +1018,8 @@ fn apply_default_for_source( *request = Setting::NotSet; *response = Setting::NotSet; *headers = Setting::NotSet; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; } Setting::Set(EmbedderSource::Rest) => { *model = Setting::NotSet; @@ -399,6 +1030,8 @@ fn apply_default_for_source( *request = Setting::Reset; *response = Setting::Reset; *headers = Setting::Reset; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; } Setting::Set(EmbedderSource::UserProvided) => { *model = Setting::NotSet; @@ -411,148 +1044,374 @@ fn apply_default_for_source( *document_template = Setting::NotSet; *document_template_max_bytes = 
Setting::NotSet; *headers = Setting::NotSet; + *search_embedder = Setting::NotSet; + *indexing_embedder = Setting::NotSet; + } + Setting::Set(EmbedderSource::Composite) => { + *model = Setting::NotSet; + *revision = Setting::NotSet; + *pooling = Setting::NotSet; + *dimensions = Setting::NotSet; + *url = Setting::NotSet; + *request = Setting::NotSet; + *response = Setting::NotSet; + *document_template = Setting::NotSet; + *document_template_max_bytes = Setting::NotSet; + *headers = Setting::NotSet; + *search_embedder = Setting::Reset; + *indexing_embedder = Setting::Reset; } Setting::NotSet => {} } } -pub fn check_set( - key: &Setting, - field: &'static str, - source: EmbedderSource, - embedder_name: &str, -) -> Result<(), UserError> { - if matches!(key, Setting::Set(_)) { - Ok(()) - } else { - Err(UserError::MissingFieldForSource { - field, - source_: source, - embedder_name: embedder_name.to_owned(), - }) +pub(crate) enum FieldStatus { + Mandatory, + Allowed, + Disallowed, +} + +#[derive(Debug, Clone, Copy)] +pub enum NestingContext { + NotNested, + Search, + Indexing, +} + +impl NestingContext { + pub fn embedder_name_with_context(&self, embedder_name: &str) -> String { + match self { + NestingContext::NotNested => embedder_name.to_string(), + NestingContext::Search => format!("{embedder_name}.searchEmbedder"), + NestingContext::Indexing => format!("{embedder_name}.indexingEmbedder",), + } + } + + pub fn in_context(&self) -> &'static str { + match self { + NestingContext::NotNested => "", + NestingContext::Search => " for the search embedder", + NestingContext::Indexing => " for the indexing embedder", + } + } + + pub fn nesting_embedders(&self) -> &'static str { + match self { + NestingContext::NotNested => "", + NestingContext::Search => { + "\n - note: nesting embedders in `searchEmbedder` is not allowed" + } + NestingContext::Indexing => { + "\n - note: nesting embedders in `indexingEmbedder` is not allowed" + } + } + } +} + +#[derive(Debug, Clone, Copy, 
enum_iterator::Sequence)] +pub enum MetaEmbeddingSetting { + Source, + Model, + Revision, + Pooling, + ApiKey, + Dimensions, + DocumentTemplate, + DocumentTemplateMaxBytes, + Url, + Request, + Response, + Headers, + SearchEmbedder, + IndexingEmbedder, + Distribution, + BinaryQuantized, +} + +impl MetaEmbeddingSetting { + pub(crate) fn name(&self) -> &'static str { + use MetaEmbeddingSetting::*; + match self { + Source => "source", + Model => "model", + Revision => "revision", + Pooling => "pooling", + ApiKey => "apiKey", + Dimensions => "dimensions", + DocumentTemplate => "documentTemplate", + DocumentTemplateMaxBytes => "documentTemplateMaxBytes", + Url => "url", + Request => "request", + Response => "response", + Headers => "headers", + SearchEmbedder => "searchEmbedder", + IndexingEmbedder => "indexingEmbedder", + Distribution => "distribution", + BinaryQuantized => "binaryQuantized", + } } } impl EmbeddingSettings { - pub const SOURCE: &'static str = "source"; - pub const MODEL: &'static str = "model"; - pub const REVISION: &'static str = "revision"; - pub const POOLING: &'static str = "pooling"; - pub const API_KEY: &'static str = "apiKey"; - pub const DIMENSIONS: &'static str = "dimensions"; - pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate"; - pub const DOCUMENT_TEMPLATE_MAX_BYTES: &'static str = "documentTemplateMaxBytes"; + #[allow(clippy::too_many_arguments)] + pub(crate) fn check_settings( + embedder_name: &str, + source: EmbedderSource, + context: NestingContext, + model: &Setting, + revision: &Setting, + pooling: &Setting, + dimensions: &Setting, + api_key: &Setting, + url: &Setting, + request: &Setting, + response: &Setting, + document_template: &Setting, + document_template_max_bytes: &Setting, + headers: &Setting>, + search_embedder: &Setting, + indexing_embedder: &Setting, + binary_quantized: &Setting, + distribution: &Setting, + ) -> Result<(), UserError> { + Self::check_setting(embedder_name, source, MetaEmbeddingSetting::Model, 
context, model)?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Revision, + context, + revision, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Pooling, + context, + pooling, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Dimensions, + context, + dimensions, + )?; + Self::check_setting(embedder_name, source, MetaEmbeddingSetting::ApiKey, context, api_key)?; + Self::check_setting(embedder_name, source, MetaEmbeddingSetting::Url, context, url)?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Request, + context, + request, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Response, + context, + response, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::DocumentTemplate, + context, + document_template, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::DocumentTemplateMaxBytes, + context, + document_template_max_bytes, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Headers, + context, + headers, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::SearchEmbedder, + context, + search_embedder, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::IndexingEmbedder, + context, + indexing_embedder, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::BinaryQuantized, + context, + binary_quantized, + )?; + Self::check_setting( + embedder_name, + source, + MetaEmbeddingSetting::Distribution, + context, + distribution, + ) + } - pub const URL: &'static str = "url"; - pub const REQUEST: &'static str = "request"; - pub const RESPONSE: &'static str = "response"; - pub const HEADERS: &'static str = "headers"; + pub(crate) fn allowed_sources_for_field( + field: MetaEmbeddingSetting, + context: NestingContext, + ) -> Vec { + enum_iterator::all() + .filter(|source| 
{ + !matches!(Self::field_status(*source, field, context), FieldStatus::Disallowed) + }) + .collect() + } - pub const DISTRIBUTION: &'static str = "distribution"; + pub(crate) fn allowed_fields_for_source( + source: EmbedderSource, + context: NestingContext, + ) -> Vec<&'static str> { + enum_iterator::all() + .filter(|field| { + !matches!(Self::field_status(source, *field, context), FieldStatus::Disallowed) + }) + .map(|field| field.name()) + .collect() + } - pub const BINARY_QUANTIZED: &'static str = "binaryQuantized"; - - pub fn allowed_sources_for_field(field: &'static str) -> &'static [EmbedderSource] { - match field { - Self::SOURCE => &[ - EmbedderSource::HuggingFace, - EmbedderSource::OpenAi, - EmbedderSource::UserProvided, - EmbedderSource::Rest, - EmbedderSource::Ollama, - ], - Self::MODEL => { - &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] - } - Self::REVISION => &[EmbedderSource::HuggingFace], - Self::POOLING => &[EmbedderSource::HuggingFace], - Self::API_KEY => { - &[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest] - } - Self::DIMENSIONS => &[ - EmbedderSource::OpenAi, - EmbedderSource::UserProvided, - EmbedderSource::Ollama, - EmbedderSource::Rest, - ], - Self::DOCUMENT_TEMPLATE | Self::DOCUMENT_TEMPLATE_MAX_BYTES => &[ - EmbedderSource::HuggingFace, - EmbedderSource::OpenAi, - EmbedderSource::Ollama, - EmbedderSource::Rest, - ], - Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest, EmbedderSource::OpenAi], - Self::REQUEST => &[EmbedderSource::Rest], - Self::RESPONSE => &[EmbedderSource::Rest], - Self::HEADERS => &[EmbedderSource::Rest], - Self::DISTRIBUTION => &[ - EmbedderSource::HuggingFace, - EmbedderSource::Ollama, - EmbedderSource::OpenAi, - EmbedderSource::Rest, - EmbedderSource::UserProvided, - ], - Self::BINARY_QUANTIZED => &[ - EmbedderSource::HuggingFace, - EmbedderSource::Ollama, - EmbedderSource::OpenAi, - EmbedderSource::Rest, - EmbedderSource::UserProvided, - ], - _other => 
unreachable!("unknown field"), + fn check_setting( + embedder_name: &str, + source: EmbedderSource, + field: MetaEmbeddingSetting, + context: NestingContext, + setting: &Setting, + ) -> Result<(), UserError> { + match (Self::field_status(source, field, context), setting) { + (FieldStatus::Mandatory, Setting::Set(_)) + | (FieldStatus::Allowed, _) + | (FieldStatus::Disallowed, Setting::NotSet) => Ok(()), + (FieldStatus::Disallowed, _) => Err(UserError::InvalidFieldForSource { + embedder_name: context.embedder_name_with_context(embedder_name), + source_: source, + context, + field, + }), + (FieldStatus::Mandatory, _) => Err(UserError::MissingFieldForSource { + field: field.name(), + source_: source, + embedder_name: embedder_name.to_owned(), + }), } } - pub fn allowed_fields_for_source(source: EmbedderSource) -> &'static [&'static str] { - match source { - EmbedderSource::OpenAi => &[ - Self::SOURCE, - Self::MODEL, - Self::API_KEY, - Self::DOCUMENT_TEMPLATE, - Self::DOCUMENT_TEMPLATE_MAX_BYTES, - Self::DIMENSIONS, - Self::DISTRIBUTION, - Self::URL, - Self::BINARY_QUANTIZED, - ], - EmbedderSource::HuggingFace => &[ - Self::SOURCE, - Self::MODEL, - Self::REVISION, - Self::POOLING, - Self::DOCUMENT_TEMPLATE, - Self::DOCUMENT_TEMPLATE_MAX_BYTES, - Self::DISTRIBUTION, - Self::BINARY_QUANTIZED, - ], - EmbedderSource::Ollama => &[ - Self::SOURCE, - Self::MODEL, - Self::DOCUMENT_TEMPLATE, - Self::DOCUMENT_TEMPLATE_MAX_BYTES, - Self::URL, - Self::API_KEY, - Self::DIMENSIONS, - Self::DISTRIBUTION, - Self::BINARY_QUANTIZED, - ], - EmbedderSource::UserProvided => { - &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION, Self::BINARY_QUANTIZED] + pub(crate) fn field_status( + source: EmbedderSource, + field: MetaEmbeddingSetting, + context: NestingContext, + ) -> FieldStatus { + use EmbedderSource::*; + use MetaEmbeddingSetting::*; + use NestingContext::*; + match (source, field, context) { + (_, Distribution | BinaryQuantized, NotNested) => FieldStatus::Allowed, + (_, 
Distribution | BinaryQuantized, _) => FieldStatus::Disallowed, + (_, DocumentTemplate | DocumentTemplateMaxBytes, Search) => FieldStatus::Disallowed, + ( + OpenAi, + Source + | Model + | ApiKey + | DocumentTemplate + | DocumentTemplateMaxBytes + | Dimensions + | Url, + _, + ) => FieldStatus::Allowed, + ( + OpenAi, + Revision | Pooling | Request | Response | Headers | SearchEmbedder + | IndexingEmbedder, + _, + ) => FieldStatus::Disallowed, + ( + HuggingFace, + Source | Model | Revision | Pooling | DocumentTemplate | DocumentTemplateMaxBytes, + _, + ) => FieldStatus::Allowed, + ( + HuggingFace, + ApiKey | Dimensions | Url | Request | Response | Headers | SearchEmbedder + | IndexingEmbedder, + _, + ) => FieldStatus::Disallowed, + (Ollama, Model, _) => FieldStatus::Mandatory, + ( + Ollama, + Source | DocumentTemplate | DocumentTemplateMaxBytes | Url | ApiKey | Dimensions, + _, + ) => FieldStatus::Allowed, + ( + Ollama, + Revision | Pooling | Request | Response | Headers | SearchEmbedder + | IndexingEmbedder, + _, + ) => FieldStatus::Disallowed, + (UserProvided, Dimensions, _) => FieldStatus::Mandatory, + (UserProvided, Source, _) => FieldStatus::Allowed, + ( + UserProvided, + Model + | Revision + | Pooling + | ApiKey + | DocumentTemplate + | DocumentTemplateMaxBytes + | Url + | Request + | Response + | Headers + | SearchEmbedder + | IndexingEmbedder, + _, + ) => FieldStatus::Disallowed, + (Rest, Url | Request | Response, _) => FieldStatus::Mandatory, + ( + Rest, + Source + | ApiKey + | Dimensions + | DocumentTemplate + | DocumentTemplateMaxBytes + | Headers, + _, + ) => FieldStatus::Allowed, + (Rest, Model | Revision | Pooling | SearchEmbedder | IndexingEmbedder, _) => { + FieldStatus::Disallowed } - EmbedderSource::Rest => &[ - Self::SOURCE, - Self::API_KEY, - Self::DIMENSIONS, - Self::DOCUMENT_TEMPLATE, - Self::DOCUMENT_TEMPLATE_MAX_BYTES, - Self::URL, - Self::REQUEST, - Self::RESPONSE, - Self::HEADERS, - Self::DISTRIBUTION, - Self::BINARY_QUANTIZED, - ], + 
(Composite, SearchEmbedder | IndexingEmbedder, _) => FieldStatus::Mandatory, + (Composite, Source, _) => FieldStatus::Allowed, + ( + Composite, + Model + | Revision + | Pooling + | ApiKey + | Dimensions + | DocumentTemplate + | DocumentTemplateMaxBytes + | Url + | Request + | Response + | Headers, + _, + ) => FieldStatus::Disallowed, } } @@ -576,9 +1435,45 @@ impl EmbeddingSettings { *model = Setting::Set(openai::EmbeddingModel::default().name().to_owned()) } } + + pub(crate) fn check_nested_source( + embedder_name: &str, + source: EmbedderSource, + context: NestingContext, + ) -> Result<(), UserError> { + match (context, source) { + (NestingContext::NotNested, _) => Ok(()), + ( + NestingContext::Search | NestingContext::Indexing, + EmbedderSource::Composite | EmbedderSource::UserProvided, + ) => Err(UserError::InvalidSourceForNested { + embedder_name: context.embedder_name_with_context(embedder_name), + source_: source, + }), + ( + NestingContext::Search | NestingContext::Indexing, + EmbedderSource::OpenAi + | EmbedderSource::HuggingFace + | EmbedderSource::Ollama + | EmbedderSource::Rest, + ) => Ok(()), + } + } } -#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr, ToSchema)] +#[derive( + Debug, + Clone, + Copy, + Default, + Serialize, + Deserialize, + PartialEq, + Eq, + Deserr, + ToSchema, + enum_iterator::Sequence, +)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub enum EmbedderSource { @@ -588,6 +1483,7 @@ pub enum EmbedderSource { Ollama, UserProvided, Rest, + Composite, } impl std::fmt::Display for EmbedderSource { @@ -598,125 +1494,311 @@ impl std::fmt::Display for EmbedderSource { EmbedderSource::UserProvided => "userProvided", EmbedderSource::Ollama => "ollama", EmbedderSource::Rest => "rest", + EmbedderSource::Composite => "composite", }; f.write_str(s) } } +impl EmbeddingSettings { + fn from_hugging_face( + super::hf::EmbedderOptions { + model, + 
revision, + distribution, + pooling, + }: super::hf::EmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::HuggingFace), + model: Setting::Set(model), + revision: Setting::some_or_not_set(revision), + pooling: Setting::Set(pooling), + api_key: Setting::NotSet, + dimensions: Setting::NotSet, + document_template, + document_template_max_bytes, + url: Setting::NotSet, + request: Setting::NotSet, + response: Setting::NotSet, + headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), + } + } + + fn from_openai( + super::openai::EmbedderOptions { + url, + api_key, + embedding_model, + dimensions, + distribution, + }: super::openai::EmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::OpenAi), + model: Setting::Set(embedding_model.name().to_owned()), + revision: Setting::NotSet, + pooling: Setting::NotSet, + api_key: Setting::some_or_not_set(api_key), + dimensions: Setting::some_or_not_set(dimensions), + document_template, + document_template_max_bytes, + url: Setting::some_or_not_set(url), + request: Setting::NotSet, + response: Setting::NotSet, + headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), + } + } + + fn from_ollama( + super::ollama::EmbedderOptions { + embedding_model, + url, + api_key, + distribution, + dimensions, + }: super::ollama::EmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::Ollama), + model: 
Setting::Set(embedding_model), + revision: Setting::NotSet, + pooling: Setting::NotSet, + api_key: Setting::some_or_not_set(api_key), + dimensions: Setting::some_or_not_set(dimensions), + document_template, + document_template_max_bytes, + url: Setting::some_or_not_set(url), + request: Setting::NotSet, + response: Setting::NotSet, + headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), + } + } + + fn from_user_provided( + super::manual::EmbedderOptions { dimensions, distribution }: super::manual::EmbedderOptions, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::UserProvided), + model: Setting::NotSet, + revision: Setting::NotSet, + pooling: Setting::NotSet, + api_key: Setting::NotSet, + dimensions: Setting::Set(dimensions), + document_template: Setting::NotSet, + document_template_max_bytes: Setting::NotSet, + url: Setting::NotSet, + request: Setting::NotSet, + response: Setting::NotSet, + headers: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + distribution: Setting::some_or_not_set(distribution), + binary_quantized: Setting::some_or_not_set(quantized), + } + } + + fn from_rest( + super::rest::EmbedderOptions { + api_key, + dimensions, + url, + request, + response, + distribution, + headers, + }: super::rest::EmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + quantized: Option, + ) -> Self { + Self { + source: Setting::Set(EmbedderSource::Rest), + model: Setting::NotSet, + revision: Setting::NotSet, + pooling: Setting::NotSet, + api_key: Setting::some_or_not_set(api_key), + dimensions: Setting::some_or_not_set(dimensions), + document_template, + document_template_max_bytes, + url: Setting::Set(url), + request: Setting::Set(request), + response: Setting::Set(response), + distribution: 
Setting::some_or_not_set(distribution), + headers: Setting::Set(headers), + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, + binary_quantized: Setting::some_or_not_set(quantized), + } + } +} + impl From for EmbeddingSettings { fn from(value: EmbeddingConfig) -> Self { let EmbeddingConfig { embedder_options, prompt, quantized } = value; let document_template_max_bytes = Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get()); match embedder_options { - super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions { - model, - revision, - distribution, - pooling, - }) => Self { - source: Setting::Set(EmbedderSource::HuggingFace), - model: Setting::Set(model), - revision: Setting::some_or_not_set(revision), - pooling: Setting::Set(pooling), - api_key: Setting::NotSet, - dimensions: Setting::NotSet, - document_template: Setting::Set(prompt.template), + super::EmbedderOptions::HuggingFace(options) => Self::from_hugging_face( + options, + Setting::Set(prompt.template), document_template_max_bytes, - url: Setting::NotSet, - request: Setting::NotSet, - response: Setting::NotSet, - headers: Setting::NotSet, - distribution: Setting::some_or_not_set(distribution), - binary_quantized: Setting::some_or_not_set(quantized), - }, - super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { - url, - api_key, - embedding_model, - dimensions, - distribution, - }) => Self { - source: Setting::Set(EmbedderSource::OpenAi), - model: Setting::Set(embedding_model.name().to_owned()), - revision: Setting::NotSet, - pooling: Setting::NotSet, - api_key: Setting::some_or_not_set(api_key), - dimensions: Setting::some_or_not_set(dimensions), - document_template: Setting::Set(prompt.template), + quantized, + ), + super::EmbedderOptions::OpenAi(options) => Self::from_openai( + options, + Setting::Set(prompt.template), document_template_max_bytes, - url: Setting::some_or_not_set(url), - request: Setting::NotSet, - response: Setting::NotSet, - headers: 
Setting::NotSet, - distribution: Setting::some_or_not_set(distribution), - binary_quantized: Setting::some_or_not_set(quantized), - }, - super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { - embedding_model, - url, - api_key, - distribution, - dimensions, - }) => Self { - source: Setting::Set(EmbedderSource::Ollama), - model: Setting::Set(embedding_model), - revision: Setting::NotSet, - pooling: Setting::NotSet, - api_key: Setting::some_or_not_set(api_key), - dimensions: Setting::some_or_not_set(dimensions), - document_template: Setting::Set(prompt.template), + quantized, + ), + super::EmbedderOptions::Ollama(options) => Self::from_ollama( + options, + Setting::Set(prompt.template), document_template_max_bytes, - url: Setting::some_or_not_set(url), - request: Setting::NotSet, - response: Setting::NotSet, - headers: Setting::NotSet, - distribution: Setting::some_or_not_set(distribution), - binary_quantized: Setting::some_or_not_set(quantized), - }, - super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { - dimensions, - distribution, + quantized, + ), + super::EmbedderOptions::UserProvided(options) => { + Self::from_user_provided(options, quantized) + } + super::EmbedderOptions::Rest(options) => Self::from_rest( + options, + Setting::Set(prompt.template), + document_template_max_bytes, + quantized, + ), + super::EmbedderOptions::Composite(super::composite::EmbedderOptions { + search, + index, }) => Self { - source: Setting::Set(EmbedderSource::UserProvided), + source: Setting::Set(EmbedderSource::Composite), model: Setting::NotSet, revision: Setting::NotSet, pooling: Setting::NotSet, api_key: Setting::NotSet, - dimensions: Setting::Set(dimensions), + dimensions: Setting::NotSet, + binary_quantized: Setting::some_or_not_set(quantized), document_template: Setting::NotSet, document_template_max_bytes: Setting::NotSet, url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, - distribution: 
Setting::some_or_not_set(distribution), - binary_quantized: Setting::some_or_not_set(quantized), + distribution: Setting::some_or_not_set(search.distribution()), + search_embedder: Setting::Set(SubEmbeddingSettings::from_options( + search, + Setting::NotSet, + Setting::NotSet, + )), + indexing_embedder: Setting::Set(SubEmbeddingSettings::from_options( + index, + Setting::Set(prompt.template), + document_template_max_bytes, + )), }, - super::EmbedderOptions::Rest(super::rest::EmbedderOptions { - api_key, - dimensions, - url, - request, - response, - distribution, - headers, - }) => Self { - source: Setting::Set(EmbedderSource::Rest), - model: Setting::NotSet, - revision: Setting::NotSet, - pooling: Setting::NotSet, - api_key: Setting::some_or_not_set(api_key), - dimensions: Setting::some_or_not_set(dimensions), - document_template: Setting::Set(prompt.template), + } + } +} + +impl SubEmbeddingSettings { + fn from_options( + options: SubEmbedderOptions, + document_template: Setting, + document_template_max_bytes: Setting, + ) -> Self { + let settings = match options { + SubEmbedderOptions::HuggingFace(embedder_options) => { + EmbeddingSettings::from_hugging_face( + embedder_options, + document_template, + document_template_max_bytes, + None, + ) + } + SubEmbedderOptions::OpenAi(embedder_options) => EmbeddingSettings::from_openai( + embedder_options, + document_template, document_template_max_bytes, - url: Setting::Set(url), - request: Setting::Set(request), - response: Setting::Set(response), - distribution: Setting::some_or_not_set(distribution), - headers: Setting::Set(headers), - binary_quantized: Setting::some_or_not_set(quantized), - }, + None, + ), + SubEmbedderOptions::Ollama(embedder_options) => EmbeddingSettings::from_ollama( + embedder_options, + document_template, + document_template_max_bytes, + None, + ), + SubEmbedderOptions::UserProvided(embedder_options) => { + EmbeddingSettings::from_user_provided(embedder_options, None) + } + 
SubEmbedderOptions::Rest(embedder_options) => EmbeddingSettings::from_rest( + embedder_options, + document_template, + document_template_max_bytes, + None, + ), + }; + settings.into() + } +} + +impl From for SubEmbeddingSettings { + fn from(value: EmbeddingSettings) -> Self { + let EmbeddingSettings { + source, + model, + revision, + pooling, + api_key, + dimensions, + document_template, + document_template_max_bytes, + url, + request, + response, + headers, + binary_quantized: _, + search_embedder: _, + indexing_embedder: _, + distribution: _, + } = value; + Self { + source, + model, + revision, + pooling, + api_key, + dimensions, + document_template, + document_template_max_bytes, + url, + request, + response, + headers, + distribution: Setting::NotSet, + binary_quantized: Setting::NotSet, + search_embedder: Setting::NotSet, + indexing_embedder: Setting::NotSet, } } } @@ -739,88 +1821,26 @@ impl From for EmbeddingConfig { distribution, headers, binary_quantized, + search_embedder, + mut indexing_embedder, } = value; this.quantized = binary_quantized.set(); - - if let Some(source) = source.set() { - match source { - EmbedderSource::OpenAi => { - let mut options = super::openai::EmbedderOptions::with_default_model(None); - if let Some(model) = model.set() { - if let Some(model) = super::openai::EmbeddingModel::from_name(&model) { - options.embedding_model = model; - } - } - if let Some(url) = url.set() { - options.url = Some(url); - } - if let Some(api_key) = api_key.set() { - options.api_key = Some(api_key); - } - if let Some(dimensions) = dimensions.set() { - options.dimensions = Some(dimensions); - } - options.distribution = distribution.set(); - this.embedder_options = super::EmbedderOptions::OpenAi(options); - } - EmbedderSource::Ollama => { - let mut options: ollama::EmbedderOptions = - super::ollama::EmbedderOptions::with_default_model( - api_key.set(), - url.set(), - dimensions.set(), - ); - if let Some(model) = model.set() { - options.embedding_model = 
model; - } - - options.distribution = distribution.set(); - this.embedder_options = super::EmbedderOptions::Ollama(options); - } - EmbedderSource::HuggingFace => { - let mut options = super::hf::EmbedderOptions::default(); - if let Some(model) = model.set() { - options.model = model; - // Reset the revision if we are setting the model. - // This allows the following: - // "huggingFace": {} -> default model with default revision - // "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision - // "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision - options.revision = None; - } - if let Some(revision) = revision.set() { - options.revision = Some(revision); - } - if let Some(pooling) = pooling.set() { - options.pooling = pooling; - } - options.distribution = distribution.set(); - this.embedder_options = super::EmbedderOptions::HuggingFace(options); - } - EmbedderSource::UserProvided => { - this.embedder_options = - super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { - dimensions: dimensions.set().unwrap(), - distribution: distribution.set(), - }); - } - EmbedderSource::Rest => { - this.embedder_options = - super::EmbedderOptions::Rest(super::rest::EmbedderOptions { - api_key: api_key.set(), - dimensions: dimensions.set(), - url: url.set().unwrap(), - request: request.set().unwrap(), - response: response.set().unwrap(), - distribution: distribution.set(), - headers: headers.set().unwrap_or_default(), - }) - } + if let Some((template, document_template_max_bytes)) = + match (document_template, &mut indexing_embedder) { + (Setting::Set(template), _) => Some((template, document_template_max_bytes)), + // retrieve the prompt from the indexing embedder in case of a composite embedder + ( + _, + Setting::Set(SubEmbeddingSettings { + document_template: Setting::Set(document_template), + document_template_max_bytes, + .. 
+ }), + ) => Some((std::mem::take(document_template), *document_template_max_bytes)), + _ => None, } - } - - if let Setting::Set(template) = document_template { + { let max_bytes = document_template_max_bytes .set() .and_then(NonZeroUsize::new) @@ -829,6 +1849,208 @@ impl From for EmbeddingConfig { this.prompt = PromptData { template, max_bytes: Some(max_bytes) } } + if let Some(source) = source.set() { + this.embedder_options = match source { + EmbedderSource::OpenAi => { + SubEmbedderOptions::openai(model, url, api_key, dimensions, distribution).into() + } + EmbedderSource::Ollama => { + SubEmbedderOptions::ollama(model, url, api_key, dimensions, distribution).into() + } + EmbedderSource::HuggingFace => { + SubEmbedderOptions::hugging_face(model, revision, pooling, distribution).into() + } + EmbedderSource::UserProvided => { + SubEmbedderOptions::user_provided(dimensions.set().unwrap(), distribution) + .into() + } + EmbedderSource::Rest => SubEmbedderOptions::rest( + url.set().unwrap(), + api_key, + request.set().unwrap(), + response.set().unwrap(), + headers, + dimensions, + distribution, + ) + .into(), + EmbedderSource::Composite => { + super::EmbedderOptions::Composite(super::composite::EmbedderOptions { + // it is important to give the distribution to the search here, as this is from where we'll retrieve it + search: SubEmbedderOptions::from_settings( + search_embedder.set().unwrap(), + distribution, + ), + index: SubEmbedderOptions::from_settings( + indexing_embedder.set().unwrap(), + Setting::NotSet, + ), + }) + } + }; + } + this } } + +impl SubEmbedderOptions { + fn from_settings( + settings: SubEmbeddingSettings, + distribution: Setting, + ) -> Self { + let SubEmbeddingSettings { + source, + model, + revision, + pooling, + api_key, + dimensions, + // retrieved by the EmbeddingConfig + document_template: _, + document_template_max_bytes: _, + url, + request, + response, + headers, + // phony parameters + distribution: _, + binary_quantized: _, + 
search_embedder: _, + indexing_embedder: _, + } = settings; + + match source.set().unwrap() { + EmbedderSource::OpenAi => Self::openai(model, url, api_key, dimensions, distribution), + EmbedderSource::HuggingFace => { + Self::hugging_face(model, revision, pooling, distribution) + } + EmbedderSource::Ollama => Self::ollama(model, url, api_key, dimensions, distribution), + EmbedderSource::UserProvided => { + Self::user_provided(dimensions.set().unwrap(), distribution) + } + EmbedderSource::Rest => Self::rest( + url.set().unwrap(), + api_key, + request.set().unwrap(), + response.set().unwrap(), + headers, + dimensions, + distribution, + ), + EmbedderSource::Composite => panic!("nested composite embedders"), + } + } + + fn openai( + model: Setting, + url: Setting, + api_key: Setting, + dimensions: Setting, + distribution: Setting, + ) -> Self { + let mut options = super::openai::EmbedderOptions::with_default_model(None); + if let Some(model) = model.set() { + if let Some(model) = super::openai::EmbeddingModel::from_name(&model) { + options.embedding_model = model; + } + } + if let Some(url) = url.set() { + options.url = Some(url); + } + if let Some(api_key) = api_key.set() { + options.api_key = Some(api_key); + } + if let Some(dimensions) = dimensions.set() { + options.dimensions = Some(dimensions); + } + options.distribution = distribution.set(); + SubEmbedderOptions::OpenAi(options) + } + fn hugging_face( + model: Setting, + revision: Setting, + pooling: Setting, + distribution: Setting, + ) -> Self { + let mut options = super::hf::EmbedderOptions::default(); + if let Some(model) = model.set() { + options.model = model; + // Reset the revision if we are setting the model. 
+ // This allows the following: + // "huggingFace": {} -> default model with default revision + // "huggingFace": { "model": "name-of-the-default-model" } -> default model without a revision + // "huggingFace": { "model": "some-other-model" } -> most importantly, other model without a revision + options.revision = None; + } + if let Some(revision) = revision.set() { + options.revision = Some(revision); + } + if let Some(pooling) = pooling.set() { + options.pooling = pooling; + } + options.distribution = distribution.set(); + SubEmbedderOptions::HuggingFace(options) + } + fn user_provided(dimensions: usize, distribution: Setting) -> Self { + Self::UserProvided(super::manual::EmbedderOptions { + dimensions, + distribution: distribution.set(), + }) + } + fn rest( + url: String, + api_key: Setting, + request: serde_json::Value, + response: serde_json::Value, + headers: Setting>, + dimensions: Setting, + distribution: Setting, + ) -> Self { + Self::Rest(super::rest::EmbedderOptions { + api_key: api_key.set(), + dimensions: dimensions.set(), + url, + request, + response, + distribution: distribution.set(), + headers: headers.set().unwrap_or_default(), + }) + } + fn ollama( + model: Setting, + url: Setting, + api_key: Setting, + dimensions: Setting, + distribution: Setting, + ) -> Self { + let mut options: ollama::EmbedderOptions = + super::ollama::EmbedderOptions::with_default_model( + api_key.set(), + url.set(), + dimensions.set(), + ); + if let Some(model) = model.set() { + options.embedding_model = model; + } + + options.distribution = distribution.set(); + SubEmbedderOptions::Ollama(options) + } +} + +impl From for EmbedderOptions { + fn from(value: SubEmbedderOptions) -> Self { + match value { + SubEmbedderOptions::HuggingFace(embedder_options) => { + Self::HuggingFace(embedder_options) + } + SubEmbedderOptions::OpenAi(embedder_options) => Self::OpenAi(embedder_options), + SubEmbedderOptions::Ollama(embedder_options) => Self::Ollama(embedder_options), + 
SubEmbedderOptions::UserProvided(embedder_options) => { + Self::UserProvided(embedder_options) + } + SubEmbedderOptions::Rest(embedder_options) => Self::Rest(embedder_options), + } + } +}