mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-01 17:15:46 +01:00
Merge #4892
4892: Add a documentTemplateMaxBytes parameter to limit the max length of document templates r=ManyTheFish a=dureuill # Pull Request ## Related issue Fixes #4885 See [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#a3d63628129e40adba943ae7b8ec06c2) Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
40e13ceef3
@ -5403,6 +5403,9 @@ mod tests {
|
|||||||
),
|
),
|
||||||
prompt: PromptData {
|
prompt: PromptData {
|
||||||
template: "{{doc.doggo}}",
|
template: "{{doc.doggo}}",
|
||||||
|
max_bytes: Some(
|
||||||
|
400,
|
||||||
|
),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
user_provided: RoaringBitmap<[1, 2]>,
|
user_provided: RoaringBitmap<[1, 2]>,
|
||||||
@ -5618,6 +5621,9 @@ mod tests {
|
|||||||
),
|
),
|
||||||
prompt: PromptData {
|
prompt: PromptData {
|
||||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||||
|
max_bytes: Some(
|
||||||
|
400,
|
||||||
|
),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
user_provided: RoaringBitmap<[0]>,
|
user_provided: RoaringBitmap<[0]>,
|
||||||
@ -5658,6 +5664,9 @@ mod tests {
|
|||||||
),
|
),
|
||||||
prompt: PromptData {
|
prompt: PromptData {
|
||||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||||
|
max_bytes: Some(
|
||||||
|
400,
|
||||||
|
),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
user_provided: RoaringBitmap<[]>,
|
user_provided: RoaringBitmap<[]>,
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), 
dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: 
Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super 
secret"), dimensions: Set(384), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), document_template_max_bytes: NotSet, url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued [0,]
|
enqueued [0,]
|
||||||
|
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
|
|||||||
[]
|
[]
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### All Tasks:
|
### All Tasks:
|
||||||
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, document_template_max_bytes: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
|
||||||
----------------------------------------------------------------------
|
----------------------------------------------------------------------
|
||||||
### Status:
|
### Status:
|
||||||
enqueued []
|
enqueued []
|
||||||
|
@ -388,6 +388,7 @@ impl ErrorCode for milli::Error {
|
|||||||
| UserError::InvalidOpenAiModelDimensionsMax { .. }
|
| UserError::InvalidOpenAiModelDimensionsMax { .. }
|
||||||
| UserError::InvalidSettingsDimensions { .. }
|
| UserError::InvalidSettingsDimensions { .. }
|
||||||
| UserError::InvalidUrl { .. }
|
| UserError::InvalidUrl { .. }
|
||||||
|
| UserError::InvalidSettingsDocumentTemplateMaxBytes { .. }
|
||||||
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
|
| UserError::InvalidPrompt(_) => Code::InvalidSettingsEmbedders,
|
||||||
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
UserError::TooManyEmbedders(_) => Code::InvalidSettingsEmbedders,
|
||||||
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
UserError::InvalidPromptForEmbeddings(..) => Code::InvalidSettingsEmbedders,
|
||||||
|
@ -636,11 +636,19 @@ fn embedder_analytics(
|
|||||||
.any(|config| config.document_template.set().is_some())
|
.any(|config| config.document_template.set().is_some())
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let document_template_max_bytes = setting.as_ref().and_then(|map| {
|
||||||
|
map.values()
|
||||||
|
.filter_map(|config| config.clone().set())
|
||||||
|
.filter_map(|config| config.document_template_max_bytes.set())
|
||||||
|
.max()
|
||||||
|
});
|
||||||
|
|
||||||
json!(
|
json!(
|
||||||
{
|
{
|
||||||
"total": setting.as_ref().map(|s| s.len()),
|
"total": setting.as_ref().map(|s| s.len()),
|
||||||
"sources": sources,
|
"sources": sources,
|
||||||
"document_template_used": document_template_used,
|
"document_template_used": document_template_used,
|
||||||
|
"document_template_max_bytes": document_template_max_bytes
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
@ -2097,7 +2097,8 @@ async fn generate_and_import_dump_containing_vectors() {
|
|||||||
"source": "huggingFace",
|
"source": "huggingFace",
|
||||||
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
"model": "sentence-transformers/all-MiniLM-L6-v2",
|
||||||
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
|
||||||
"documentTemplate": "{{doc.doggo}}"
|
"documentTemplate": "{{doc.doggo}}",
|
||||||
|
"documentTemplateMaxBytes": 400
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"searchCutoffMs": null,
|
"searchCutoffMs": null,
|
||||||
|
@ -191,6 +191,7 @@ async fn secrets_are_hidden_in_settings() {
|
|||||||
"apiKey": "My suXXXXXX...",
|
"apiKey": "My suXXXXXX...",
|
||||||
"dimensions": 4,
|
"dimensions": 4,
|
||||||
"documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
"documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||||
|
"documentTemplateMaxBytes": 400,
|
||||||
"url": "https://localhost:7777",
|
"url": "https://localhost:7777",
|
||||||
"request": "{{text}}",
|
"request": "{{text}}",
|
||||||
"response": "{{embedding}}",
|
"response": "{{embedding}}",
|
||||||
|
@ -302,7 +302,8 @@ async fn create_mock_with_template(
|
|||||||
"source": "openAi",
|
"source": "openAi",
|
||||||
"url": url,
|
"url": url,
|
||||||
"apiKey": API_KEY,
|
"apiKey": API_KEY,
|
||||||
"documentTemplate": document_template
|
"documentTemplate": document_template,
|
||||||
|
"documentTemplateMaxBytes": 8000000,
|
||||||
});
|
});
|
||||||
|
|
||||||
model_dimensions.add_to_settings(&mut embedder_settings);
|
model_dimensions.add_to_settings(&mut embedder_settings);
|
||||||
@ -693,6 +694,7 @@ async fn bad_api_key() {
|
|||||||
"model": "text-embedding-3-large",
|
"model": "text-embedding-3-large",
|
||||||
"apiKey": "XXX...",
|
"apiKey": "XXX...",
|
||||||
"documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
|
"documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
|
||||||
|
"documentTemplateMaxBytes": 8000000,
|
||||||
"url": "[url]"
|
"url": "[url]"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -735,6 +737,7 @@ async fn bad_api_key() {
|
|||||||
"source": "openAi",
|
"source": "openAi",
|
||||||
"model": "text-embedding-3-large",
|
"model": "text-embedding-3-large",
|
||||||
"documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
|
"documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.",
|
||||||
|
"documentTemplateMaxBytes": 8000000,
|
||||||
"url": "[url]"
|
"url": "[url]"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -258,6 +258,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||||||
},
|
},
|
||||||
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
|
#[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
|
||||||
InvalidSettingsDimensions { embedder_name: String },
|
InvalidSettingsDimensions { embedder_name: String },
|
||||||
|
#[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
|
||||||
|
InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
|
||||||
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
|
#[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
|
||||||
InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String },
|
InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String },
|
||||||
#[error("Document editions cannot modify a document's primary key")]
|
#[error("Document editions cannot modify a document's primary key")]
|
||||||
|
@ -6,6 +6,7 @@ mod template_checker;
|
|||||||
|
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
use std::ops::Deref;
|
use std::ops::Deref;
|
||||||
|
|
||||||
use error::{NewPromptError, RenderPromptError};
|
use error::{NewPromptError, RenderPromptError};
|
||||||
@ -18,16 +19,18 @@ use crate::{FieldId, FieldsIdsMap};
|
|||||||
pub struct Prompt {
|
pub struct Prompt {
|
||||||
template: liquid::Template,
|
template: liquid::Template,
|
||||||
template_text: String,
|
template_text: String,
|
||||||
|
max_bytes: Option<NonZeroUsize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
||||||
pub struct PromptData {
|
pub struct PromptData {
|
||||||
pub template: String,
|
pub template: String,
|
||||||
|
pub max_bytes: Option<NonZeroUsize>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<Prompt> for PromptData {
|
impl From<Prompt> for PromptData {
|
||||||
fn from(value: Prompt) -> Self {
|
fn from(value: Prompt) -> Self {
|
||||||
Self { template: value.template_text }
|
Self { template: value.template_text, max_bytes: value.max_bytes }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -35,14 +38,18 @@ impl TryFrom<PromptData> for Prompt {
|
|||||||
type Error = NewPromptError;
|
type Error = NewPromptError;
|
||||||
|
|
||||||
fn try_from(value: PromptData) -> Result<Self, Self::Error> {
|
fn try_from(value: PromptData) -> Result<Self, Self::Error> {
|
||||||
Prompt::new(value.template)
|
Prompt::new(value.template, value.max_bytes)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Clone for Prompt {
|
impl Clone for Prompt {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
let template_text = self.template_text.clone();
|
let template_text = self.template_text.clone();
|
||||||
Self { template: new_template(&template_text).unwrap(), template_text }
|
Self {
|
||||||
|
template: new_template(&template_text).unwrap(),
|
||||||
|
template_text,
|
||||||
|
max_bytes: self.max_bytes,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -62,20 +69,28 @@ fn default_template_text() -> &'static str {
|
|||||||
{% endfor %}"
|
{% endfor %}"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn default_max_bytes() -> NonZeroUsize {
|
||||||
|
NonZeroUsize::new(400).unwrap()
|
||||||
|
}
|
||||||
|
|
||||||
impl Default for Prompt {
|
impl Default for Prompt {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { template: default_template(), template_text: default_template_text().into() }
|
Self {
|
||||||
|
template: default_template(),
|
||||||
|
template_text: default_template_text().into(),
|
||||||
|
max_bytes: Some(default_max_bytes()),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for PromptData {
|
impl Default for PromptData {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { template: default_template_text().into() }
|
Self { template: default_template_text().into(), max_bytes: Some(default_max_bytes()) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Prompt {
|
impl Prompt {
|
||||||
pub fn new(template: String) -> Result<Self, NewPromptError> {
|
pub fn new(template: String, max_bytes: Option<NonZeroUsize>) -> Result<Self, NewPromptError> {
|
||||||
let this = Self {
|
let this = Self {
|
||||||
template: liquid::ParserBuilder::with_stdlib()
|
template: liquid::ParserBuilder::with_stdlib()
|
||||||
.build()
|
.build()
|
||||||
@ -83,6 +98,7 @@ impl Prompt {
|
|||||||
.parse(&template)
|
.parse(&template)
|
||||||
.map_err(NewPromptError::cannot_parse_template)?,
|
.map_err(NewPromptError::cannot_parse_template)?,
|
||||||
template_text: template,
|
template_text: template,
|
||||||
|
max_bytes,
|
||||||
};
|
};
|
||||||
|
|
||||||
// render template with special object that's OK with `doc.*` and `fields.*`
|
// render template with special object that's OK with `doc.*` and `fields.*`
|
||||||
@ -102,7 +118,24 @@ impl Prompt {
|
|||||||
let document = Document::new(document, side, field_id_map);
|
let document = Document::new(document, side, field_id_map);
|
||||||
let context = Context::new(&document, field_id_map);
|
let context = Context::new(&document, field_id_map);
|
||||||
|
|
||||||
self.template.render(&context).map_err(RenderPromptError::missing_context)
|
let mut rendered =
|
||||||
|
self.template.render(&context).map_err(RenderPromptError::missing_context)?;
|
||||||
|
if let Some(max_bytes) = self.max_bytes {
|
||||||
|
truncate(&mut rendered, max_bytes.get());
|
||||||
|
}
|
||||||
|
Ok(rendered)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Shortens `s` in place so that it occupies at most `max_bytes` bytes.
///
/// A string that already fits is left untouched. Otherwise the cut point is
/// moved back from `max_bytes` to the nearest UTF-8 character boundary, so a
/// multi-byte character that straddles the limit is dropped whole instead of
/// being split (which would make `String::truncate` panic).
fn truncate(s: &mut String, max_bytes: usize) {
    if max_bytes >= s.len() {
        return;
    }
    // Scan downwards from the limit for the first valid boundary. Index 0 is
    // always a boundary, so the search cannot come back empty; the fallback
    // only exists to avoid an `unwrap`.
    let cut = (0..=max_bytes).rev().find(|&i| s.is_char_boundary(i)).unwrap_or(0);
    s.truncate(cut);
}
|
||||||
|
|
||||||
@ -145,6 +178,7 @@ mod test {
|
|||||||
use super::Prompt;
|
use super::Prompt;
|
||||||
use crate::error::FaultSource;
|
use crate::error::FaultSource;
|
||||||
use crate::prompt::error::{NewPromptError, NewPromptErrorKind};
|
use crate::prompt::error::{NewPromptError, NewPromptErrorKind};
|
||||||
|
use crate::prompt::truncate;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn default_template() {
|
fn default_template() {
|
||||||
@ -154,18 +188,18 @@ mod test {
|
|||||||
|
|
||||||
// An empty template string must be accepted: the result of `Prompt::new` is unwrapped.
// The second call line is the post-change form, which passes `None` for the new `max_bytes` argument.
#[test]
|
#[test]
|
||||||
fn empty_template() {
|
fn empty_template() {
|
||||||
Prompt::new("".into()).unwrap();
|
Prompt::new("".into(), None).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
// A template that only reads `doc.*` fields must be accepted by `Prompt::new`.
// The second call line is the post-change form, which passes `None` for the new `max_bytes` argument.
#[test]
|
#[test]
|
||||||
fn template_ok() {
|
fn template_ok() {
|
||||||
Prompt::new("{{doc.title}}: {{doc.overview}}".into()).unwrap();
|
Prompt::new("{{doc.title}}: {{doc.overview}}".into(), None).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn template_syntax() {
|
fn template_syntax() {
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
Prompt::new("{{doc.title: {{doc.overview}}".into()),
|
Prompt::new("{{doc.title: {{doc.overview}}".into(), None),
|
||||||
Err(NewPromptError {
|
Err(NewPromptError {
|
||||||
kind: NewPromptErrorKind::CannotParseTemplate(_),
|
kind: NewPromptErrorKind::CannotParseTemplate(_),
|
||||||
fault: FaultSource::User
|
fault: FaultSource::User
|
||||||
@ -176,7 +210,7 @@ mod test {
|
|||||||
#[test]
|
#[test]
|
||||||
fn template_missing_doc() {
|
fn template_missing_doc() {
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
Prompt::new("{{title}}: {{overview}}".into()),
|
Prompt::new("{{title}}: {{overview}}".into(), None),
|
||||||
Err(NewPromptError {
|
Err(NewPromptError {
|
||||||
kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
|
kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
|
||||||
fault: FaultSource::User
|
fault: FaultSource::User
|
||||||
@ -186,17 +220,20 @@ mod test {
|
|||||||
|
|
||||||
// Nested access below `doc` (`doc.actor.firstName`) must be accepted by `Prompt::new`.
// The second call line is the post-change form, which passes `None` for the new `max_bytes` argument.
#[test]
|
#[test]
|
||||||
fn template_nested_doc() {
|
fn template_nested_doc() {
|
||||||
Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into()).unwrap();
|
Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into(), None).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Iterating over `fields` and printing each `field` as a whole must be accepted by `Prompt::new`.
// The second call line is the post-change form, which passes `None` for the new `max_bytes` argument.
#[test]
|
#[test]
|
||||||
fn template_fields() {
|
fn template_fields() {
|
||||||
Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into()).unwrap();
|
Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into(), None).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reading `field.name` and `field.value` inside a loop over `fields` must be accepted by `Prompt::new`.
// The one-line call is the pre-change form; the multi-line call after it is the post-change form,
// reformatted because of the extra `None` (`max_bytes`) argument.
#[test]
|
#[test]
|
||||||
fn template_fields_ok() {
|
fn template_fields_ok() {
|
||||||
Prompt::new("{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into())
|
Prompt::new(
|
||||||
|
"{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into(),
|
||||||
|
None,
|
||||||
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -204,11 +241,41 @@ mod test {
|
|||||||
fn template_fields_invalid() {
|
fn template_fields_invalid() {
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
// intentionally garbled field
|
// intentionally garbled field
|
||||||
Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into()),
|
Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into(), None),
|
||||||
Err(NewPromptError {
|
Err(NewPromptError {
|
||||||
kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
|
kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
|
||||||
fault: FaultSource::User
|
fault: FaultSource::User
|
||||||
})
|
})
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// truncation must cut on a UTF-8 character boundary at or below the byte limit
|
||||||
|
/// Checks that `truncate` never splits a multi-byte character: every limit
/// that falls inside a 3-byte character drops that character entirely.
#[test]
fn template_truncation() {
    let mut s = "インテル ザー ビーグル".to_string();

    // limit above the length: nothing to do
    truncate(&mut s, 42);
    assert_eq!(s, "インテル ザー ビーグル");

    // limit exactly equal to the length: still untouched
    assert_eq!(s.len(), 32);
    truncate(&mut s, 32);
    assert_eq!(s, "インテル ザー ビーグル");

    // one byte short of the last character: the whole character goes
    truncate(&mut s, 31);
    assert_eq!(s, "インテル ザー ビーグ");
    // the string is now 29 bytes, so a limit of 30 changes nothing
    truncate(&mut s, 30);
    assert_eq!(s, "インテル ザー ビーグ");
    // 28 is inside the last character; the cut falls back to byte 26
    truncate(&mut s, 28);
    assert_eq!(s, "インテル ザー ビー");
    // limit equal to the current length
    truncate(&mut s, 26);
    assert_eq!(s, "インテル ザー ビー");
    // 25 is inside the last character; the cut falls back to byte 23
    truncate(&mut s, 25);
    assert_eq!(s, "インテル ザー ビ");

    // a limit of exactly one character keeps that character
    assert_eq!("イ".len(), 3);
    truncate(&mut s, 3);
    assert_eq!(s, "イ");
    // fewer bytes than the first character needs: the result is empty
    truncate(&mut s, 2);
    assert_eq!(s, "");
}
|
||||||
}
|
}
|
||||||
|
@ -2740,6 +2740,7 @@ mod tests {
|
|||||||
api_key: Setting::NotSet,
|
api_key: Setting::NotSet,
|
||||||
dimensions: Setting::Set(3),
|
dimensions: Setting::Set(3),
|
||||||
document_template: Setting::NotSet,
|
document_template: Setting::NotSet,
|
||||||
|
document_template_max_bytes: Setting::NotSet,
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
use std::result::Result as StdResult;
|
use std::result::Result as StdResult;
|
||||||
use std::sync::Arc;
|
use std::sync::Arc;
|
||||||
|
|
||||||
@ -19,6 +20,7 @@ use crate::index::{
|
|||||||
IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
|
IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
|
||||||
};
|
};
|
||||||
use crate::order_by_map::OrderByMap;
|
use crate::order_by_map::OrderByMap;
|
||||||
|
use crate::prompt::default_max_bytes;
|
||||||
use crate::proximity::ProximityPrecision;
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::update::index_documents::IndexDocumentsMethod;
|
use crate::update::index_documents::IndexDocumentsMethod;
|
||||||
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
||||||
@ -1573,14 +1575,28 @@ fn validate_prompt(
|
|||||||
api_key,
|
api_key,
|
||||||
dimensions,
|
dimensions,
|
||||||
document_template: Setting::Set(template),
|
document_template: Setting::Set(template),
|
||||||
|
document_template_max_bytes,
|
||||||
url,
|
url,
|
||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
}) => {
|
}) => {
|
||||||
|
let max_bytes = match document_template_max_bytes.set() {
|
||||||
|
Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
|
||||||
|
crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes {
|
||||||
|
embedder_name: name.to_owned(),
|
||||||
|
}
|
||||||
|
})?,
|
||||||
|
None => default_max_bytes(),
|
||||||
|
};
|
||||||
|
|
||||||
// validate
|
// validate
|
||||||
let template = crate::prompt::Prompt::new(template)
|
let template = crate::prompt::Prompt::new(
|
||||||
|
template,
|
||||||
|
// always specify a max_bytes
|
||||||
|
Some(max_bytes),
|
||||||
|
)
|
||||||
.map(|prompt| crate::prompt::PromptData::from(prompt).template)
|
.map(|prompt| crate::prompt::PromptData::from(prompt).template)
|
||||||
.map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
|
.map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
|
||||||
|
|
||||||
@ -1591,6 +1607,7 @@ fn validate_prompt(
|
|||||||
api_key,
|
api_key,
|
||||||
dimensions,
|
dimensions,
|
||||||
document_template: Setting::Set(template),
|
document_template: Setting::Set(template),
|
||||||
|
document_template_max_bytes,
|
||||||
url,
|
url,
|
||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
@ -1615,6 +1632,7 @@ pub fn validate_embedding_settings(
|
|||||||
api_key,
|
api_key,
|
||||||
dimensions,
|
dimensions,
|
||||||
document_template,
|
document_template,
|
||||||
|
document_template_max_bytes,
|
||||||
url,
|
url,
|
||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
@ -1654,6 +1672,7 @@ pub fn validate_embedding_settings(
|
|||||||
api_key,
|
api_key,
|
||||||
dimensions,
|
dimensions,
|
||||||
document_template,
|
document_template,
|
||||||
|
document_template_max_bytes,
|
||||||
url,
|
url,
|
||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
@ -1726,6 +1745,12 @@ pub fn validate_embedding_settings(
|
|||||||
inferred_source,
|
inferred_source,
|
||||||
name,
|
name,
|
||||||
)?;
|
)?;
|
||||||
|
check_unset(
|
||||||
|
&document_template_max_bytes,
|
||||||
|
EmbeddingSettings::DOCUMENT_TEMPLATE_MAX_BYTES,
|
||||||
|
inferred_source,
|
||||||
|
name,
|
||||||
|
)?;
|
||||||
check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
|
check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
|
||||||
|
|
||||||
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
||||||
@ -1748,6 +1773,7 @@ pub fn validate_embedding_settings(
|
|||||||
api_key,
|
api_key,
|
||||||
dimensions,
|
dimensions,
|
||||||
document_template,
|
document_template,
|
||||||
|
document_template_max_bytes,
|
||||||
url,
|
url,
|
||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
|
use std::num::NonZeroUsize;
|
||||||
|
|
||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use super::{ollama, openai, DistributionShift};
|
use super::{ollama, openai, DistributionShift};
|
||||||
use crate::prompt::PromptData;
|
use crate::prompt::{default_max_bytes, PromptData};
|
||||||
use crate::update::Setting;
|
use crate::update::Setting;
|
||||||
use crate::vector::EmbeddingConfig;
|
use crate::vector::EmbeddingConfig;
|
||||||
use crate::UserError;
|
use crate::UserError;
|
||||||
@ -34,6 +35,9 @@ pub struct EmbeddingSettings {
|
|||||||
pub document_template: Setting<String>,
|
pub document_template: Setting<String>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
|
pub document_template_max_bytes: Setting<usize>,
|
||||||
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
|
#[deserr(default)]
|
||||||
pub url: Setting<String>,
|
pub url: Setting<String>,
|
||||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||||
#[deserr(default)]
|
#[deserr(default)]
|
||||||
@ -111,6 +115,7 @@ impl SettingsDiff {
|
|||||||
mut response,
|
mut response,
|
||||||
mut distribution,
|
mut distribution,
|
||||||
mut headers,
|
mut headers,
|
||||||
|
mut document_template_max_bytes,
|
||||||
} = old;
|
} = old;
|
||||||
|
|
||||||
let EmbeddingSettings {
|
let EmbeddingSettings {
|
||||||
@ -125,6 +130,7 @@ impl SettingsDiff {
|
|||||||
response: new_response,
|
response: new_response,
|
||||||
distribution: new_distribution,
|
distribution: new_distribution,
|
||||||
headers: new_headers,
|
headers: new_headers,
|
||||||
|
document_template_max_bytes: new_document_template_max_bytes,
|
||||||
} = new;
|
} = new;
|
||||||
|
|
||||||
let mut reindex_action = None;
|
let mut reindex_action = None;
|
||||||
@ -142,6 +148,7 @@ impl SettingsDiff {
|
|||||||
&mut request,
|
&mut request,
|
||||||
&mut response,
|
&mut response,
|
||||||
&mut document_template,
|
&mut document_template,
|
||||||
|
&mut document_template_max_bytes,
|
||||||
&mut headers,
|
&mut headers,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
@ -190,6 +197,23 @@ impl SettingsDiff {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if document_template_max_bytes.apply(new_document_template_max_bytes) {
|
||||||
|
let previous_document_template_max_bytes =
|
||||||
|
document_template_max_bytes.set().unwrap_or(default_max_bytes().get());
|
||||||
|
let new_document_template_max_bytes =
|
||||||
|
new_document_template_max_bytes.set().unwrap_or(default_max_bytes().get());
|
||||||
|
|
||||||
|
// only reindex if the size increased. Reasoning:
|
||||||
|
// - size decrease is a performance optimization, so we don't reindex and we keep the more accurate vectors
|
||||||
|
// - size increase is an accuracy optimization, so we want to reindex
|
||||||
|
if new_document_template_max_bytes > previous_document_template_max_bytes {
|
||||||
|
ReindexAction::push_action(
|
||||||
|
&mut reindex_action,
|
||||||
|
ReindexAction::RegeneratePrompts,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
distribution.apply(new_distribution);
|
distribution.apply(new_distribution);
|
||||||
api_key.apply(new_api_key);
|
api_key.apply(new_api_key);
|
||||||
headers.apply(new_headers);
|
headers.apply(new_headers);
|
||||||
@ -206,6 +230,7 @@ impl SettingsDiff {
|
|||||||
response,
|
response,
|
||||||
distribution,
|
distribution,
|
||||||
headers,
|
headers,
|
||||||
|
document_template_max_bytes,
|
||||||
};
|
};
|
||||||
|
|
||||||
match reindex_action {
|
match reindex_action {
|
||||||
@ -239,6 +264,7 @@ fn apply_default_for_source(
|
|||||||
request: &mut Setting<serde_json::Value>,
|
request: &mut Setting<serde_json::Value>,
|
||||||
response: &mut Setting<serde_json::Value>,
|
response: &mut Setting<serde_json::Value>,
|
||||||
document_template: &mut Setting<String>,
|
document_template: &mut Setting<String>,
|
||||||
|
document_template_max_bytes: &mut Setting<usize>,
|
||||||
headers: &mut Setting<BTreeMap<String, String>>,
|
headers: &mut Setting<BTreeMap<String, String>>,
|
||||||
) {
|
) {
|
||||||
match source {
|
match source {
|
||||||
@ -286,6 +312,7 @@ fn apply_default_for_source(
|
|||||||
*request = Setting::NotSet;
|
*request = Setting::NotSet;
|
||||||
*response = Setting::NotSet;
|
*response = Setting::NotSet;
|
||||||
*document_template = Setting::NotSet;
|
*document_template = Setting::NotSet;
|
||||||
|
*document_template_max_bytes = Setting::NotSet;
|
||||||
*headers = Setting::NotSet;
|
*headers = Setting::NotSet;
|
||||||
}
|
}
|
||||||
Setting::NotSet => {}
|
Setting::NotSet => {}
|
||||||
@ -316,6 +343,7 @@ impl EmbeddingSettings {
|
|||||||
pub const API_KEY: &'static str = "apiKey";
|
pub const API_KEY: &'static str = "apiKey";
|
||||||
pub const DIMENSIONS: &'static str = "dimensions";
|
pub const DIMENSIONS: &'static str = "dimensions";
|
||||||
pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
|
pub const DOCUMENT_TEMPLATE: &'static str = "documentTemplate";
|
||||||
|
pub const DOCUMENT_TEMPLATE_MAX_BYTES: &'static str = "documentTemplateMaxBytes";
|
||||||
|
|
||||||
pub const URL: &'static str = "url";
|
pub const URL: &'static str = "url";
|
||||||
pub const REQUEST: &'static str = "request";
|
pub const REQUEST: &'static str = "request";
|
||||||
@ -459,6 +487,8 @@ impl std::fmt::Display for EmbedderSource {
|
|||||||
impl From<EmbeddingConfig> for EmbeddingSettings {
|
impl From<EmbeddingConfig> for EmbeddingSettings {
|
||||||
fn from(value: EmbeddingConfig) -> Self {
|
fn from(value: EmbeddingConfig) -> Self {
|
||||||
let EmbeddingConfig { embedder_options, prompt } = value;
|
let EmbeddingConfig { embedder_options, prompt } = value;
|
||||||
|
let document_template_max_bytes =
|
||||||
|
Setting::Set(prompt.max_bytes.unwrap_or(default_max_bytes()).get());
|
||||||
match embedder_options {
|
match embedder_options {
|
||||||
super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions {
|
super::EmbedderOptions::HuggingFace(super::hf::EmbedderOptions {
|
||||||
model,
|
model,
|
||||||
@ -471,6 +501,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
api_key: Setting::NotSet,
|
api_key: Setting::NotSet,
|
||||||
dimensions: Setting::NotSet,
|
dimensions: Setting::NotSet,
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
|
document_template_max_bytes,
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
@ -490,6 +521,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
api_key: Setting::some_or_not_set(api_key),
|
api_key: Setting::some_or_not_set(api_key),
|
||||||
dimensions: Setting::some_or_not_set(dimensions),
|
dimensions: Setting::some_or_not_set(dimensions),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
|
document_template_max_bytes,
|
||||||
url: Setting::some_or_not_set(url),
|
url: Setting::some_or_not_set(url),
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
@ -509,6 +541,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
api_key: Setting::some_or_not_set(api_key),
|
api_key: Setting::some_or_not_set(api_key),
|
||||||
dimensions: Setting::some_or_not_set(dimensions),
|
dimensions: Setting::some_or_not_set(dimensions),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
|
document_template_max_bytes,
|
||||||
url: Setting::some_or_not_set(url),
|
url: Setting::some_or_not_set(url),
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
@ -525,6 +558,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
api_key: Setting::NotSet,
|
api_key: Setting::NotSet,
|
||||||
dimensions: Setting::Set(dimensions),
|
dimensions: Setting::Set(dimensions),
|
||||||
document_template: Setting::NotSet,
|
document_template: Setting::NotSet,
|
||||||
|
document_template_max_bytes: Setting::NotSet,
|
||||||
url: Setting::NotSet,
|
url: Setting::NotSet,
|
||||||
request: Setting::NotSet,
|
request: Setting::NotSet,
|
||||||
response: Setting::NotSet,
|
response: Setting::NotSet,
|
||||||
@ -546,6 +580,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
api_key: Setting::some_or_not_set(api_key),
|
api_key: Setting::some_or_not_set(api_key),
|
||||||
dimensions: Setting::some_or_not_set(dimensions),
|
dimensions: Setting::some_or_not_set(dimensions),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
|
document_template_max_bytes,
|
||||||
url: Setting::Set(url),
|
url: Setting::Set(url),
|
||||||
request: Setting::Set(request),
|
request: Setting::Set(request),
|
||||||
response: Setting::Set(response),
|
response: Setting::Set(response),
|
||||||
@ -566,6 +601,7 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
api_key,
|
api_key,
|
||||||
dimensions,
|
dimensions,
|
||||||
document_template,
|
document_template,
|
||||||
|
document_template_max_bytes,
|
||||||
url,
|
url,
|
||||||
request,
|
request,
|
||||||
response,
|
response,
|
||||||
@ -648,7 +684,12 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if let Setting::Set(template) = document_template {
|
if let Setting::Set(template) = document_template {
|
||||||
this.prompt = PromptData { template }
|
let max_bytes = document_template_max_bytes
|
||||||
|
.set()
|
||||||
|
.and_then(NonZeroUsize::new)
|
||||||
|
.unwrap_or(default_max_bytes());
|
||||||
|
|
||||||
|
this.prompt = PromptData { template, max_bytes: Some(max_bytes) }
|
||||||
}
|
}
|
||||||
|
|
||||||
this
|
this
|
||||||
|
Loading…
Reference in New Issue
Block a user