mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-09 22:48:54 +01:00
Merge #4906
4906: Add searchable fields to template r=dureuill a=dureuill # Pull Request ## Related issue Fixes #4886 See [public usage](https://meilisearch.notion.site/v1-11-AI-search-changes-0e37727193884a70999f254fa953ce6e#1dd6f0eee5a1422888e1c5d48e107cd1) ## What does this PR do? - `Prompt::render` now requires and uses metadata to indicate if the fields are searchable or not - Changes default template - Updated tests - Correctly reindex vectors when the list of searchable fields changes in a settings update. Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
80408c92dc
@ -5617,7 +5617,7 @@ mod tests {
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
},
|
||||
},
|
||||
user_provided: RoaringBitmap<[0]>,
|
||||
@ -5657,7 +5657,7 @@ mod tests {
|
||||
},
|
||||
),
|
||||
prompt: PromptData {
|
||||
template: "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
||||
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
},
|
||||
},
|
||||
user_provided: RoaringBitmap<[]>,
|
||||
|
@ -190,7 +190,7 @@ async fn secrets_are_hidden_in_settings() {
|
||||
"source": "rest",
|
||||
"apiKey": "My suXXXXXX...",
|
||||
"dimensions": 4,
|
||||
"documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}",
|
||||
"documentTemplate": "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
|
||||
"url": "https://localhost:7777",
|
||||
"request": "{{text}}",
|
||||
"response": "{{embedding}}",
|
||||
|
@ -1,3 +1,4 @@
|
||||
use std::collections::BTreeMap;
|
||||
use std::sync::atomic::{AtomicUsize, Ordering};
|
||||
|
||||
use meili_snap::{json_string, snapshot};
|
||||
@ -37,6 +38,46 @@ async fn create_mock() -> (MockServer, Value) {
|
||||
(mock_server, embedder_settings)
|
||||
}
|
||||
|
||||
async fn create_mock_map() -> (MockServer, Value) {
|
||||
let mock_server = MockServer::start().await;
|
||||
|
||||
let text_to_embedding: BTreeMap<_, _> = vec![
|
||||
// text -> embedding
|
||||
("name: kefir\n", [0.0, 0.1, 0.2]),
|
||||
]
|
||||
// turn into btree
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
Mock::given(method("POST"))
|
||||
.and(path("/"))
|
||||
.respond_with(move |req: &Request| {
|
||||
let text: String = req.body_json().unwrap();
|
||||
match text_to_embedding.get(text.as_str()) {
|
||||
Some(embedding) => {
|
||||
ResponseTemplate::new(200).set_body_json(json!({ "data": embedding }))
|
||||
}
|
||||
None => ResponseTemplate::new(404)
|
||||
.set_body_json(json!({"error": "text not found", "text": text})),
|
||||
}
|
||||
})
|
||||
.mount(&mock_server)
|
||||
.await;
|
||||
let url = mock_server.uri();
|
||||
|
||||
let embedder_settings = json!({
|
||||
"source": "rest",
|
||||
"url": url,
|
||||
"dimensions": 3,
|
||||
"request": "{{text}}",
|
||||
"response": {
|
||||
"data": "{{embedding}}"
|
||||
}
|
||||
});
|
||||
|
||||
(mock_server, embedder_settings)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)]
|
||||
struct MultipleRequest {
|
||||
input: Vec<String>,
|
||||
@ -1100,6 +1141,7 @@ async fn server_returns_bad_request() {
|
||||
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"searchableAttributes": ["name", "missing_field"],
|
||||
"embedders": {
|
||||
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}", "dimensions": 3 }),
|
||||
},
|
||||
@ -1115,6 +1157,10 @@ async fn server_returns_bad_request() {
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"searchableAttributes": [
|
||||
"name",
|
||||
"missing_field"
|
||||
],
|
||||
"embedders": {
|
||||
"rest": {
|
||||
"source": "rest",
|
||||
@ -1148,7 +1194,7 @@ async fn server_returns_bad_request() {
|
||||
"indexedDocuments": 0
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\" id: 1\\\\n name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 24\"}`",
|
||||
"message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 15\"}`",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
@ -1891,3 +1937,109 @@ async fn server_custom_header() {
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
||||
/// Checks that changing `searchableAttributes` makes the `rest` embedder render and embed
/// documents again.
///
/// The mock from `create_mock_map` only knows the text `name: kefir\n`, so the last settings
/// update is expected to fail with a 404 whose body echoes the newly rendered text
/// `breed: patou\n`.
#[actix_rt::test]
|
||||
async fn searchable_reindex() {
|
||||
let (_mock, setting) = create_mock_map().await;
|
||||
let server = get_server_vector().await;
|
||||
let index = server.index("doggo");
|
||||
|
||||
// step 1: register the embedder with `name` and `missing_field` as searchable attributes.
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"searchableAttributes": ["name", "missing_field"],
|
||||
"embedders": {
|
||||
"rest": setting,
|
||||
},
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = server.wait_task(response.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": "[uid]",
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"searchableAttributes": [
|
||||
"name",
|
||||
"missing_field"
|
||||
],
|
||||
"embedders": {
|
||||
"rest": {
|
||||
"source": "rest",
|
||||
"dimensions": 3,
|
||||
"url": "[url]",
|
||||
"request": "{{text}}",
|
||||
"response": {
|
||||
"data": "{{embedding}}"
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
// step 2: index one document; the snapshot below expects this task to succeed.
let (response, code) =
|
||||
index.add_documents(json!( { "id": 1, "name": "kefir", "breed": "patou" }), None).await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = server.wait_task(response.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": "[uid]",
|
||||
"indexUid": "doggo",
|
||||
"status": "succeeded",
|
||||
"type": "documentAdditionOrUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"receivedDocuments": 1,
|
||||
"indexedDocuments": 1
|
||||
},
|
||||
"error": null,
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
|
||||
// triggers reindexing with the new searchable attribute.
|
||||
// as the mock intentionally doesn't know of this text, the task will fail, outputting the putative rendered text.
|
||||
let (response, code) = index
|
||||
.update_settings(json!({
|
||||
"searchableAttributes": ["breed"],
|
||||
}))
|
||||
.await;
|
||||
snapshot!(code, @"202 Accepted");
|
||||
let task = server.wait_task(response.uid()).await;
|
||||
snapshot!(task, @r###"
|
||||
{
|
||||
"uid": "[uid]",
|
||||
"indexUid": "doggo",
|
||||
"status": "failed",
|
||||
"type": "settingsUpdate",
|
||||
"canceledBy": null,
|
||||
"details": {
|
||||
"searchableAttributes": [
|
||||
"breed"
|
||||
]
|
||||
},
|
||||
"error": {
|
||||
"message": "While embedding documents for embedder `rest`: error: received unexpected HTTP 404 from embedding server\n - server replied with `{\"error\":\"text not found\",\"text\":\"breed: patou\\n\"}`",
|
||||
"code": "vector_embedding_error",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||
},
|
||||
"duration": "[duration]",
|
||||
"enqueuedAt": "[date]",
|
||||
"startedAt": "[date]",
|
||||
"finishedAt": "[date]"
|
||||
}
|
||||
"###);
|
||||
}
|
||||
|
@ -5,7 +5,7 @@ use liquid::{ObjectView, ValueView};
|
||||
|
||||
use super::document::Document;
|
||||
use super::fields::Fields;
|
||||
use crate::FieldsIdsMap;
|
||||
use super::FieldsIdsMapWithMetadata;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Context<'a> {
|
||||
@ -14,7 +14,7 @@ pub struct Context<'a> {
|
||||
}
|
||||
|
||||
impl<'a> Context<'a> {
|
||||
pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self {
|
||||
pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
|
||||
Self { document, fields: Fields::new(document, field_id_map) }
|
||||
}
|
||||
}
|
||||
|
@ -4,16 +4,20 @@ use liquid::model::{
|
||||
use liquid::{ObjectView, ValueView};
|
||||
|
||||
use super::document::Document;
|
||||
use crate::FieldsIdsMap;
|
||||
use super::{FieldMetadata, FieldsIdsMapWithMetadata};
|
||||
/// Collection of [`FieldValue`]s built by `Fields::new`, one per entry yielded by the
/// field-id map.
#[derive(Debug, Clone)]
|
||||
pub struct Fields<'a>(Vec<FieldValue<'a>>);
|
||||
|
||||
impl<'a> Fields<'a> {
|
||||
pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMap) -> Self {
|
||||
pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
|
||||
Self(
|
||||
std::iter::repeat(document)
|
||||
.zip(field_id_map.iter())
|
||||
.map(|(document, (_fid, name))| FieldValue { document, name })
|
||||
.map(|(document, (fid, name))| FieldValue {
|
||||
document,
|
||||
name,
|
||||
metadata: field_id_map.metadata(fid).unwrap_or_default(),
|
||||
})
|
||||
.collect(),
|
||||
)
|
||||
}
|
||||
@ -23,6 +27,7 @@ impl<'a> Fields<'a> {
|
||||
pub struct FieldValue<'a> {
|
||||
name: &'a str,
|
||||
document: &'a Document<'a>,
|
||||
metadata: FieldMetadata,
|
||||
}
|
||||
|
||||
impl<'a> ValueView for FieldValue<'a> {
|
||||
@ -74,6 +79,10 @@ impl<'a> FieldValue<'a> {
|
||||
self.document.get(self.name).unwrap_or(&LiquidValue::Nil)
|
||||
}
|
||||
|
||||
pub fn is_searchable(&self) -> &bool {
|
||||
&self.metadata.searchable
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.size() == 0
|
||||
}
|
||||
@ -89,12 +98,14 @@ impl<'a> ObjectView for FieldValue<'a> {
|
||||
}
|
||||
|
||||
fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
|
||||
Box::new(["name", "value"].iter().map(|&x| KStringCow::from_static(x)))
|
||||
Box::new(["name", "value", "is_searchable"].iter().map(|&x| KStringCow::from_static(x)))
|
||||
}
|
||||
|
||||
fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
|
||||
Box::new(
|
||||
std::iter::once(self.name() as &dyn ValueView).chain(std::iter::once(self.value())),
|
||||
std::iter::once(self.name() as &dyn ValueView)
|
||||
.chain(std::iter::once(self.value()))
|
||||
.chain(std::iter::once(self.is_searchable() as &dyn ValueView)),
|
||||
)
|
||||
}
|
||||
|
||||
@ -103,13 +114,14 @@ impl<'a> ObjectView for FieldValue<'a> {
|
||||
}
|
||||
|
||||
fn contains_key(&self, index: &str) -> bool {
|
||||
index == "name" || index == "value"
|
||||
index == "name" || index == "value" || index == "is_searchable"
|
||||
}
|
||||
|
||||
/// Resolves one of the keys this object exposes: `name`, `value` or `is_searchable`.
///
/// Any other key yields `None`.
fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
    if index == "name" {
        Some(self.name())
    } else if index == "value" {
        Some(self.value())
    } else if index == "is_searchable" {
        Some(self.is_searchable())
    } else {
        None
    }
}
|
||||
|
@ -4,14 +4,16 @@ pub(crate) mod error;
|
||||
mod fields;
|
||||
mod template_checker;
|
||||
|
||||
use std::collections::BTreeMap;
|
||||
use std::convert::TryFrom;
|
||||
use std::ops::Deref;
|
||||
|
||||
use error::{NewPromptError, RenderPromptError};
|
||||
|
||||
use self::context::Context;
|
||||
use self::document::Document;
|
||||
use crate::update::del_add::DelAdd;
|
||||
use crate::FieldsIdsMap;
|
||||
use crate::{FieldId, FieldsIdsMap};
|
||||
|
||||
pub struct Prompt {
|
||||
template: liquid::Template,
|
||||
@ -53,8 +55,10 @@ fn default_template() -> liquid::Template {
|
||||
}
|
||||
|
||||
fn default_template_text() -> &'static str {
|
||||
"{% for field in fields %} \
|
||||
"{% for field in fields %}\
|
||||
{% if field.is_searchable and field.value != nil %}\
|
||||
{{ field.name }}: {{ field.value }}\n\
|
||||
{% endif %}\
|
||||
{% endfor %}"
|
||||
}
|
||||
|
||||
@ -93,7 +97,7 @@ impl Prompt {
|
||||
&self,
|
||||
document: obkv::KvReaderU16<'_>,
|
||||
side: DelAdd,
|
||||
field_id_map: &FieldsIdsMap,
|
||||
field_id_map: &FieldsIdsMapWithMetadata,
|
||||
) -> Result<String, RenderPromptError> {
|
||||
let document = Document::new(document, side, field_id_map);
|
||||
let context = Context::new(&document, field_id_map);
|
||||
@ -102,6 +106,40 @@ impl Prompt {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct FieldsIdsMapWithMetadata<'a> {
|
||||
fields_ids_map: &'a FieldsIdsMap,
|
||||
metadata: BTreeMap<FieldId, FieldMetadata>,
|
||||
}
|
||||
|
||||
impl<'a> FieldsIdsMapWithMetadata<'a> {
|
||||
pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
|
||||
let mut metadata: BTreeMap<FieldId, FieldMetadata> =
|
||||
fields_ids_map.ids().map(|id| (id, Default::default())).collect();
|
||||
for searchable_field_id in searchable_fields_ids {
|
||||
let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
|
||||
metadata.searchable = true;
|
||||
}
|
||||
Self { fields_ids_map, metadata }
|
||||
}
|
||||
|
||||
pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
|
||||
self.metadata.get(&field_id).copied()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
|
||||
type Target = FieldsIdsMap;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
self.fields_ids_map
|
||||
}
|
||||
}
|
||||
|
||||
/// Metadata attached to a single field.
#[derive(Clone, Copy, Debug, Default)]
pub struct FieldMetadata {
    /// Whether the field is searchable; `false` in the default value.
    pub searchable: bool,
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::Prompt;
|
||||
|
@ -15,14 +15,14 @@ use serde_json::Value;
|
||||
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
||||
use crate::error::FaultSource;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::prompt::Prompt;
|
||||
use crate::prompt::{FieldsIdsMapWithMetadata, Prompt};
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||
use crate::update::settings::InnerIndexSettingsDiff;
|
||||
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
||||
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::vector::settings::{EmbedderAction, ReindexAction};
|
||||
use crate::vector::{Embedder, Embeddings};
|
||||
use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort};
|
||||
use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
|
||||
|
||||
/// The length of the elements that are always in the buffer when inserting new values.
|
||||
const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
|
||||
@ -189,7 +189,13 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
let reindex_vectors = settings_diff.reindex_vectors();
|
||||
|
||||
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
|
||||
let old_fields_ids_map =
|
||||
FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids);
|
||||
|
||||
let new_fields_ids_map = &settings_diff.new.fields_ids_map;
|
||||
let new_fields_ids_map =
|
||||
FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids);
|
||||
|
||||
// the vector field id may have changed
|
||||
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
|
||||
|
||||
@ -376,7 +382,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
);
|
||||
continue;
|
||||
}
|
||||
regenerate_prompt(obkv, prompt, new_fields_ids_map)?
|
||||
regenerate_prompt(obkv, prompt, &new_fields_ids_map)?
|
||||
}
|
||||
},
|
||||
// prompt regeneration is only triggered for existing embedders
|
||||
@ -393,7 +399,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
regenerate_if_prompt_changed(
|
||||
obkv,
|
||||
(old_prompt, prompt),
|
||||
(old_fields_ids_map, new_fields_ids_map),
|
||||
(&old_fields_ids_map, &new_fields_ids_map),
|
||||
)?
|
||||
} else {
|
||||
// we can simply ignore user provided vectors as they are not regenerated and are
|
||||
@ -409,7 +415,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
||||
prompt,
|
||||
(add_to_user_provided, remove_from_user_provided),
|
||||
(old, new),
|
||||
(old_fields_ids_map, new_fields_ids_map),
|
||||
(&old_fields_ids_map, &new_fields_ids_map),
|
||||
document_id,
|
||||
embedder_name,
|
||||
embedder_is_manual,
|
||||
@ -479,7 +485,10 @@ fn extract_vector_document_diff(
|
||||
prompt: &Prompt,
|
||||
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
|
||||
(old, new): (VectorState, VectorState),
|
||||
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
|
||||
(old_fields_ids_map, new_fields_ids_map): (
|
||||
&FieldsIdsMapWithMetadata,
|
||||
&FieldsIdsMapWithMetadata,
|
||||
),
|
||||
document_id: impl Fn() -> Value,
|
||||
embedder_name: &str,
|
||||
embedder_is_manual: bool,
|
||||
@ -599,7 +608,10 @@ fn extract_vector_document_diff(
|
||||
fn regenerate_if_prompt_changed(
|
||||
obkv: obkv::KvReader<'_, FieldId>,
|
||||
(old_prompt, new_prompt): (&Prompt, &Prompt),
|
||||
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
|
||||
(old_fields_ids_map, new_fields_ids_map): (
|
||||
&FieldsIdsMapWithMetadata,
|
||||
&FieldsIdsMapWithMetadata,
|
||||
),
|
||||
) -> Result<VectorStateDelta> {
|
||||
let old_prompt =
|
||||
old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default());
|
||||
@ -614,7 +626,7 @@ fn regenerate_if_prompt_changed(
|
||||
fn regenerate_prompt(
|
||||
obkv: obkv::KvReader<'_, FieldId>,
|
||||
prompt: &Prompt,
|
||||
new_fields_ids_map: &FieldsIdsMap,
|
||||
new_fields_ids_map: &FieldsIdsMapWithMetadata,
|
||||
) -> Result<VectorStateDelta> {
|
||||
let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;
|
||||
|
||||
|
@ -1238,7 +1238,7 @@ impl InnerIndexSettingsDiff {
|
||||
old_settings: InnerIndexSettings,
|
||||
new_settings: InnerIndexSettings,
|
||||
primary_key_id: Option<FieldId>,
|
||||
embedding_config_updates: BTreeMap<String, EmbedderAction>,
|
||||
mut embedding_config_updates: BTreeMap<String, EmbedderAction>,
|
||||
settings_update_only: bool,
|
||||
) -> Self {
|
||||
let only_additional_fields = match (
|
||||
@ -1273,6 +1273,32 @@ impl InnerIndexSettingsDiff {
|
||||
let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
|
||||
!= new_settings.user_defined_searchable_fields;
|
||||
|
||||
// if the user-defined searchables changed, then we need to reindex prompts.
|
||||
if cache_user_defined_searchables {
|
||||
for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() {
|
||||
// skip embedders that don't use document templates
|
||||
if !config.uses_document_template() {
|
||||
continue;
|
||||
}
|
||||
|
||||
// note: this could currently be entry.or_insert(..), but we're future-proofing with an explicit match
|
||||
// this also makes the code clearer by explicitly handling the cases
|
||||
match embedding_config_updates.entry(embedder_name.clone()) {
|
||||
std::collections::btree_map::Entry::Vacant(entry) => {
|
||||
entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts));
|
||||
}
|
||||
std::collections::btree_map::Entry::Occupied(entry) => match entry.get() {
|
||||
EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */
|
||||
}
|
||||
EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */
|
||||
}
|
||||
EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */
|
||||
}
|
||||
},
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
InnerIndexSettingsDiff {
|
||||
old: old_settings,
|
||||
new: new_settings,
|
||||
|
@ -305,6 +305,16 @@ impl Embedder {
|
||||
Embedder::Rest(embedder) => embedder.distribution(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn uses_document_template(&self) -> bool {
|
||||
match self {
|
||||
Embedder::HuggingFace(_)
|
||||
| Embedder::OpenAi(_)
|
||||
| Embedder::Ollama(_)
|
||||
| Embedder::Rest(_) => true,
|
||||
Embedder::UserProvided(_) => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Describes the mean and sigma of distribution of embedding similarity in the embedding space.
|
||||
|
Loading…
Reference in New Issue
Block a user