Reindex embedders

This commit is contained in:
Louis Dureuil 2024-09-02 12:58:09 +02:00
parent 03fda78901
commit 21296190a3
No known key found for this signature in database
2 changed files with 37 additions and 1 deletions

View File

@ -1238,7 +1238,7 @@ impl InnerIndexSettingsDiff {
old_settings: InnerIndexSettings,
new_settings: InnerIndexSettings,
primary_key_id: Option<FieldId>,
embedding_config_updates: BTreeMap<String, EmbedderAction>,
mut embedding_config_updates: BTreeMap<String, EmbedderAction>,
settings_update_only: bool,
) -> Self {
let only_additional_fields = match (
@ -1273,6 +1273,32 @@ impl InnerIndexSettingsDiff {
let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
!= new_settings.user_defined_searchable_fields;
// if the user-defined searchables changed, then we need to reindex prompts.
if cache_user_defined_searchables {
for (embedder_name, (config, _)) in new_settings.embedding_configs.inner_as_ref() {
// skip embedders that don't use document templates
if !config.uses_document_template() {
continue;
}
// note: this could currently be entry.or_insert(..), but we're future-proofing with an explicit match
// this always makes the code clearer by explicitly handling the cases
match embedding_config_updates.entry(embedder_name.clone()) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(EmbedderAction::Reindex(ReindexAction::RegeneratePrompts));
}
std::collections::btree_map::Entry::Occupied(entry) => match entry.get() {
EmbedderAction::WriteBackToDocuments(_) => { /* we are deleting this embedder, so no point in regeneration */
}
EmbedderAction::Reindex(ReindexAction::FullReindex) => { /* we are already fully reindexing */
}
EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => { /* we are already regenerating prompts */
}
},
};
}
}
InnerIndexSettingsDiff {
old: old_settings,
new: new_settings,

View File

@ -305,6 +305,16 @@ impl Embedder {
Embedder::Rest(embedder) => embedder.distribution(),
}
}
pub fn uses_document_template(&self) -> bool {
match self {
Embedder::HuggingFace(_)
| Embedder::OpenAi(_)
| Embedder::Ollama(_)
| Embedder::Rest(_) => true,
Embedder::UserProvided(_) => false,
}
}
}
/// Describes the mean and sigma of distribution of embedding similarity in the embedding space.