mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 06:44:27 +01:00
Add EmbedderAction to settings
This commit is contained in:
parent
e9bf4eb100
commit
d0b05ae691
@ -1,4 +1,5 @@
|
|||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use super::rest::InputType;
|
use super::rest::InputType;
|
||||||
@ -72,6 +73,245 @@ pub fn check_unset<T>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Indicates what action should take place during a reindexing operation for an embedder
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
pub enum ReindexAction {
|
||||||
|
/// An indexing operation should take place for this embedder, keeping existing vectors
|
||||||
|
/// and checking whether the document template changed or not
|
||||||
|
RegeneratePrompts,
|
||||||
|
/// An indexing operation should take place for all documents for this embedder, removing existing vectors
|
||||||
|
/// (except userProvided ones)
|
||||||
|
FullReindex,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum SettingsDiff {
|
||||||
|
Remove,
|
||||||
|
Reindex { action: ReindexAction, updated_settings: EmbeddingSettings },
|
||||||
|
UpdateWithoutReindex { updated_settings: EmbeddingSettings },
|
||||||
|
}
|
||||||
|
|
||||||
|
pub enum EmbedderAction {
|
||||||
|
WriteBackToDocuments(WriteBackToDocuments),
|
||||||
|
Reindex(ReindexAction),
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct WriteBackToDocuments {
|
||||||
|
pub embedder_id: u8,
|
||||||
|
pub user_provided: RoaringBitmap,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SettingsDiff {
|
||||||
|
pub fn should_reindex(&self) -> bool {
|
||||||
|
match self {
|
||||||
|
SettingsDiff::Remove { .. } | SettingsDiff::Reindex { .. } => true,
|
||||||
|
SettingsDiff::UpdateWithoutReindex { .. } => false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_settings(old: EmbeddingSettings, new: Setting<EmbeddingSettings>) -> Self {
|
||||||
|
match new {
|
||||||
|
Setting::Set(new) => {
|
||||||
|
let EmbeddingSettings {
|
||||||
|
mut source,
|
||||||
|
mut model,
|
||||||
|
mut revision,
|
||||||
|
mut api_key,
|
||||||
|
mut dimensions,
|
||||||
|
mut document_template,
|
||||||
|
mut url,
|
||||||
|
mut query,
|
||||||
|
mut input_field,
|
||||||
|
mut path_to_embeddings,
|
||||||
|
mut embedding_object,
|
||||||
|
mut input_type,
|
||||||
|
mut distribution,
|
||||||
|
} = old;
|
||||||
|
|
||||||
|
let EmbeddingSettings {
|
||||||
|
source: new_source,
|
||||||
|
model: new_model,
|
||||||
|
revision: new_revision,
|
||||||
|
api_key: new_api_key,
|
||||||
|
dimensions: new_dimensions,
|
||||||
|
document_template: new_document_template,
|
||||||
|
url: new_url,
|
||||||
|
query: new_query,
|
||||||
|
input_field: new_input_field,
|
||||||
|
path_to_embeddings: new_path_to_embeddings,
|
||||||
|
embedding_object: new_embedding_object,
|
||||||
|
input_type: new_input_type,
|
||||||
|
distribution: new_distribution,
|
||||||
|
} = new;
|
||||||
|
|
||||||
|
let mut reindex_action = None;
|
||||||
|
|
||||||
|
// **Warning**: do not use short-circuiting || here, we want all these operations applied
|
||||||
|
if source.apply(new_source) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
// when the source changes, we need to reapply the default settings for the new source
|
||||||
|
apply_default_for_source(
|
||||||
|
&source,
|
||||||
|
&mut model,
|
||||||
|
&mut revision,
|
||||||
|
&mut dimensions,
|
||||||
|
&mut url,
|
||||||
|
&mut query,
|
||||||
|
&mut input_field,
|
||||||
|
&mut path_to_embeddings,
|
||||||
|
&mut embedding_object,
|
||||||
|
&mut input_type,
|
||||||
|
&mut document_template,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
if model.apply(new_model) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
}
|
||||||
|
if revision.apply(new_revision) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
}
|
||||||
|
if dimensions.apply(new_dimensions) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
}
|
||||||
|
if url.apply(new_url) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
}
|
||||||
|
if query.apply(new_query) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
}
|
||||||
|
if input_field.apply(new_input_field) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
}
|
||||||
|
if path_to_embeddings.apply(new_path_to_embeddings) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
}
|
||||||
|
if embedding_object.apply(new_embedding_object) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
}
|
||||||
|
if input_type.apply(new_input_type) {
|
||||||
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
|
}
|
||||||
|
if document_template.apply(new_document_template) {
|
||||||
|
ReindexAction::push_action(
|
||||||
|
&mut reindex_action,
|
||||||
|
ReindexAction::RegeneratePrompts,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
distribution.apply(new_distribution);
|
||||||
|
api_key.apply(new_api_key);
|
||||||
|
|
||||||
|
let updated_settings = EmbeddingSettings {
|
||||||
|
source,
|
||||||
|
model,
|
||||||
|
revision,
|
||||||
|
api_key,
|
||||||
|
dimensions,
|
||||||
|
document_template,
|
||||||
|
url,
|
||||||
|
query,
|
||||||
|
input_field,
|
||||||
|
path_to_embeddings,
|
||||||
|
embedding_object,
|
||||||
|
input_type,
|
||||||
|
distribution,
|
||||||
|
};
|
||||||
|
|
||||||
|
match reindex_action {
|
||||||
|
Some(action) => Self::Reindex { action, updated_settings },
|
||||||
|
None => Self::UpdateWithoutReindex { updated_settings },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Setting::Reset => Self::Remove,
|
||||||
|
Setting::NotSet => Self::UpdateWithoutReindex { updated_settings: old },
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ReindexAction {
|
||||||
|
fn push_action(this: &mut Option<Self>, other: Self) {
|
||||||
|
*this = match (*this, other) {
|
||||||
|
(_, ReindexAction::FullReindex) => Some(ReindexAction::FullReindex),
|
||||||
|
(Some(ReindexAction::FullReindex), _) => Some(ReindexAction::FullReindex),
|
||||||
|
(_, ReindexAction::RegeneratePrompts) => Some(ReindexAction::RegeneratePrompts),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::too_many_arguments)] // private function
|
||||||
|
fn apply_default_for_source(
|
||||||
|
source: &Setting<EmbedderSource>,
|
||||||
|
model: &mut Setting<String>,
|
||||||
|
revision: &mut Setting<String>,
|
||||||
|
dimensions: &mut Setting<usize>,
|
||||||
|
url: &mut Setting<String>,
|
||||||
|
query: &mut Setting<serde_json::Value>,
|
||||||
|
input_field: &mut Setting<Vec<String>>,
|
||||||
|
path_to_embeddings: &mut Setting<Vec<String>>,
|
||||||
|
embedding_object: &mut Setting<Vec<String>>,
|
||||||
|
input_type: &mut Setting<InputType>,
|
||||||
|
document_template: &mut Setting<String>,
|
||||||
|
) {
|
||||||
|
match source {
|
||||||
|
Setting::Set(EmbedderSource::HuggingFace) => {
|
||||||
|
*model = Setting::Reset;
|
||||||
|
*revision = Setting::Reset;
|
||||||
|
*dimensions = Setting::NotSet;
|
||||||
|
*url = Setting::NotSet;
|
||||||
|
*query = Setting::NotSet;
|
||||||
|
*input_field = Setting::NotSet;
|
||||||
|
*path_to_embeddings = Setting::NotSet;
|
||||||
|
*embedding_object = Setting::NotSet;
|
||||||
|
*input_type = Setting::NotSet;
|
||||||
|
}
|
||||||
|
Setting::Set(EmbedderSource::Ollama) => {
|
||||||
|
*model = Setting::Reset;
|
||||||
|
*revision = Setting::NotSet;
|
||||||
|
*dimensions = Setting::Reset;
|
||||||
|
*url = Setting::NotSet;
|
||||||
|
*query = Setting::NotSet;
|
||||||
|
*input_field = Setting::NotSet;
|
||||||
|
*path_to_embeddings = Setting::NotSet;
|
||||||
|
*embedding_object = Setting::NotSet;
|
||||||
|
*input_type = Setting::NotSet;
|
||||||
|
}
|
||||||
|
Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {
|
||||||
|
*model = Setting::Reset;
|
||||||
|
*revision = Setting::NotSet;
|
||||||
|
*dimensions = Setting::NotSet;
|
||||||
|
*url = Setting::NotSet;
|
||||||
|
*query = Setting::NotSet;
|
||||||
|
*input_field = Setting::NotSet;
|
||||||
|
*path_to_embeddings = Setting::NotSet;
|
||||||
|
*embedding_object = Setting::NotSet;
|
||||||
|
*input_type = Setting::NotSet;
|
||||||
|
}
|
||||||
|
Setting::Set(EmbedderSource::Rest) => {
|
||||||
|
*model = Setting::NotSet;
|
||||||
|
*revision = Setting::NotSet;
|
||||||
|
*dimensions = Setting::Reset;
|
||||||
|
*url = Setting::Reset;
|
||||||
|
*query = Setting::Reset;
|
||||||
|
*input_field = Setting::Reset;
|
||||||
|
*path_to_embeddings = Setting::Reset;
|
||||||
|
*embedding_object = Setting::Reset;
|
||||||
|
*input_type = Setting::Reset;
|
||||||
|
}
|
||||||
|
Setting::Set(EmbedderSource::UserProvided) => {
|
||||||
|
*model = Setting::NotSet;
|
||||||
|
*revision = Setting::NotSet;
|
||||||
|
*dimensions = Setting::Reset;
|
||||||
|
*url = Setting::NotSet;
|
||||||
|
*query = Setting::NotSet;
|
||||||
|
*input_field = Setting::NotSet;
|
||||||
|
*path_to_embeddings = Setting::NotSet;
|
||||||
|
*embedding_object = Setting::NotSet;
|
||||||
|
*input_type = Setting::NotSet;
|
||||||
|
*document_template = Setting::NotSet;
|
||||||
|
}
|
||||||
|
Setting::NotSet => {}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn check_set<T>(
|
pub fn check_set<T>(
|
||||||
key: &Setting<T>,
|
key: &Setting<T>,
|
||||||
field: &'static str,
|
field: &'static str,
|
||||||
@ -210,66 +450,6 @@ impl EmbeddingSettings {
|
|||||||
*model = Setting::Set(openai::EmbeddingModel::default().name().to_owned())
|
*model = Setting::Set(openai::EmbeddingModel::default().name().to_owned())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn apply_and_need_reindex(
|
|
||||||
old: &mut Setting<EmbeddingSettings>,
|
|
||||||
new: Setting<EmbeddingSettings>,
|
|
||||||
) -> bool {
|
|
||||||
match (old, new) {
|
|
||||||
(
|
|
||||||
Setting::Set(EmbeddingSettings {
|
|
||||||
source: old_source,
|
|
||||||
model: old_model,
|
|
||||||
revision: old_revision,
|
|
||||||
api_key: old_api_key,
|
|
||||||
dimensions: old_dimensions,
|
|
||||||
document_template: old_document_template,
|
|
||||||
url: old_url,
|
|
||||||
query: old_query,
|
|
||||||
input_field: old_input_field,
|
|
||||||
path_to_embeddings: old_path_to_embeddings,
|
|
||||||
embedding_object: old_embedding_object,
|
|
||||||
input_type: old_input_type,
|
|
||||||
distribution: old_distribution,
|
|
||||||
}),
|
|
||||||
Setting::Set(EmbeddingSettings {
|
|
||||||
source: new_source,
|
|
||||||
model: new_model,
|
|
||||||
revision: new_revision,
|
|
||||||
api_key: new_api_key,
|
|
||||||
dimensions: new_dimensions,
|
|
||||||
document_template: new_document_template,
|
|
||||||
url: new_url,
|
|
||||||
query: new_query,
|
|
||||||
input_field: new_input_field,
|
|
||||||
path_to_embeddings: new_path_to_embeddings,
|
|
||||||
embedding_object: new_embedding_object,
|
|
||||||
input_type: new_input_type,
|
|
||||||
distribution: new_distribution,
|
|
||||||
}),
|
|
||||||
) => {
|
|
||||||
let mut needs_reindex = false;
|
|
||||||
|
|
||||||
needs_reindex |= old_source.apply(new_source);
|
|
||||||
needs_reindex |= old_model.apply(new_model);
|
|
||||||
needs_reindex |= old_revision.apply(new_revision);
|
|
||||||
needs_reindex |= old_dimensions.apply(new_dimensions);
|
|
||||||
needs_reindex |= old_document_template.apply(new_document_template);
|
|
||||||
needs_reindex |= old_url.apply(new_url);
|
|
||||||
needs_reindex |= old_query.apply(new_query);
|
|
||||||
needs_reindex |= old_input_field.apply(new_input_field);
|
|
||||||
needs_reindex |= old_path_to_embeddings.apply(new_path_to_embeddings);
|
|
||||||
needs_reindex |= old_embedding_object.apply(new_embedding_object);
|
|
||||||
needs_reindex |= old_input_type.apply(new_input_type);
|
|
||||||
|
|
||||||
old_distribution.apply(new_distribution);
|
|
||||||
old_api_key.apply(new_api_key);
|
|
||||||
needs_reindex
|
|
||||||
}
|
|
||||||
(Setting::Reset, Setting::Reset) | (_, Setting::NotSet) => false,
|
|
||||||
_ => true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
|
#[derive(Debug, Clone, Copy, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
|
||||||
|
Loading…
Reference in New Issue
Block a user