mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 12:27:13 +02:00
Check consistency of fragments
This commit is contained in:
parent
d72e5f5f69
commit
3f5b5df139
3 changed files with 58 additions and 20 deletions
|
@ -501,8 +501,11 @@ impl Settings<Unchecked> {
|
||||||
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
|
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
|
||||||
for (name, config) in configs.iter_mut() {
|
for (name, config) in configs.iter_mut() {
|
||||||
let config_to_check = std::mem::take(config);
|
let config_to_check = std::mem::take(config);
|
||||||
let checked_config =
|
let checked_config = milli::update::validate_embedding_settings(
|
||||||
milli::update::validate_embedding_settings(config_to_check.inner, name)?;
|
config_to_check.inner,
|
||||||
|
name,
|
||||||
|
milli::vector::settings::EmbeddingValidationContext::SettingsPartialUpdate,
|
||||||
|
)?;
|
||||||
*config = SettingEmbeddingSettings { inner: checked_config };
|
*config = SettingEmbeddingSettings { inner: checked_config };
|
||||||
}
|
}
|
||||||
self.embedders = Setting::Set(configs);
|
self.embedders = Setting::Set(configs);
|
||||||
|
|
|
@ -35,8 +35,8 @@ use crate::update::{IndexDocuments, UpdateIndexingStep};
|
||||||
use crate::vector::db::{FragmentConfigs, IndexEmbeddingConfig};
|
use crate::vector::db::{FragmentConfigs, IndexEmbeddingConfig};
|
||||||
use crate::vector::json_template::JsonTemplate;
|
use crate::vector::json_template::JsonTemplate;
|
||||||
use crate::vector::settings::{
|
use crate::vector::settings::{
|
||||||
EmbedderAction, EmbedderSource, EmbeddingSettings, NestingContext, ReindexAction,
|
EmbedderAction, EmbedderSource, EmbeddingSettings, EmbeddingValidationContext, NestingContext,
|
||||||
SubEmbeddingSettings, WriteBackToDocuments,
|
ReindexAction, SubEmbeddingSettings, WriteBackToDocuments,
|
||||||
};
|
};
|
||||||
use crate::vector::{
|
use crate::vector::{
|
||||||
Embedder, EmbeddingConfig, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
|
Embedder, EmbeddingConfig, RuntimeEmbedder, RuntimeEmbedders, RuntimeFragment,
|
||||||
|
@ -1181,13 +1181,20 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||||
};
|
};
|
||||||
|
|
||||||
embedder_actions.insert(name.clone(), embedder_action);
|
embedder_actions.insert(name.clone(), embedder_action);
|
||||||
let new = validate_embedding_settings(updated_settings, &name)?;
|
let new = validate_embedding_settings(
|
||||||
|
updated_settings,
|
||||||
|
&name,
|
||||||
|
EmbeddingValidationContext::FullSettings,
|
||||||
|
)?;
|
||||||
updated_configs.insert(name, (new, fragments));
|
updated_configs.insert(name, (new, fragments));
|
||||||
}
|
}
|
||||||
SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => {
|
SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => {
|
||||||
tracing::debug!(embedder = name, "update without reindex embedder");
|
tracing::debug!(embedder = name, "update without reindex embedder");
|
||||||
let new =
|
let new = validate_embedding_settings(
|
||||||
validate_embedding_settings(Setting::Set(updated_settings), &name)?;
|
Setting::Set(updated_settings),
|
||||||
|
&name,
|
||||||
|
EmbeddingValidationContext::FullSettings,
|
||||||
|
)?;
|
||||||
if quantize {
|
if quantize {
|
||||||
embedder_actions.insert(
|
embedder_actions.insert(
|
||||||
name.clone(),
|
name.clone(),
|
||||||
|
@ -1211,7 +1218,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||||
crate::vector::settings::EmbeddingSettings::apply_default_openai_model(
|
crate::vector::settings::EmbeddingSettings::apply_default_openai_model(
|
||||||
&mut setting,
|
&mut setting,
|
||||||
);
|
);
|
||||||
let setting = validate_embedding_settings(setting, &name)?;
|
let setting = validate_embedding_settings(
|
||||||
|
setting,
|
||||||
|
&name,
|
||||||
|
EmbeddingValidationContext::FullSettings,
|
||||||
|
)?;
|
||||||
embedder_actions.insert(
|
embedder_actions.insert(
|
||||||
name.clone(),
|
name.clone(),
|
||||||
EmbedderAction::with_reindex(ReindexAction::FullReindex, false),
|
EmbedderAction::with_reindex(ReindexAction::FullReindex, false),
|
||||||
|
@ -2079,6 +2090,7 @@ fn validate_prompt(
|
||||||
pub fn validate_embedding_settings(
|
pub fn validate_embedding_settings(
|
||||||
settings: Setting<EmbeddingSettings>,
|
settings: Setting<EmbeddingSettings>,
|
||||||
name: &str,
|
name: &str,
|
||||||
|
context: EmbeddingValidationContext,
|
||||||
) -> Result<Setting<EmbeddingSettings>> {
|
) -> Result<Setting<EmbeddingSettings>> {
|
||||||
let Setting::Set(settings) = settings else { return Ok(settings) };
|
let Setting::Set(settings) = settings else { return Ok(settings) };
|
||||||
let EmbeddingSettings {
|
let EmbeddingSettings {
|
||||||
|
@ -2119,10 +2131,10 @@ pub fn validate_embedding_settings(
|
||||||
})?;
|
})?;
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(request) = request.as_ref().set() {
|
// When validating a partial settings update, the user may have changed only `request` without sending the fragments again, so the request and fragments are only checked against full settings.
|
||||||
let request = crate::vector::rest::RequestData::new(
|
if context == EmbeddingValidationContext::FullSettings {
|
||||||
request.to_owned(),
|
if let Some(request) = request.as_ref().set() {
|
||||||
indexing_fragments
|
let indexing_fragments: BTreeMap<_, _> = indexing_fragments
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.set()
|
.set()
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -2130,8 +2142,8 @@ pub fn validate_embedding_settings(
|
||||||
.filter_map(|(name, fragment)| {
|
.filter_map(|(name, fragment)| {
|
||||||
Some((name.clone(), fragment.as_ref().map(|fragment| fragment.value.clone())?))
|
Some((name.clone(), fragment.as_ref().map(|fragment| fragment.value.clone())?))
|
||||||
})
|
})
|
||||||
.collect(),
|
.collect();
|
||||||
search_fragments
|
let search_fragments: BTreeMap<_, _> = search_fragments
|
||||||
.as_ref()
|
.as_ref()
|
||||||
.set()
|
.set()
|
||||||
.iter()
|
.iter()
|
||||||
|
@ -2139,12 +2151,29 @@ pub fn validate_embedding_settings(
|
||||||
.filter_map(|(name, fragment)| {
|
.filter_map(|(name, fragment)| {
|
||||||
Some((name.clone(), fragment.as_ref().map(|fragment| fragment.value.clone())?))
|
Some((name.clone(), fragment.as_ref().map(|fragment| fragment.value.clone())?))
|
||||||
})
|
})
|
||||||
.collect(),
|
.collect();
|
||||||
)
|
|
||||||
.map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?;
|
let are_fragments_inconsistent =
|
||||||
if let Some(response) = response.as_ref().set() {
|
indexing_fragments.is_empty() ^ search_fragments.is_empty();
|
||||||
crate::vector::rest::Response::new(response.to_owned(), &request)
|
if are_fragments_inconsistent {
|
||||||
.map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?;
|
return Err(crate::vector::error::NewEmbedderError::rest_inconsistent_fragments(
|
||||||
|
indexing_fragments.is_empty(),
|
||||||
|
indexing_fragments,
|
||||||
|
search_fragments,
|
||||||
|
))
|
||||||
|
.map_err(|error| crate::UserError::VectorEmbeddingError(error.into()).into());
|
||||||
|
}
|
||||||
|
|
||||||
|
let request = crate::vector::rest::RequestData::new(
|
||||||
|
request.to_owned(),
|
||||||
|
indexing_fragments,
|
||||||
|
search_fragments,
|
||||||
|
)
|
||||||
|
.map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?;
|
||||||
|
if let Some(response) = response.as_ref().set() {
|
||||||
|
crate::vector::rest::Response::new(response.to_owned(), &request)
|
||||||
|
.map_err(|error| crate::UserError::VectorEmbeddingError(error.into()))?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -615,6 +615,12 @@ pub struct SubEmbeddingSettings {
|
||||||
pub indexing_embedder: Setting<serde_json::Value>,
|
pub indexing_embedder: Setting<serde_json::Value>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Describes which kind of embedder settings `validate_embedding_settings` is given.
///
/// The value is passed as the `context` argument of `validate_embedding_settings`.
/// In the change shown here, the REST `request`/fragments checks only run when the
/// context is `FullSettings`.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
|
||||||
|
pub enum EmbeddingValidationContext {
|
||||||
|
/// The settings under validation are the complete settings of the embedder,
/// so `request` can be checked together with the indexing and search fragments.
FullSettings,
|
||||||
|
/// The settings under validation come from a partial settings update: the user
/// may have sent `request` without sending the fragments again, so checks that
/// need both are not applied.
SettingsPartialUpdate,
|
||||||
|
}
|
||||||
|
|
||||||
/// Indicates what action should take place during a reindexing operation for an embedder
|
/// Indicates what action should take place during a reindexing operation for an embedder
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
pub enum ReindexAction {
|
pub enum ReindexAction {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue