WIP: Failing embedding no longer causes the whole search to fail

This commit is contained in:
Louis Dureuil 2024-03-05 12:29:00 +01:00
parent 0c216048b5
commit 083cdec3de
No known key found for this signature in database
2 changed files with 53 additions and 45 deletions

View File

@ -261,33 +261,56 @@ pub async fn embed(
index_scheduler: &IndexScheduler,
index: &milli::Index,
) -> Result<Option<DistributionShift>, ResponseError> {
match (&query.hybrid, &query.vector, &query.q) {
(Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q))
if !q.trim().is_empty() =>
{
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
/// TEST:
// - pure vector search without hybrid
// - pure vector search without hybrid passing a vector of the wrong dimension
// - pure vector search without hybrid, with multiple embedders, none of them called 'default'
let Some(hybrid) = &query.hybrid
else {
return Ok(None);
};
let embedder = if let Some(embedder_name) = embedder {
embedders.get(embedder_name)
} else {
embedders.get_default()
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let embedder_name = match &hybrid.embedder {
Some(embedder_name) => embedder_name.clone(),
None => embedders.get_default_embedder_name(),
};
let embedder = embedders.get(&embedder_name);
let embedder = embedder
.ok_or(milli::UserError::InvalidEmbedder(embedder_name))
.map_err(milli::Error::from)?
.0;
let distribution = embedder.distribution();
match (&query.vector, &query.q) {
(None, Some(q)) if !q.trim().is_empty() => {
let embeddings = match tokio::time::timeout(
tokio::time::Duration::from_secs(10),
embedder.embed(vec![q.to_owned()]),
)
.await
{
Ok(Ok(mut embeddings)) => embeddings.pop(),
Ok(Err(error)) => {
warn!(%error, "error while embedding");
None
}
Err(_) => {
warn!("timeout while embedding");
None
}
};
let embedder = embedder
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
.map_err(milli::Error::from)?
.0;
let distribution = embedder.distribution();
let embeddings = embedder
.embed(vec![q.to_owned()])
.await
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?
.pop()
.expect("No vector returned from embedding");
let Some(embeddings) = embeddings else {
warn!("no embedding available, vector search will not take place");
return Ok(distribution);
};
if embeddings.iter().nth(1).is_some() {
warn!("Ignoring embeddings past the first one in long search query");
@ -295,23 +318,10 @@ pub async fn embed(
} else {
query.vector = Some(embeddings.into_inner());
}
Ok(distribution)
}
(Some(hybrid), vector, _) => {
let embedder_configs = index.embedding_configs(&index.read_txn()?)?;
let embedders = index_scheduler.embedders(embedder_configs)?;
let embedder = if let Some(embedder_name) = &hybrid.embedder {
embedders.get(embedder_name)
} else {
embedders.get_default()
};
let embedder = embedder
.ok_or(milli::UserError::InvalidEmbedder("default".to_owned()))
.map_err(milli::Error::from)?
.0;
(vector, _) => {
if let Some(vector) = vector {
if vector.len() != embedder.dimensions() {
return Err(meilisearch_types::milli::Error::UserError(
@ -326,7 +336,6 @@ pub async fn embed(
Ok(embedder.distribution())
}
_ => Ok(None),
}
}

View File

@ -98,17 +98,16 @@ impl EmbeddingConfigs {
}
/// Returns the embedder/prompt pair stored under the default embedder name
/// (the name computed by `get_default_embedder_name`), or `None` when no
/// entry is registered under that name.
pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
// NOTE(review): this listing is a rendered diff without +/- markers. The two
// expressions below appear to be the pre-change body (name is an `Option`, hence
// `and_then`) and the post-change body (name is a plain `String`) — only one of
// them exists in the real file at a time.
self.get_default_embedder_name().and_then(|default| self.get(&default))
self.get(&self.get_default_embedder_name())
}
/// Computes the embedder name to use when none is given explicitly.
///
/// Only the first two keys of the inner collection (`self.0`) are inspected:
/// - exactly one key: that key is the default name;
/// - two or more keys: the literal name `"default"` is used.
///
/// The name is returned as an owned `String`; it is not checked here that an
/// embedder is actually registered under `"default"`.
// NOTE(review): this listing is a rendered diff without +/- markers, so both the
// old signature (`-> Option<String>`) and the new one (`-> String`) are shown,
// together with both sets of match arms. In the `Option` variant an empty
// collection yields `None`; in the `String` variant the catch-all arm makes an
// empty collection yield `"default"` as well.
pub fn get_default_embedder_name(&self) -> Option<String> {
pub fn get_default_embedder_name(&self) -> String {
// Two `next()` calls are enough to tell apart "none", "exactly one" and "several".
let mut it = self.0.keys();
let first_name = it.next();
let second_name = it.next();
match (first_name, second_name) {
// Arms of the `Option<String>` variant.
(None, _) => None,
(Some(first), None) => Some(first.to_owned()),
(Some(_), Some(_)) => Some("default".to_owned()),
// Arms of the `String` variant: a single embedder is its own default,
// every other case (zero or several embedders) falls back to "default".
(Some(first), None) => first.to_owned(),
_ => "default".to_owned(),
}
}
}