From 083cdec3dee6ad80803e57ac905509170272c036 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 5 Mar 2024 12:29:00 +0100 Subject: [PATCH] WIP: Failing embedding no longer causes the whole search to fail --- meilisearch/src/routes/indexes/search.rs | 89 +++++++++++++----------- milli/src/vector/mod.rs | 9 ++- 2 files changed, 53 insertions(+), 45 deletions(-) diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 3adfce970..ae36e171e 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -261,33 +261,56 @@ pub async fn embed( index_scheduler: &IndexScheduler, index: &milli::Index, ) -> Result<Option<DistributionShift>, ResponseError> { - match (&query.hybrid, &query.vector, &query.q) { - (Some(HybridQuery { semantic_ratio: _, embedder }), None, Some(q)) - if !q.trim().is_empty() => - { - let embedder_configs = index.embedding_configs(&index.read_txn()?)?; - let embedders = index_scheduler.embedders(embedder_configs)?; + /// TEST: + // - pure vector search without hybrid + // - pure vector search without hybrid passing a vector of the wrong dimension + // - pure vector search without hybrid, with multiple embedders, none of them called 'default' + let Some(hybrid) = &query.hybrid + else { + return Ok(None); + }; - let embedder = if let Some(embedder_name) = embedder { - embedders.get(embedder_name) - } else { - embedders.get_default() + let embedder_configs = index.embedding_configs(&index.read_txn()?)?; + + let embedders = index_scheduler.embedders(embedder_configs)?; + + let embedder_name = match &hybrid.embedder { + Some(embedder_name) => embedder_name.clone(), + None => embedders.get_default_embedder_name(), + }; + + let embedder = embedders.get(&embedder_name); + + let embedder = embedder + .ok_or(milli::UserError::InvalidEmbedder(embedder_name)) + .map_err(milli::Error::from)?
+ .0; + + let distribution = embedder.distribution(); + + match (&query.vector, &query.q) { + (None, Some(q)) if !q.trim().is_empty() => { + let embeddings = match tokio::time::timeout( + tokio::time::Duration::from_secs(10), + embedder.embed(vec![q.to_owned()]), + ) + .await + { + Ok(Ok(mut embeddings)) => embeddings.pop(), + Ok(Err(error)) => { + warn!(%error, "error while embedding"); + None + } + Err(_) => { + warn!("timeout while embedding"); + None + } }; - let embedder = embedder - .ok_or(milli::UserError::InvalidEmbedder("default".to_owned())) - .map_err(milli::Error::from)? - .0; - - let distribution = embedder.distribution(); - - let embeddings = embedder - .embed(vec![q.to_owned()]) - .await - .map_err(milli::vector::Error::from) - .map_err(milli::Error::from)? - .pop() - .expect("No vector returned from embedding"); + let Some(embeddings) = embeddings else { + warn!("no embedding available, vector search will not take place"); + return Ok(distribution); + }; if embeddings.iter().nth(1).is_some() { warn!("Ignoring embeddings past the first one in long search query"); @@ -295,23 +318,10 @@ pub async fn embed( } else { query.vector = Some(embeddings.into_inner()); } + Ok(distribution) } - (Some(hybrid), vector, _) => { - let embedder_configs = index.embedding_configs(&index.read_txn()?)?; - let embedders = index_scheduler.embedders(embedder_configs)?; - - let embedder = if let Some(embedder_name) = &hybrid.embedder { - embedders.get(embedder_name) - } else { - embedders.get_default() - }; - - let embedder = embedder - .ok_or(milli::UserError::InvalidEmbedder("default".to_owned())) - .map_err(milli::Error::from)? 
- .0; - + (vector, _) => { if let Some(vector) = vector { if vector.len() != embedder.dimensions() { return Err(meilisearch_types::milli::Error::UserError( @@ -326,7 +336,6 @@ pub async fn embed( Ok(embedder.distribution()) } - _ => Ok(None), } } diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 6aa324da9..d5b1ac301 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -98,17 +98,16 @@ impl EmbeddingConfigs { } pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> { - self.get_default_embedder_name().and_then(|default| self.get(&default)) + self.get(&self.get_default_embedder_name()) } - pub fn get_default_embedder_name(&self) -> Option<String> { + pub fn get_default_embedder_name(&self) -> String { let mut it = self.0.keys(); let first_name = it.next(); let second_name = it.next(); match (first_name, second_name) { - (None, _) => None, - (Some(first), None) => Some(first.to_owned()), - (Some(_), Some(_)) => Some("default".to_owned()), + (Some(first), None) => first.to_owned(), + _ => "default".to_owned(), } } }