mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Merge #4548
4548: v1.8 hybrid search changes r=dureuill a=dureuill

Implements the search changes from the [usage page](https://meilisearch.notion.site/v1-8-AI-search-API-usage-135552d6e85a4a52bc7109be82aeca42#40f24df3da694428a39cc8043c9cfc64)

### ⚠️ Breaking changes in an experimental feature:

- Removed the `_semanticScore`. Use the `_rankingScore` instead.
- Removed `vector` in the response of the search (output was too big).
- Removed all the vectors from the `vectorSort` ranking score details
  - target vector appearing in the name of the rule
  - matched vector appearing in the details of the rule

### Other user-facing changes

- Added `semanticHitCount`, indicating how many hits were returned from the semantic search. This is especially useful in the hybrid search.
- Embed lazily: Meilisearch no longer generates an embedding when the keyword results are "good enough".
- Graceful embedding failure in hybrid search: when doing hybrid search (`semanticRatio in ]0.0, 1.0[`), an embedding failure no longer causes the search request to fail. Instead, only the keyword search is performed. When doing a full vector search (`semanticRatio==1.0`), a failure to embed will still result in failing that search.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
b1844b0c27
19 changed files with 508 additions and 322 deletions
|
@ -58,7 +58,7 @@ pub enum EmbedErrorKind {
|
|||
RestResponseDeserialization(std::io::Error),
|
||||
#[error("component `{0}` not found in path `{1}` in response: `{2}`")]
|
||||
RestResponseMissingEmbeddings(String, String, String),
|
||||
#[error("expected a response parseable as a vector or an array of vectors: {0}")]
|
||||
#[error("unexpected format of the embedding response: {0}")]
|
||||
RestResponseFormat(serde_json::Error),
|
||||
#[error("expected a response containing {0} embeddings, got only {1}")]
|
||||
RestResponseEmbeddingCount(usize, usize),
|
||||
|
@ -78,6 +78,8 @@ pub enum EmbedErrorKind {
|
|||
RestNotAnObject(serde_json::Value, Vec<String>),
|
||||
#[error("while embedding tokenized, was expecting embeddings of dimension `{0}`, got embeddings of dimensions `{1}`")]
|
||||
OpenAiUnexpectedDimension(usize, usize),
|
||||
#[error("no embedding was produced")]
|
||||
MissingEmbedding,
|
||||
}
|
||||
|
||||
impl EmbedError {
|
||||
|
@ -190,6 +192,9 @@ impl EmbedError {
|
|||
fault: FaultSource::Runtime,
|
||||
}
|
||||
}
|
||||
/// Builds the error reported when no embedding was produced.
///
/// The returned error has kind `EmbedErrorKind::MissingEmbedding`, and its
/// fault is left as `FaultSource::Undecided`.
pub(crate) fn missing_embedding() -> EmbedError {
|
||||
Self { kind: EmbedErrorKind::MissingEmbedding, fault: FaultSource::Undecided }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, thiserror::Error)]
|
||||
|
|
|
@ -143,7 +143,7 @@ impl EmbeddingConfigs {
|
|||
|
||||
/// Get the default embedder configuration, if any.
|
||||
pub fn get_default(&self) -> Option<(Arc<Embedder>, Arc<Prompt>)> {
|
||||
self.get_default_embedder_name().and_then(|default| self.get(&default))
|
||||
self.get(self.get_default_embedder_name())
|
||||
}
|
||||
|
||||
/// Get the name of the default embedder configuration.
|
||||
|
@ -153,14 +153,14 @@ impl EmbeddingConfigs {
|
|||
/// - If there is only one embedder, it is always the default.
|
||||
/// - If there are multiple embedders and one of them is called `default`, then that one is the default embedder.
|
||||
/// - In all other cases, there is no default embedder.
|
||||
pub fn get_default_embedder_name(&self) -> Option<String> {
|
||||
pub fn get_default_embedder_name(&self) -> &str {
|
||||
let mut it = self.0.keys();
|
||||
let first_name = it.next();
|
||||
let second_name = it.next();
|
||||
match (first_name, second_name) {
|
||||
(None, _) => None,
|
||||
(Some(first), None) => Some(first.to_owned()),
|
||||
(Some(_), Some(_)) => Some("default".to_owned()),
|
||||
(None, _) => "default",
|
||||
(Some(first), None) => first,
|
||||
(Some(_), Some(_)) => "default",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -237,6 +237,17 @@ impl Embedder {
|
|||
}
|
||||
}
|
||||
|
||||
/// Embeds a single text and returns one embedding for it.
///
/// The text is submitted to `self.embed` as a one-element batch, and the last
/// entry of the result is taken (presumably the only one, since a single text
/// was submitted).
/// When that entry yields more than one item, only the first item is returned
/// and a warning is logged; otherwise the entry is returned via `into_inner()`.
///
/// # Errors
///
/// - Propagates any error returned by `self.embed`.
/// - Returns `EmbedError::missing_embedding()` when `self.embed` returns
///   nothing to pop.
pub fn embed_one(&self, text: String) -> std::result::Result<Embedding, EmbedError> {
|
||||
let mut embeddings = self.embed(vec![text])?;
|
||||
let embeddings = embeddings.pop().ok_or_else(EmbedError::missing_embedding)?;
|
||||
// A second item means the text produced several embeddings; keep only the first.
Ok(if embeddings.iter().nth(1).is_some() {
|
||||
tracing::warn!("Ignoring embeddings past the first one in long search query");
|
||||
// A second item was just observed, so a first one exists and the
// `unwrap` cannot fail (assuming `iter()` yields the same items each call).
embeddings.iter().next().unwrap().to_vec()
|
||||
} else {
|
||||
embeddings.into_inner()
|
||||
})
|
||||
}
|
||||
|
||||
/// Embed multiple chunks of texts.
|
||||
///
|
||||
/// Each chunk is composed of one or multiple texts.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue