milli: add Embedder::embed_one

This commit is contained in:
Louis Dureuil 2024-03-28 11:49:23 +01:00
parent 00c4ed3bc2
commit fabc9cf14a
No known key found for this signature in database
2 changed files with 17 additions and 1 deletions

View File

@ -58,7 +58,7 @@ pub enum EmbedErrorKind {
RestResponseDeserialization(std::io::Error),
#[error("component `{0}` not found in path `{1}` in response: `{2}`")]
RestResponseMissingEmbeddings(String, String, String),
#[error("expected a response parseable as a vector or an array of vectors: {0}")]
#[error("unexpected format of the embedding response: {0}")]
RestResponseFormat(serde_json::Error),
#[error("expected a response containing {0} embeddings, got only {1}")]
RestResponseEmbeddingCount(usize, usize),
@ -78,6 +78,8 @@ pub enum EmbedErrorKind {
RestNotAnObject(serde_json::Value, Vec<String>),
#[error("while embedding tokenized, was expecting embeddings of dimension `{0}`, got embeddings of dimensions `{1}`")]
OpenAiUnexpectedDimension(usize, usize),
#[error("no embedding was produced")]
MissingEmbedding,
}
impl EmbedError {
@ -190,6 +192,9 @@ impl EmbedError {
fault: FaultSource::Runtime,
}
}
pub(crate) fn missing_embedding() -> EmbedError {
Self { kind: EmbedErrorKind::MissingEmbedding, fault: FaultSource::Undecided }
}
}
#[derive(Debug, thiserror::Error)]

View File

@ -237,6 +237,17 @@ impl Embedder {
}
}
/// Embed a single text and return exactly one embedding for it.
///
/// The text is handed to `embed` as a one-element batch, and the last entry of the
/// returned collection is used as the result for that text.
///
/// When that entry holds more than one vector, only the first vector is returned and a
/// warning is logged; otherwise the entry is unwrapped with `into_inner()` and returned
/// as-is.
///
/// # Errors
///
/// - Any error returned by `embed` is propagated unchanged.
/// - `EmbedError::missing_embedding()` is returned when `embed` yields no entry at all.
pub fn embed_one(&self, text: String) -> std::result::Result<Embedding, EmbedError> {
    let mut results = self.embed(vec![text])?;
    let chunk = match results.pop() {
        Some(chunk) => chunk,
        None => return Err(EmbedError::missing_embedding()),
    };

    // A second vector in the entry means there is more than one embedding for this text.
    let has_several = chunk.iter().nth(1).is_some();
    if !has_several {
        // Zero or one vector: hand back the entry's content without copying it.
        return Ok(chunk.into_inner());
    }

    tracing::warn!("Ignoring embeddings past the first one in long search query");
    // `has_several` guarantees at least two vectors, so the first one always exists.
    let first = chunk.iter().next().unwrap();
    Ok(first.to_vec())
}
/// Embed multiple chunks of texts.
///
/// Each chunk is composed of one or multiple texts.