4846: Add OpenAI tests r=dureuill a=dureuill

# Pull Request

## Related issue
Part of fixing #4757 

## What does this PR do?
- OpenAI embedder: don't pass apiKey when it is empty (slightly improves error messages)
- REST embedder and REST-based embedders: specialize the authorization-denied error message depending on the configuration source
- fix existing tests
- Adds assets containing prerecorded texts to embed and the embeddings obtained from OpenAI
- Adds an asset containing a tokenized long document and the embedding obtained from OpenAI for these tokens
- Uses the wiremock crate to mock the OpenAI API: parse the OpenAI request, look up the response in the assets, craft an OpenAI response


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-08-05 10:49:28 +00:00 committed by GitHub
commit 57f7af77c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 1969 additions and 75 deletions

View file

@ -62,8 +62,18 @@ pub enum EmbedErrorKind {
RestResponseDeserialization(std::io::Error),
#[error("expected a response containing {0} embeddings, got only {1}")]
RestResponseEmbeddingCount(usize, usize),
#[error("could not authenticate against embedding server{}", option_info(.0.as_deref(), "server replied with "))]
RestUnauthorized(Option<String>),
#[error("could not authenticate against {embedding} server{server_reply}{hint}", embedding=match *.1 {
ConfigurationSource::User => "embedding",
ConfigurationSource::OpenAi => "OpenAI",
ConfigurationSource::Ollama => "ollama"
},
server_reply=option_info(.0.as_deref(), "server replied with "),
hint=match *.1 {
ConfigurationSource::User => "\n - Hint: Check the `apiKey` parameter in the embedder configuration",
ConfigurationSource::OpenAi => "\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables",
ConfigurationSource::Ollama => "\n - Hint: Check the `apiKey` parameter in the embedder configuration"
})]
RestUnauthorized(Option<String>, ConfigurationSource),
#[error("sent too many requests to embedding server{}", option_info(.0.as_deref(), "server replied with "))]
RestTooManyRequests(Option<String>),
#[error("sent a bad request to embedding server{}{}",
@ -136,8 +146,14 @@ impl EmbedError {
}
}
pub(crate) fn rest_unauthorized(error_response: Option<String>) -> EmbedError {
Self { kind: EmbedErrorKind::RestUnauthorized(error_response), fault: FaultSource::User }
pub(crate) fn rest_unauthorized(
error_response: Option<String>,
configuration_source: ConfigurationSource,
) -> EmbedError {
Self {
kind: EmbedErrorKind::RestUnauthorized(error_response, configuration_source),
fault: FaultSource::User,
}
}
pub(crate) fn rest_too_many_requests(error_response: Option<String>) -> EmbedError {

View file

@ -183,7 +183,7 @@ impl Embedder {
let rest_embedder = RestEmbedder::new(
RestEmbedderOptions {
api_key: Some(api_key.clone()),
api_key: (!api_key.is_empty()).then(|| api_key.clone()),
distribution: None,
dimensions: Some(options.dimensions()),
url,

View file

@ -275,7 +275,10 @@ fn check_response(
Err(ureq::Error::Status(code, response)) => {
let error_response: Option<String> = response.into_string().ok();
Err(match code {
401 => Retry::give_up(EmbedError::rest_unauthorized(error_response)),
401 => Retry::give_up(EmbedError::rest_unauthorized(
error_response,
configuration_source,
)),
429 => Retry::rate_limited(EmbedError::rest_too_many_requests(error_response)),
400 => Retry::give_up(EmbedError::rest_bad_request(
error_response,