diff --git a/milli/src/vector/error.rs b/milli/src/vector/error.rs index 3673c85e3..fbe4ee878 100644 --- a/milli/src/vector/error.rs +++ b/milli/src/vector/error.rs @@ -59,8 +59,8 @@ pub enum EmbedErrorKind { OpenAiAuth(OpenAiError), #[error("sent too many requests to OpenAI: {0}")] OpenAiTooManyRequests(OpenAiError), - #[error("received internal error from OpenAI: {0}")] - OpenAiInternalServerError(OpenAiError), + #[error("received internal error from OpenAI: {0:?}")] + OpenAiInternalServerError(Option), #[error("sent too many tokens in a request to OpenAI: {0}")] OpenAiTooManyTokens(OpenAiError), #[error("received unhandled HTTP status code {0} from OpenAI")] @@ -106,7 +106,7 @@ impl EmbedError { Self { kind: EmbedErrorKind::OpenAiTooManyRequests(inner), fault: FaultSource::Runtime } } - pub(crate) fn openai_internal_server_error(inner: OpenAiError) -> EmbedError { + pub(crate) fn openai_internal_server_error(inner: Option) -> EmbedError { Self { kind: EmbedErrorKind::OpenAiInternalServerError(inner), fault: FaultSource::Runtime } } diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index cbddddfb7..33442dda4 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -178,6 +178,8 @@ impl Embedder { retry.into_duration(attempt) } }?; + + let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute tracing::warn!( "Attempt #{}, retrying after {}ms.", attempt, @@ -220,24 +222,12 @@ impl Embedder { error_response.error, ))); } - StatusCode::INTERNAL_SERVER_ERROR => { - let error_response: OpenAiErrorResponse = response - .json() - .await - .map_err(EmbedError::openai_unexpected) - .map_err(Retry::retry_later)?; + StatusCode::INTERNAL_SERVER_ERROR + | StatusCode::BAD_GATEWAY + | StatusCode::SERVICE_UNAVAILABLE => { + let error_response: Result = response.json().await; return Err(Retry::retry_later(EmbedError::openai_internal_server_error( - error_response.error, - ))); - } - StatusCode::SERVICE_UNAVAILABLE => { - let error_response: OpenAiErrorResponse = response - .json() - .await - .map_err(EmbedError::openai_unexpected) - .map_err(Retry::retry_later)?; - return Err(Retry::retry_later(EmbedError::openai_internal_server_error( - error_response.error, + error_response.ok().map(|error_response| error_response.error), ))); } StatusCode::BAD_REQUEST => { @@ -248,14 +238,14 @@ impl Embedder { .map_err(EmbedError::openai_unexpected) .map_err(Retry::retry_later)?; - tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt."); + tracing::warn!("OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your prompt."); return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens( error_response.error, ))); } code => { - return Err(Retry::give_up(EmbedError::openai_unhandled_status_code( + return Err(Retry::retry_later(EmbedError::openai_unhandled_status_code( code.as_u16(), ))); }