From 36d17110d8f50f294734c5fcf9f226227ee8d441 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Tue, 5 Mar 2024 12:18:54 +0100
Subject: [PATCH 1/2] openai: Handle BAD_GATEWAY, be more resilient to failure

---
 milli/src/vector/error.rs  |  6 +++---
 milli/src/vector/openai.rs | 26 +++++++-------------------
 2 files changed, 10 insertions(+), 22 deletions(-)

diff --git a/milli/src/vector/error.rs b/milli/src/vector/error.rs
index 3673c85e3..fbe4ee878 100644
--- a/milli/src/vector/error.rs
+++ b/milli/src/vector/error.rs
@@ -59,8 +59,8 @@ pub enum EmbedErrorKind {
     OpenAiAuth(OpenAiError),
     #[error("sent too many requests to OpenAI: {0}")]
     OpenAiTooManyRequests(OpenAiError),
-    #[error("received internal error from OpenAI: {0}")]
-    OpenAiInternalServerError(OpenAiError),
+    #[error("received internal error from OpenAI: {0:?}")]
+    OpenAiInternalServerError(Option<OpenAiError>),
     #[error("sent too many tokens in a request to OpenAI: {0}")]
     OpenAiTooManyTokens(OpenAiError),
     #[error("received unhandled HTTP status code {0} from OpenAI")]
@@ -106,7 +106,7 @@ impl EmbedError {
         Self { kind: EmbedErrorKind::OpenAiTooManyRequests(inner), fault: FaultSource::Runtime }
     }
 
-    pub(crate) fn openai_internal_server_error(inner: OpenAiError) -> EmbedError {
+    pub(crate) fn openai_internal_server_error(inner: Option<OpenAiError>) -> EmbedError {
         Self { kind: EmbedErrorKind::OpenAiInternalServerError(inner), fault: FaultSource::Runtime }
     }
 
diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs
index cbddddfb7..cfc4b6e83 100644
--- a/milli/src/vector/openai.rs
+++ b/milli/src/vector/openai.rs
@@ -220,24 +220,12 @@ impl Embedder {
                     error_response.error,
                 )));
             }
-            StatusCode::INTERNAL_SERVER_ERROR => {
-                let error_response: OpenAiErrorResponse = response
-                    .json()
-                    .await
-                    .map_err(EmbedError::openai_unexpected)
-                    .map_err(Retry::retry_later)?;
+            StatusCode::INTERNAL_SERVER_ERROR
+            | StatusCode::BAD_GATEWAY
+            | StatusCode::SERVICE_UNAVAILABLE => {
+                let error_response: Result<OpenAiErrorResponse, _> = response.json().await;
                 return Err(Retry::retry_later(EmbedError::openai_internal_server_error(
-                    error_response.error,
-                )));
-            }
-            StatusCode::SERVICE_UNAVAILABLE => {
-                let error_response: OpenAiErrorResponse = response
-                    .json()
-                    .await
-                    .map_err(EmbedError::openai_unexpected)
-                    .map_err(Retry::retry_later)?;
-                return Err(Retry::retry_later(EmbedError::openai_internal_server_error(
-                    error_response.error,
+                    error_response.ok().map(|error_response| error_response.error),
                 )));
             }
             StatusCode::BAD_REQUEST => {
@@ -248,14 +236,14 @@ impl Embedder {
                     .map_err(EmbedError::openai_unexpected)
                     .map_err(Retry::retry_later)?;
 
-                tracing::warn!("OpenAI: input was too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
+                tracing::warn!("OpenAI: received `BAD_REQUEST`. Input was maybe too long, retrying on tokenized version. For best performance, limit the size of your prompt.");
 
                 return Err(Retry::retry_tokenized(EmbedError::openai_too_many_tokens(
                     error_response.error,
                 )));
             }
             code => {
-                return Err(Retry::give_up(EmbedError::openai_unhandled_status_code(
+                return Err(Retry::retry_later(EmbedError::openai_unhandled_status_code(
                     code.as_u16(),
                 )));
             }

From 0c216048b53ecbf4fe2cb63f1a03786aed7dadab Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Tue, 5 Mar 2024 12:19:25 +0100
Subject: [PATCH 2/2] Cap timeout duration

---
 milli/src/vector/openai.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs
index cfc4b6e83..33442dda4 100644
--- a/milli/src/vector/openai.rs
+++ b/milli/src/vector/openai.rs
@@ -178,6 +178,8 @@ impl Embedder {
                 retry.into_duration(attempt)
             }
         }?;
+
+        let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
         tracing::warn!(
            "Attempt #{}, retrying after {}ms.",
            attempt,
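
Note (not part of the patches above): a minimal standalone sketch of the behavior the two patches aim for — treat 500/502/503 (INTERNAL_SERVER_ERROR, BAD_GATEWAY, SERVICE_UNAVAILABLE) as retryable and cap the retry backoff at one minute. The function names, the 100 ms base delay, and the doubling schedule are illustrative assumptions, not Meilisearch's actual retry schedule.

// Standalone illustration, std-only Rust.
use std::time::Duration;

/// 500, 502 and 503 are transient server-side failures: retry later.
fn is_retryable(status: u16) -> bool {
    matches!(status, 500 | 502 | 503)
}

/// Hypothetical backoff schedule: 100 ms doubled per attempt, never more than 60 s.
fn backoff(attempt: u32) -> Duration {
    let raw = Duration::from_millis(100) * 2u32.saturating_pow(attempt);
    raw.min(Duration::from_secs(60))
}

fn main() {
    assert!(is_retryable(502));  // BAD_GATEWAY is now retried instead of failing the task
    assert!(!is_retryable(400)); // BAD_REQUEST takes the tokenized-retry path instead
    for attempt in 0..12 {
        println!("attempt #{attempt}: waiting {:?}", backoff(attempt));
    }
}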