mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 19:57:30 +01:00
Merge #4456
4456: Add Ollama as an embeddings provider r=dureuill a=jakobklemm # Pull Request ## Related issue [Related Discord Thread](https://discord.com/channels/1006923006964154428/1211977150316683305) ## What does this PR do? - Adds Ollama as a provider of Embeddings besides HuggingFace and OpenAI under the name `ollama` - Adds the environment variable `MEILI_OLLAMA_URL` to set the embeddings URL of an Ollama instance with a default value of `http://localhost:11434/api/embeddings` if no variable is set - Changes some of the structs and functions in `openai.rs` to be public so that they can be shared. - Added more error variants for Ollama specific errors - It uses the model `nomic-embed-text` as default, but any string value is allowed, however it won't automatically check if the model actually exists or is an embedding model Tested against Ollama version `v0.1.27` and the `nomic-embed-text` model. ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Co-authored-by: Jakob Klemm <jakob@jeykey.net> Co-authored-by: Louis Dureuil <louis.dureuil@gmail.com>
This commit is contained in:
commit
5ed7b6a0b2
@ -604,6 +604,7 @@ fn embedder_analytics(
|
|||||||
EmbedderSource::OpenAi => sources.insert("openAi"),
|
EmbedderSource::OpenAi => sources.insert("openAi"),
|
||||||
EmbedderSource::HuggingFace => sources.insert("huggingFace"),
|
EmbedderSource::HuggingFace => sources.insert("huggingFace"),
|
||||||
EmbedderSource::UserProvided => sources.insert("userProvided"),
|
EmbedderSource::UserProvided => sources.insert("userProvided"),
|
||||||
|
EmbedderSource::Ollama => sources.insert("ollama"),
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -1178,6 +1178,13 @@ pub fn validate_embedding_settings(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
EmbedderSource::Ollama => {
|
||||||
|
// Dimensions get inferred, only model name is required
|
||||||
|
check_unset(&dimensions, "dimensions", inferred_source, name)?;
|
||||||
|
check_set(&model, "model", inferred_source, name)?;
|
||||||
|
check_unset(&api_key, "apiKey", inferred_source, name)?;
|
||||||
|
check_unset(&revision, "revision", inferred_source, name)?;
|
||||||
|
}
|
||||||
EmbedderSource::HuggingFace => {
|
EmbedderSource::HuggingFace => {
|
||||||
check_unset(&api_key, "apiKey", inferred_source, name)?;
|
check_unset(&api_key, "apiKey", inferred_source, name)?;
|
||||||
check_unset(&dimensions, "dimensions", inferred_source, name)?;
|
check_unset(&dimensions, "dimensions", inferred_source, name)?;
|
||||||
|
@ -2,6 +2,7 @@ use std::path::PathBuf;
|
|||||||
|
|
||||||
use hf_hub::api::sync::ApiError;
|
use hf_hub::api::sync::ApiError;
|
||||||
|
|
||||||
|
use super::ollama::OllamaError;
|
||||||
use crate::error::FaultSource;
|
use crate::error::FaultSource;
|
||||||
use crate::vector::openai::OpenAiError;
|
use crate::vector::openai::OpenAiError;
|
||||||
|
|
||||||
@ -71,6 +72,17 @@ pub enum EmbedErrorKind {
|
|||||||
OpenAiRuntimeInit(std::io::Error),
|
OpenAiRuntimeInit(std::io::Error),
|
||||||
#[error("initializing web client for sending embedding requests failed: {0}")]
|
#[error("initializing web client for sending embedding requests failed: {0}")]
|
||||||
InitWebClient(reqwest::Error),
|
InitWebClient(reqwest::Error),
|
||||||
|
// Dedicated Ollama error kinds, might have to merge them into one cohesive error type for all backends.
|
||||||
|
#[error("unexpected response from Ollama: {0}")]
|
||||||
|
OllamaUnexpected(reqwest::Error),
|
||||||
|
#[error("sent too many requests to Ollama: {0}")]
|
||||||
|
OllamaTooManyRequests(OllamaError),
|
||||||
|
#[error("received internal error from Ollama: {0}")]
|
||||||
|
OllamaInternalServerError(OllamaError),
|
||||||
|
#[error("model not found. Meilisearch will not automatically download models from the Ollama library, please pull the model manually: {0}")]
|
||||||
|
OllamaModelNotFoundError(OllamaError),
|
||||||
|
#[error("received unhandled HTTP status code {0} from Ollama")]
|
||||||
|
OllamaUnhandledStatusCode(u16),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl EmbedError {
|
impl EmbedError {
|
||||||
@ -129,6 +141,26 @@ impl EmbedError {
|
|||||||
pub fn openai_initialize_web_client(inner: reqwest::Error) -> Self {
|
pub fn openai_initialize_web_client(inner: reqwest::Error) -> Self {
|
||||||
Self { kind: EmbedErrorKind::InitWebClient(inner), fault: FaultSource::Runtime }
|
Self { kind: EmbedErrorKind::InitWebClient(inner), fault: FaultSource::Runtime }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub(crate) fn ollama_unexpected(inner: reqwest::Error) -> EmbedError {
|
||||||
|
Self { kind: EmbedErrorKind::OllamaUnexpected(inner), fault: FaultSource::Bug }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn ollama_model_not_found(inner: OllamaError) -> EmbedError {
|
||||||
|
Self { kind: EmbedErrorKind::OllamaModelNotFoundError(inner), fault: FaultSource::User }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn ollama_too_many_requests(inner: OllamaError) -> EmbedError {
|
||||||
|
Self { kind: EmbedErrorKind::OllamaTooManyRequests(inner), fault: FaultSource::Runtime }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn ollama_internal_server_error(inner: OllamaError) -> EmbedError {
|
||||||
|
Self { kind: EmbedErrorKind::OllamaInternalServerError(inner), fault: FaultSource::Runtime }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn ollama_unhandled_status_code(code: u16) -> EmbedError {
|
||||||
|
Self { kind: EmbedErrorKind::OllamaUnhandledStatusCode(code), fault: FaultSource::Bug }
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
@ -195,6 +227,13 @@ impl NewEmbedderError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn ollama_could_not_determine_dimension(inner: EmbedError) -> NewEmbedderError {
|
||||||
|
Self {
|
||||||
|
kind: NewEmbedderErrorKind::CouldNotDetermineDimension(inner),
|
||||||
|
fault: FaultSource::User,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn openai_invalid_api_key_format(inner: reqwest::header::InvalidHeaderValue) -> Self {
|
pub fn openai_invalid_api_key_format(inner: reqwest::header::InvalidHeaderValue) -> Self {
|
||||||
Self { kind: NewEmbedderErrorKind::InvalidApiKeyFormat(inner), fault: FaultSource::User }
|
Self { kind: NewEmbedderErrorKind::InvalidApiKeyFormat(inner), fault: FaultSource::User }
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,8 @@ pub mod manual;
|
|||||||
pub mod openai;
|
pub mod openai;
|
||||||
pub mod settings;
|
pub mod settings;
|
||||||
|
|
||||||
|
pub mod ollama;
|
||||||
|
|
||||||
pub use self::error::Error;
|
pub use self::error::Error;
|
||||||
|
|
||||||
pub type Embedding = Vec<f32>;
|
pub type Embedding = Vec<f32>;
|
||||||
@ -76,6 +78,7 @@ pub enum Embedder {
|
|||||||
HuggingFace(hf::Embedder),
|
HuggingFace(hf::Embedder),
|
||||||
OpenAi(openai::Embedder),
|
OpenAi(openai::Embedder),
|
||||||
UserProvided(manual::Embedder),
|
UserProvided(manual::Embedder),
|
||||||
|
Ollama(ollama::Embedder),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
|
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
|
||||||
@ -127,6 +130,7 @@ impl IntoIterator for EmbeddingConfigs {
|
|||||||
pub enum EmbedderOptions {
|
pub enum EmbedderOptions {
|
||||||
HuggingFace(hf::EmbedderOptions),
|
HuggingFace(hf::EmbedderOptions),
|
||||||
OpenAi(openai::EmbedderOptions),
|
OpenAi(openai::EmbedderOptions),
|
||||||
|
Ollama(ollama::EmbedderOptions),
|
||||||
UserProvided(manual::EmbedderOptions),
|
UserProvided(manual::EmbedderOptions),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -144,6 +148,10 @@ impl EmbedderOptions {
|
|||||||
pub fn openai(api_key: Option<String>) -> Self {
|
pub fn openai(api_key: Option<String>) -> Self {
|
||||||
Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key))
|
Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn ollama() -> Self {
|
||||||
|
Self::Ollama(ollama::EmbedderOptions::with_default_model())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Embedder {
|
impl Embedder {
|
||||||
@ -151,6 +159,7 @@ impl Embedder {
|
|||||||
Ok(match options {
|
Ok(match options {
|
||||||
EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?),
|
EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?),
|
||||||
EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?),
|
EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?),
|
||||||
|
EmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?),
|
||||||
EmbedderOptions::UserProvided(options) => {
|
EmbedderOptions::UserProvided(options) => {
|
||||||
Self::UserProvided(manual::Embedder::new(options))
|
Self::UserProvided(manual::Embedder::new(options))
|
||||||
}
|
}
|
||||||
@ -167,6 +176,10 @@ impl Embedder {
|
|||||||
let client = embedder.new_client()?;
|
let client = embedder.new_client()?;
|
||||||
embedder.embed(texts, &client).await
|
embedder.embed(texts, &client).await
|
||||||
}
|
}
|
||||||
|
Embedder::Ollama(embedder) => {
|
||||||
|
let client = embedder.new_client()?;
|
||||||
|
embedder.embed(texts, &client).await
|
||||||
|
}
|
||||||
Embedder::UserProvided(embedder) => embedder.embed(texts),
|
Embedder::UserProvided(embedder) => embedder.embed(texts),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -181,6 +194,7 @@ impl Embedder {
|
|||||||
match self {
|
match self {
|
||||||
Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks),
|
Embedder::HuggingFace(embedder) => embedder.embed_chunks(text_chunks),
|
||||||
Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks),
|
Embedder::OpenAi(embedder) => embedder.embed_chunks(text_chunks),
|
||||||
|
Embedder::Ollama(embedder) => embedder.embed_chunks(text_chunks),
|
||||||
Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks),
|
Embedder::UserProvided(embedder) => embedder.embed_chunks(text_chunks),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -189,6 +203,7 @@ impl Embedder {
|
|||||||
match self {
|
match self {
|
||||||
Embedder::HuggingFace(embedder) => embedder.chunk_count_hint(),
|
Embedder::HuggingFace(embedder) => embedder.chunk_count_hint(),
|
||||||
Embedder::OpenAi(embedder) => embedder.chunk_count_hint(),
|
Embedder::OpenAi(embedder) => embedder.chunk_count_hint(),
|
||||||
|
Embedder::Ollama(embedder) => embedder.chunk_count_hint(),
|
||||||
Embedder::UserProvided(_) => 1,
|
Embedder::UserProvided(_) => 1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -197,6 +212,7 @@ impl Embedder {
|
|||||||
match self {
|
match self {
|
||||||
Embedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(),
|
Embedder::HuggingFace(embedder) => embedder.prompt_count_in_chunk_hint(),
|
||||||
Embedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(),
|
Embedder::OpenAi(embedder) => embedder.prompt_count_in_chunk_hint(),
|
||||||
|
Embedder::Ollama(embedder) => embedder.prompt_count_in_chunk_hint(),
|
||||||
Embedder::UserProvided(_) => 1,
|
Embedder::UserProvided(_) => 1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -205,6 +221,7 @@ impl Embedder {
|
|||||||
match self {
|
match self {
|
||||||
Embedder::HuggingFace(embedder) => embedder.dimensions(),
|
Embedder::HuggingFace(embedder) => embedder.dimensions(),
|
||||||
Embedder::OpenAi(embedder) => embedder.dimensions(),
|
Embedder::OpenAi(embedder) => embedder.dimensions(),
|
||||||
|
Embedder::Ollama(embedder) => embedder.dimensions(),
|
||||||
Embedder::UserProvided(embedder) => embedder.dimensions(),
|
Embedder::UserProvided(embedder) => embedder.dimensions(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -213,6 +230,7 @@ impl Embedder {
|
|||||||
match self {
|
match self {
|
||||||
Embedder::HuggingFace(embedder) => embedder.distribution(),
|
Embedder::HuggingFace(embedder) => embedder.distribution(),
|
||||||
Embedder::OpenAi(embedder) => embedder.distribution(),
|
Embedder::OpenAi(embedder) => embedder.distribution(),
|
||||||
|
Embedder::Ollama(embedder) => embedder.distribution(),
|
||||||
Embedder::UserProvided(_embedder) => None,
|
Embedder::UserProvided(_embedder) => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
307
milli/src/vector/ollama.rs
Normal file
307
milli/src/vector/ollama.rs
Normal file
@ -0,0 +1,307 @@
|
|||||||
|
// Copied from "openai.rs" with the sections I actually understand changed for Ollama.
|
||||||
|
// The common components of the Ollama and OpenAI interfaces might need to be extracted.
|
||||||
|
|
||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
|
use reqwest::StatusCode;
|
||||||
|
|
||||||
|
use super::error::{EmbedError, NewEmbedderError};
|
||||||
|
use super::openai::Retry;
|
||||||
|
use super::{DistributionShift, Embedding, Embeddings};
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct Embedder {
|
||||||
|
headers: reqwest::header::HeaderMap,
|
||||||
|
options: EmbedderOptions,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
|
||||||
|
pub struct EmbedderOptions {
|
||||||
|
pub embedding_model: EmbeddingModel,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(
|
||||||
|
Debug, Clone, Hash, PartialEq, Eq, serde::Serialize, serde::Deserialize, deserr::Deserr,
|
||||||
|
)]
|
||||||
|
#[deserr(deny_unknown_fields)]
|
||||||
|
pub struct EmbeddingModel {
|
||||||
|
name: String,
|
||||||
|
dimensions: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, serde::Serialize)]
|
||||||
|
struct OllamaRequest<'a> {
|
||||||
|
model: &'a str,
|
||||||
|
prompt: &'a str,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, serde::Deserialize)]
|
||||||
|
struct OllamaResponse {
|
||||||
|
embedding: Embedding,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, serde::Deserialize)]
|
||||||
|
pub struct OllamaError {
|
||||||
|
error: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EmbeddingModel {
|
||||||
|
pub fn max_token(&self) -> usize {
|
||||||
|
// this might not be the same for all models
|
||||||
|
8192
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn default_dimensions(&self) -> usize {
|
||||||
|
// Dimensions for nomic-embed-text
|
||||||
|
768
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn name(&self) -> String {
|
||||||
|
self.name.clone()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn from_name(name: &str) -> Self {
|
||||||
|
Self { name: name.to_string(), dimensions: 0 }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn supports_overriding_dimensions(&self) -> bool {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for EmbeddingModel {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self { name: "nomic-embed-text".to_string(), dimensions: 0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl EmbedderOptions {
|
||||||
|
pub fn with_default_model() -> Self {
|
||||||
|
Self { embedding_model: Default::default() }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn with_embedding_model(embedding_model: EmbeddingModel) -> Self {
|
||||||
|
Self { embedding_model }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Embedder {
|
||||||
|
pub fn new_client(&self) -> Result<reqwest::Client, EmbedError> {
|
||||||
|
reqwest::ClientBuilder::new()
|
||||||
|
.default_headers(self.headers.clone())
|
||||||
|
.build()
|
||||||
|
.map_err(EmbedError::openai_initialize_web_client)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
|
||||||
|
let mut headers = reqwest::header::HeaderMap::new();
|
||||||
|
headers.insert(
|
||||||
|
reqwest::header::CONTENT_TYPE,
|
||||||
|
reqwest::header::HeaderValue::from_static("application/json"),
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut embedder = Self { options, headers };
|
||||||
|
|
||||||
|
let rt = tokio::runtime::Builder::new_current_thread()
|
||||||
|
.enable_io()
|
||||||
|
.enable_time()
|
||||||
|
.build()
|
||||||
|
.map_err(EmbedError::openai_runtime_init)
|
||||||
|
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
|
||||||
|
|
||||||
|
// Get dimensions from Ollama
|
||||||
|
let request =
|
||||||
|
OllamaRequest { model: &embedder.options.embedding_model.name(), prompt: "test" };
|
||||||
|
// TODO: Refactor into shared error type
|
||||||
|
let client = embedder
|
||||||
|
.new_client()
|
||||||
|
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
|
||||||
|
|
||||||
|
rt.block_on(async move {
|
||||||
|
let response = client
|
||||||
|
.post(get_ollama_path())
|
||||||
|
.json(&request)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(EmbedError::ollama_unexpected)
|
||||||
|
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
|
||||||
|
|
||||||
|
// Process error in case model not found
|
||||||
|
let response = Self::check_response(response).await.map_err(|_err| {
|
||||||
|
let e = EmbedError::ollama_model_not_found(OllamaError {
|
||||||
|
error: format!("model: {}", embedder.options.embedding_model.name()),
|
||||||
|
});
|
||||||
|
NewEmbedderError::ollama_could_not_determine_dimension(e)
|
||||||
|
})?;
|
||||||
|
|
||||||
|
let response: OllamaResponse = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(EmbedError::ollama_unexpected)
|
||||||
|
.map_err(NewEmbedderError::ollama_could_not_determine_dimension)?;
|
||||||
|
|
||||||
|
let embedding = Embeddings::from_single_embedding(response.embedding);
|
||||||
|
|
||||||
|
embedder.options.embedding_model.dimensions = embedding.dimension();
|
||||||
|
|
||||||
|
tracing::info!(
|
||||||
|
"ollama model {} with dimensionality {} added",
|
||||||
|
embedder.options.embedding_model.name(),
|
||||||
|
embedding.dimension()
|
||||||
|
);
|
||||||
|
|
||||||
|
Ok(embedder)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn check_response(response: reqwest::Response) -> Result<reqwest::Response, Retry> {
|
||||||
|
if !response.status().is_success() {
|
||||||
|
// Not the same number of possible error cases covered as with OpenAI.
|
||||||
|
match response.status() {
|
||||||
|
StatusCode::TOO_MANY_REQUESTS => {
|
||||||
|
let error_response: OllamaError = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(EmbedError::ollama_unexpected)
|
||||||
|
.map_err(Retry::retry_later)?;
|
||||||
|
|
||||||
|
return Err(Retry::rate_limited(EmbedError::ollama_too_many_requests(
|
||||||
|
OllamaError { error: error_response.error },
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
StatusCode::SERVICE_UNAVAILABLE => {
|
||||||
|
let error_response: OllamaError = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(EmbedError::ollama_unexpected)
|
||||||
|
.map_err(Retry::retry_later)?;
|
||||||
|
return Err(Retry::retry_later(EmbedError::ollama_internal_server_error(
|
||||||
|
OllamaError { error: error_response.error },
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
StatusCode::NOT_FOUND => {
|
||||||
|
let error_response: OllamaError = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(EmbedError::ollama_unexpected)
|
||||||
|
.map_err(Retry::give_up)?;
|
||||||
|
|
||||||
|
return Err(Retry::give_up(EmbedError::ollama_model_not_found(OllamaError {
|
||||||
|
error: error_response.error,
|
||||||
|
})));
|
||||||
|
}
|
||||||
|
code => {
|
||||||
|
return Err(Retry::give_up(EmbedError::ollama_unhandled_status_code(
|
||||||
|
code.as_u16(),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(response)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub async fn embed(
|
||||||
|
&self,
|
||||||
|
texts: Vec<String>,
|
||||||
|
client: &reqwest::Client,
|
||||||
|
) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
|
// Ollama only embedds one document at a time.
|
||||||
|
let mut results = Vec::with_capacity(texts.len());
|
||||||
|
|
||||||
|
// The retry loop is inside the texts loop, might have to switch that around
|
||||||
|
for text in texts {
|
||||||
|
// Retries copied from openai.rs
|
||||||
|
for attempt in 0..7 {
|
||||||
|
let retry_duration = match self.try_embed(&text, client).await {
|
||||||
|
Ok(result) => {
|
||||||
|
results.push(result);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
Err(retry) => {
|
||||||
|
tracing::warn!("Failed: {}", retry.error);
|
||||||
|
retry.into_duration(attempt)
|
||||||
|
}
|
||||||
|
}?;
|
||||||
|
tracing::warn!(
|
||||||
|
"Attempt #{}, retrying after {}ms.",
|
||||||
|
attempt,
|
||||||
|
retry_duration.as_millis()
|
||||||
|
);
|
||||||
|
tokio::time::sleep(retry_duration).await;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(results)
|
||||||
|
}
|
||||||
|
|
||||||
|
async fn try_embed(
|
||||||
|
&self,
|
||||||
|
text: &str,
|
||||||
|
client: &reqwest::Client,
|
||||||
|
) -> Result<Embeddings<f32>, Retry> {
|
||||||
|
let request = OllamaRequest { model: &self.options.embedding_model.name(), prompt: text };
|
||||||
|
let response = client
|
||||||
|
.post(get_ollama_path())
|
||||||
|
.json(&request)
|
||||||
|
.send()
|
||||||
|
.await
|
||||||
|
.map_err(EmbedError::openai_network)
|
||||||
|
.map_err(Retry::retry_later)?;
|
||||||
|
|
||||||
|
let response = Self::check_response(response).await?;
|
||||||
|
|
||||||
|
let response: OllamaResponse = response
|
||||||
|
.json()
|
||||||
|
.await
|
||||||
|
.map_err(EmbedError::openai_unexpected)
|
||||||
|
.map_err(Retry::retry_later)?;
|
||||||
|
|
||||||
|
tracing::trace!("response: {:?}", response.embedding);
|
||||||
|
|
||||||
|
let embedding = Embeddings::from_single_embedding(response.embedding);
|
||||||
|
Ok(embedding)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn embed_chunks(
|
||||||
|
&self,
|
||||||
|
text_chunks: Vec<Vec<String>>,
|
||||||
|
) -> Result<Vec<Vec<Embeddings<f32>>>, EmbedError> {
|
||||||
|
let rt = tokio::runtime::Builder::new_current_thread()
|
||||||
|
.enable_io()
|
||||||
|
.enable_time()
|
||||||
|
.build()
|
||||||
|
.map_err(EmbedError::openai_runtime_init)?;
|
||||||
|
let client = self.new_client()?;
|
||||||
|
rt.block_on(futures::future::try_join_all(
|
||||||
|
text_chunks.into_iter().map(|prompts| self.embed(prompts, &client)),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Defaults copied from openai.rs
|
||||||
|
pub fn chunk_count_hint(&self) -> usize {
|
||||||
|
10
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn prompt_count_in_chunk_hint(&self) -> usize {
|
||||||
|
10
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn dimensions(&self) -> usize {
|
||||||
|
self.options.embedding_model.dimensions
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Display for OllamaError {
|
||||||
|
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
write!(f, "{}", self.error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the URL of the Ollama embeddings endpoint.
///
/// The value of the `MEILI_OLLAMA_URL` environment variable is used when it
/// can be read; otherwise the default local endpoint is returned.
fn get_ollama_path() -> String {
    // Important: Hostname not enough, has to be entire path to embeddings endpoint
    const DEFAULT_OLLAMA_URL: &str = "http://localhost:11434/api/embeddings";

    // Build the default lazily so nothing is allocated when the variable is set.
    std::env::var("MEILI_OLLAMA_URL").unwrap_or_else(|_| DEFAULT_OLLAMA_URL.to_string())
}
|
@ -419,12 +419,12 @@ impl Embedder {
|
|||||||
|
|
||||||
// retrying in case of failure
|
// retrying in case of failure
|
||||||
|
|
||||||
struct Retry {
|
pub struct Retry {
|
||||||
error: EmbedError,
|
pub error: EmbedError,
|
||||||
strategy: RetryStrategy,
|
strategy: RetryStrategy,
|
||||||
}
|
}
|
||||||
|
|
||||||
enum RetryStrategy {
|
pub enum RetryStrategy {
|
||||||
GiveUp,
|
GiveUp,
|
||||||
Retry,
|
Retry,
|
||||||
RetryTokenized,
|
RetryTokenized,
|
||||||
@ -432,23 +432,23 @@ enum RetryStrategy {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl Retry {
|
impl Retry {
|
||||||
fn give_up(error: EmbedError) -> Self {
|
pub fn give_up(error: EmbedError) -> Self {
|
||||||
Self { error, strategy: RetryStrategy::GiveUp }
|
Self { error, strategy: RetryStrategy::GiveUp }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn retry_later(error: EmbedError) -> Self {
|
pub fn retry_later(error: EmbedError) -> Self {
|
||||||
Self { error, strategy: RetryStrategy::Retry }
|
Self { error, strategy: RetryStrategy::Retry }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn retry_tokenized(error: EmbedError) -> Self {
|
pub fn retry_tokenized(error: EmbedError) -> Self {
|
||||||
Self { error, strategy: RetryStrategy::RetryTokenized }
|
Self { error, strategy: RetryStrategy::RetryTokenized }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn rate_limited(error: EmbedError) -> Self {
|
pub fn rate_limited(error: EmbedError) -> Self {
|
||||||
Self { error, strategy: RetryStrategy::RetryAfterRateLimit }
|
Self { error, strategy: RetryStrategy::RetryAfterRateLimit }
|
||||||
}
|
}
|
||||||
|
|
||||||
fn into_duration(self, attempt: u32) -> Result<tokio::time::Duration, EmbedError> {
|
pub fn into_duration(self, attempt: u32) -> Result<tokio::time::Duration, EmbedError> {
|
||||||
match self.strategy {
|
match self.strategy {
|
||||||
RetryStrategy::GiveUp => Err(self.error),
|
RetryStrategy::GiveUp => Err(self.error),
|
||||||
RetryStrategy::Retry => Ok(tokio::time::Duration::from_millis((10u64).pow(attempt))),
|
RetryStrategy::Retry => Ok(tokio::time::Duration::from_millis((10u64).pow(attempt))),
|
||||||
@ -459,11 +459,11 @@ impl Retry {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn must_tokenize(&self) -> bool {
|
pub fn must_tokenize(&self) -> bool {
|
||||||
matches!(self.strategy, RetryStrategy::RetryTokenized)
|
matches!(self.strategy, RetryStrategy::RetryTokenized)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn into_error(self) -> EmbedError {
|
pub fn into_error(self) -> EmbedError {
|
||||||
self.error
|
self.error
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use super::openai;
|
use super::{ollama, openai};
|
||||||
use crate::prompt::PromptData;
|
use crate::prompt::PromptData;
|
||||||
use crate::update::Setting;
|
use crate::update::Setting;
|
||||||
use crate::vector::EmbeddingConfig;
|
use crate::vector::EmbeddingConfig;
|
||||||
@ -80,11 +80,15 @@ impl EmbeddingSettings {
|
|||||||
Self::SOURCE => {
|
Self::SOURCE => {
|
||||||
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided]
|
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::UserProvided]
|
||||||
}
|
}
|
||||||
Self::MODEL => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
|
Self::MODEL => {
|
||||||
|
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama]
|
||||||
|
}
|
||||||
Self::REVISION => &[EmbedderSource::HuggingFace],
|
Self::REVISION => &[EmbedderSource::HuggingFace],
|
||||||
Self::API_KEY => &[EmbedderSource::OpenAi],
|
Self::API_KEY => &[EmbedderSource::OpenAi],
|
||||||
Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided],
|
Self::DIMENSIONS => &[EmbedderSource::OpenAi, EmbedderSource::UserProvided],
|
||||||
Self::DOCUMENT_TEMPLATE => &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi],
|
Self::DOCUMENT_TEMPLATE => {
|
||||||
|
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama]
|
||||||
|
}
|
||||||
_other => unreachable!("unknown field"),
|
_other => unreachable!("unknown field"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -101,6 +105,7 @@ impl EmbeddingSettings {
|
|||||||
EmbedderSource::HuggingFace => {
|
EmbedderSource::HuggingFace => {
|
||||||
&[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
|
&[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
|
||||||
}
|
}
|
||||||
|
EmbedderSource::Ollama => &[Self::SOURCE, Self::MODEL, Self::DOCUMENT_TEMPLATE],
|
||||||
EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS],
|
EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -134,6 +139,7 @@ pub enum EmbedderSource {
|
|||||||
#[default]
|
#[default]
|
||||||
OpenAi,
|
OpenAi,
|
||||||
HuggingFace,
|
HuggingFace,
|
||||||
|
Ollama,
|
||||||
UserProvided,
|
UserProvided,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -143,6 +149,7 @@ impl std::fmt::Display for EmbedderSource {
|
|||||||
EmbedderSource::OpenAi => "openAi",
|
EmbedderSource::OpenAi => "openAi",
|
||||||
EmbedderSource::HuggingFace => "huggingFace",
|
EmbedderSource::HuggingFace => "huggingFace",
|
||||||
EmbedderSource::UserProvided => "userProvided",
|
EmbedderSource::UserProvided => "userProvided",
|
||||||
|
EmbedderSource::Ollama => "ollama",
|
||||||
};
|
};
|
||||||
f.write_str(s)
|
f.write_str(s)
|
||||||
}
|
}
|
||||||
@ -192,7 +199,15 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
model: Setting::Set(options.embedding_model.name().to_owned()),
|
model: Setting::Set(options.embedding_model.name().to_owned()),
|
||||||
revision: Setting::NotSet,
|
revision: Setting::NotSet,
|
||||||
api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
|
api_key: options.api_key.map(Setting::Set).unwrap_or_default(),
|
||||||
dimensions: options.dimensions.map(Setting::Set).unwrap_or_default(),
|
dimensions: Setting::Set(options.dimensions.unwrap_or_default()),
|
||||||
|
document_template: Setting::Set(prompt.template),
|
||||||
|
},
|
||||||
|
super::EmbedderOptions::Ollama(options) => Self {
|
||||||
|
source: Setting::Set(EmbedderSource::Ollama),
|
||||||
|
model: Setting::Set(options.embedding_model.name().to_owned()),
|
||||||
|
revision: Setting::NotSet,
|
||||||
|
api_key: Setting::NotSet,
|
||||||
|
dimensions: Setting::NotSet,
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::UserProvided(options) => Self {
|
super::EmbedderOptions::UserProvided(options) => Self {
|
||||||
@ -229,6 +244,14 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
}
|
}
|
||||||
this.embedder_options = super::EmbedderOptions::OpenAi(options);
|
this.embedder_options = super::EmbedderOptions::OpenAi(options);
|
||||||
}
|
}
|
||||||
|
EmbedderSource::Ollama => {
|
||||||
|
let mut options: ollama::EmbedderOptions =
|
||||||
|
super::ollama::EmbedderOptions::with_default_model();
|
||||||
|
if let Some(model) = model.set() {
|
||||||
|
options.embedding_model = super::ollama::EmbeddingModel::from_name(&model);
|
||||||
|
}
|
||||||
|
this.embedder_options = super::EmbedderOptions::Ollama(options);
|
||||||
|
}
|
||||||
EmbedderSource::HuggingFace => {
|
EmbedderSource::HuggingFace => {
|
||||||
let mut options = super::hf::EmbedderOptions::default();
|
let mut options = super::hf::EmbedderOptions::default();
|
||||||
if let Some(model) = model.set() {
|
if let Some(model) = model.set() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user