mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Merge #4801
4801: AI quality-of-life improvements r=irevoire a=dureuill # Pull Request ## Related issue Fixes #4802 ## What does this PR do? This PR implements several quality-of-life improvements described in the [public usage](https://meilisearch.notion.site/v1-10-AI-search-changes-737c9d7d010d4dd685582bf5dab579e2#ece824a1814e47a0a986d786baff1be9) Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
commit
7a292b572a
@ -415,7 +415,9 @@ impl ErrorCode for milli::Error {
|
|||||||
Code::InvalidSettingsTypoTolerance
|
Code::InvalidSettingsTypoTolerance
|
||||||
}
|
}
|
||||||
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
|
UserError::InvalidEmbedder(_) => Code::InvalidEmbedder,
|
||||||
UserError::VectorEmbeddingError(_) => Code::VectorEmbeddingError,
|
UserError::VectorEmbeddingError(_) | UserError::DocumentEmbeddingError(_) => {
|
||||||
|
Code::VectorEmbeddingError
|
||||||
|
}
|
||||||
UserError::DocumentEditionCannotModifyPrimaryKey
|
UserError::DocumentEditionCannotModifyPrimaryKey
|
||||||
| UserError::DocumentEditionDocumentMustBeObject
|
| UserError::DocumentEditionDocumentMustBeObject
|
||||||
| UserError::DocumentEditionRuntimeError(_)
|
| UserError::DocumentEditionRuntimeError(_)
|
||||||
|
@ -645,7 +645,12 @@ async fn get_document_with_vectors() {
|
|||||||
{
|
{
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"name": "echo",
|
"name": "echo",
|
||||||
"_vectors": {}
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"offset": 0,
|
"offset": 0,
|
||||||
@ -701,7 +706,12 @@ async fn get_document_with_vectors() {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "echo",
|
"name": "echo",
|
||||||
"_vectors": {}
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"offset": 0,
|
"offset": 0,
|
||||||
|
@ -120,7 +120,12 @@ async fn add_remove_user_provided() {
|
|||||||
{
|
{
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"name": "echo",
|
"name": "echo",
|
||||||
"_vectors": {}
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"offset": 0,
|
"offset": 0,
|
||||||
@ -142,7 +147,12 @@ async fn add_remove_user_provided() {
|
|||||||
{
|
{
|
||||||
"id": 1,
|
"id": 1,
|
||||||
"name": "echo",
|
"name": "echo",
|
||||||
"_vectors": {}
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"offset": 0,
|
"offset": 0,
|
||||||
@ -471,6 +481,99 @@ async fn user_provided_embeddings_error() {
|
|||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks the task error produced when a document is sent without usable embeddings to an
/// index prepared by `generate_default_user_provided_documents`.
///
/// Three documents are added in turn: one without any `_vectors` field, one where the field
/// is misspelled `_vector`, and one where the embedder name is misspelled `manaul`. Each
/// addition is accepted (`202 Accepted`), and each resulting task is expected to fail with
/// `vector_embedding_error` and a hint tailored to the mistake.
#[actix_rt::test]
|
||||||
|
async fn user_provided_vectors_error() {
|
||||||
|
let server = Server::new().await;
|
||||||
|
let index = generate_default_user_provided_documents(&server).await;
|
||||||
|
|
||||||
|
// First case, we forget to specify `_vectors`
|
||||||
|
let documents = json!({"id": 42, "name": "kefir"});
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = index.wait_task(value.uid()).await;
|
||||||
|
snapshot!(task, @r###"
|
||||||
|
{
|
||||||
|
"uid": 2,
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "failed",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 1,
|
||||||
|
"indexedDocuments": 0
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: \\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
|
||||||
|
"code": "vector_embedding_error",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
|
},
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Second case, we provide `_vectors` with a typo
|
||||||
|
let documents = json!({"id": 42, "name": "kefir", "_vector": { "manaul": [0, 0, 0] }});
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = index.wait_task(value.uid()).await;
|
||||||
|
snapshot!(task, @r###"
|
||||||
|
{
|
||||||
|
"uid": 3,
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "failed",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 1,
|
||||||
|
"indexedDocuments": 0
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: \\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n _vector: manaul000\\n _vector.manaul: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
|
||||||
|
"code": "vector_embedding_error",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
|
},
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Third case, we specify the embedder with a typo
|
||||||
|
let documents = json!({"id": 42, "name": "kefir", "_vectors": { "manaul": [0, 0, 0] }});
|
||||||
|
let (value, code) = index.add_documents(documents, None).await;
|
||||||
|
snapshot!(code, @"202 Accepted");
|
||||||
|
let task = index.wait_task(value.uid()).await;
|
||||||
|
snapshot!(task, @r###"
|
||||||
|
{
|
||||||
|
"uid": 4,
|
||||||
|
"indexUid": "doggo",
|
||||||
|
"status": "failed",
|
||||||
|
"type": "documentAdditionOrUpdate",
|
||||||
|
"canceledBy": null,
|
||||||
|
"details": {
|
||||||
|
"receivedDocuments": 1,
|
||||||
|
"indexedDocuments": 0
|
||||||
|
},
|
||||||
|
"error": {
|
||||||
|
"message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: manaul000\\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n _vectors.manaul: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
|
||||||
|
"code": "vector_embedding_error",
|
||||||
|
"type": "invalid_request",
|
||||||
|
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
|
||||||
|
},
|
||||||
|
"duration": "[duration]",
|
||||||
|
"enqueuedAt": "[date]",
|
||||||
|
"startedAt": "[date]",
|
||||||
|
"finishedAt": "[date]"
|
||||||
|
}
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
#[actix_rt::test]
|
#[actix_rt::test]
|
||||||
async fn clear_documents() {
|
async fn clear_documents() {
|
||||||
let server = Server::new().await;
|
let server = Server::new().await;
|
||||||
@ -580,7 +683,12 @@ async fn add_remove_one_vector_4588() {
|
|||||||
{
|
{
|
||||||
"id": 0,
|
"id": 0,
|
||||||
"name": "kefir",
|
"name": "kefir",
|
||||||
"_vectors": {}
|
"_vectors": {
|
||||||
|
"manual": {
|
||||||
|
"embeddings": [],
|
||||||
|
"regenerate": false
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"offset": 0,
|
"offset": 0,
|
||||||
|
@ -268,15 +268,17 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||||||
DocumentEditionRuntimeError(Box<EvalAltResult>),
|
DocumentEditionRuntimeError(Box<EvalAltResult>),
|
||||||
#[error("Document edition runtime error encountered while compiling the function: {0}")]
|
#[error("Document edition runtime error encountered while compiling the function: {0}")]
|
||||||
DocumentEditionCompilationError(rhai::ParseError),
|
DocumentEditionCompilationError(rhai::ParseError),
|
||||||
|
#[error("{0}")]
|
||||||
|
DocumentEmbeddingError(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<crate::vector::Error> for Error {
|
impl From<crate::vector::Error> for Error {
|
||||||
fn from(value: crate::vector::Error) -> Self {
|
fn from(value: crate::vector::Error) -> Self {
|
||||||
match value.fault() {
|
match value.fault() {
|
||||||
FaultSource::User => Error::UserError(value.into()),
|
FaultSource::User => Error::UserError(value.into()),
|
||||||
FaultSource::Runtime => Error::InternalError(value.into()),
|
FaultSource::Runtime => Error::UserError(value.into()),
|
||||||
FaultSource::Bug => Error::InternalError(value.into()),
|
FaultSource::Bug => Error::InternalError(value.into()),
|
||||||
FaultSource::Undecided => Error::InternalError(value.into()),
|
FaultSource::Undecided => Error::UserError(value.into()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1691,9 +1691,7 @@ impl Index {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !embeddings.is_empty() {
|
res.insert(embedder_name.to_owned(), embeddings);
|
||||||
res.insert(embedder_name.to_owned(), embeddings);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
Ok(res)
|
Ok(res)
|
||||||
}
|
}
|
||||||
|
@ -13,13 +13,15 @@ use roaring::RoaringBitmap;
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
|
||||||
|
use crate::error::FaultSource;
|
||||||
use crate::index::IndexEmbeddingConfig;
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::prompt::Prompt;
|
use crate::prompt::Prompt;
|
||||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
|
||||||
use crate::update::settings::InnerIndexSettingsDiff;
|
use crate::update::settings::InnerIndexSettingsDiff;
|
||||||
|
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
|
||||||
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
|
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
|
||||||
use crate::vector::settings::{EmbedderAction, ReindexAction};
|
use crate::vector::settings::{EmbedderAction, ReindexAction};
|
||||||
use crate::vector::Embedder;
|
use crate::vector::{Embedder, Embeddings};
|
||||||
use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort};
|
use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort};
|
||||||
|
|
||||||
/// The length of the elements that are always in the buffer when inserting new values.
|
/// The length of the elements that are always in the buffer when inserting new values.
|
||||||
@ -102,7 +104,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
embedders_configs: &[IndexEmbeddingConfig],
|
embedders_configs: &[IndexEmbeddingConfig],
|
||||||
settings_diff: &InnerIndexSettingsDiff,
|
settings_diff: &InnerIndexSettingsDiff,
|
||||||
) -> Result<Vec<ExtractedVectorPoints>> {
|
) -> Result<(Vec<ExtractedVectorPoints>, UnusedVectorsDistribution)> {
|
||||||
|
let mut unused_vectors_distribution = UnusedVectorsDistribution::new();
|
||||||
let reindex_vectors = settings_diff.reindex_vectors();
|
let reindex_vectors = settings_diff.reindex_vectors();
|
||||||
|
|
||||||
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
|
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
|
||||||
@ -319,6 +322,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
delta,
|
delta,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
unused_vectors_distribution.append(parsed_vectors);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut results = Vec::new();
|
let mut results = Vec::new();
|
||||||
@ -355,7 +360,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(results)
|
Ok((results, unused_vectors_distribution))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn extract_vector_document_diff(
|
fn extract_vector_document_diff(
|
||||||
@ -547,6 +552,9 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
|||||||
prompt_reader: grenad::Reader<R>,
|
prompt_reader: grenad::Reader<R>,
|
||||||
indexer: GrenadParameters,
|
indexer: GrenadParameters,
|
||||||
embedder: Arc<Embedder>,
|
embedder: Arc<Embedder>,
|
||||||
|
embedder_name: &str,
|
||||||
|
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||||
|
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||||
request_threads: &ThreadPoolNoAbort,
|
request_threads: &ThreadPoolNoAbort,
|
||||||
) -> Result<grenad::Reader<BufReader<File>>> {
|
) -> Result<grenad::Reader<BufReader<File>>> {
|
||||||
let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
|
let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
|
||||||
@ -583,13 +591,14 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
|||||||
current_chunk_ids.push(docid);
|
current_chunk_ids.push(docid);
|
||||||
|
|
||||||
if chunks.len() == chunks.capacity() {
|
if chunks.len() == chunks.capacity() {
|
||||||
let chunked_embeds = embedder
|
let chunked_embeds = embed_chunks(
|
||||||
.embed_chunks(
|
&embedder,
|
||||||
std::mem::replace(&mut chunks, Vec::with_capacity(n_chunks)),
|
std::mem::replace(&mut chunks, Vec::with_capacity(n_chunks)),
|
||||||
request_threads,
|
embedder_name,
|
||||||
)
|
possible_embedding_mistakes,
|
||||||
.map_err(crate::vector::Error::from)
|
unused_vectors_distribution,
|
||||||
.map_err(crate::Error::from)?;
|
request_threads,
|
||||||
|
)?;
|
||||||
|
|
||||||
for (docid, embeddings) in chunks_ids
|
for (docid, embeddings) in chunks_ids
|
||||||
.iter()
|
.iter()
|
||||||
@ -604,10 +613,14 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
// send last chunk
|
// send last chunk
|
||||||
if !chunks.is_empty() {
|
if !chunks.is_empty() {
|
||||||
let chunked_embeds = embedder
|
let chunked_embeds = embed_chunks(
|
||||||
.embed_chunks(std::mem::take(&mut chunks), request_threads)
|
&embedder,
|
||||||
.map_err(crate::vector::Error::from)
|
std::mem::take(&mut chunks),
|
||||||
.map_err(crate::Error::from)?;
|
embedder_name,
|
||||||
|
possible_embedding_mistakes,
|
||||||
|
unused_vectors_distribution,
|
||||||
|
request_threads,
|
||||||
|
)?;
|
||||||
for (docid, embeddings) in chunks_ids
|
for (docid, embeddings) in chunks_ids
|
||||||
.iter()
|
.iter()
|
||||||
.flat_map(|docids| docids.iter())
|
.flat_map(|docids| docids.iter())
|
||||||
@ -618,10 +631,14 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !current_chunk.is_empty() {
|
if !current_chunk.is_empty() {
|
||||||
let embeds = embedder
|
let embeds = embed_chunks(
|
||||||
.embed_chunks(vec![std::mem::take(&mut current_chunk)], request_threads)
|
&embedder,
|
||||||
.map_err(crate::vector::Error::from)
|
vec![std::mem::take(&mut current_chunk)],
|
||||||
.map_err(crate::Error::from)?;
|
embedder_name,
|
||||||
|
possible_embedding_mistakes,
|
||||||
|
unused_vectors_distribution,
|
||||||
|
request_threads,
|
||||||
|
)?;
|
||||||
|
|
||||||
if let Some(embeds) = embeds.first() {
|
if let Some(embeds) = embeds.first() {
|
||||||
for (docid, embeddings) in current_chunk_ids.iter().zip(embeds.iter()) {
|
for (docid, embeddings) in current_chunk_ids.iter().zip(embeds.iter()) {
|
||||||
@ -632,3 +649,57 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
writer_into_reader(state_writer)
|
writer_into_reader(state_writer)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn embed_chunks(
|
||||||
|
embedder: &Embedder,
|
||||||
|
text_chunks: Vec<Vec<String>>,
|
||||||
|
embedder_name: &str,
|
||||||
|
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
|
||||||
|
unused_vectors_distribution: &UnusedVectorsDistribution,
|
||||||
|
request_threads: &ThreadPoolNoAbort,
|
||||||
|
) -> Result<Vec<Vec<Embeddings<f32>>>> {
|
||||||
|
match embedder.embed_chunks(text_chunks, request_threads) {
|
||||||
|
Ok(chunks) => Ok(chunks),
|
||||||
|
Err(error) => {
|
||||||
|
if let FaultSource::Bug = error.fault {
|
||||||
|
Err(crate::Error::InternalError(crate::InternalError::VectorEmbeddingError(
|
||||||
|
error.into(),
|
||||||
|
)))
|
||||||
|
} else {
|
||||||
|
let mut msg =
|
||||||
|
format!(r"While embedding documents for embedder `{embedder_name}`: {error}");
|
||||||
|
|
||||||
|
if let EmbedErrorKind::ManualEmbed(_) = &error.kind {
|
||||||
|
msg += &format!("\n- Note: `{embedder_name}` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.{embedder_name}`.");
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut hint_count = 0;
|
||||||
|
|
||||||
|
for (vector_misspelling, count) in
|
||||||
|
possible_embedding_mistakes.vector_mistakes().take(2)
|
||||||
|
{
|
||||||
|
msg += &format!("\n- Hint: try replacing `{vector_misspelling}` by `_vectors` in {count} document(s).");
|
||||||
|
hint_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (embedder_misspelling, count) in possible_embedding_mistakes
|
||||||
|
.embedder_mistakes(embedder_name, unused_vectors_distribution)
|
||||||
|
.take(2)
|
||||||
|
{
|
||||||
|
msg += &format!("\n- Hint: try replacing `_vectors.{embedder_misspelling}` by `_vectors.{embedder_name}` in {count} document(s).");
|
||||||
|
hint_count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if hint_count == 0 {
|
||||||
|
if let EmbedErrorKind::ManualEmbed(_) = &error.kind {
|
||||||
|
msg += &format!(
|
||||||
|
"\n- Hint: opt-out for a document with `_vectors.{embedder_name}: null`"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -32,6 +32,7 @@ use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
|
|||||||
use super::{helpers, TypedChunk};
|
use super::{helpers, TypedChunk};
|
||||||
use crate::index::IndexEmbeddingConfig;
|
use crate::index::IndexEmbeddingConfig;
|
||||||
use crate::update::settings::InnerIndexSettingsDiff;
|
use crate::update::settings::InnerIndexSettingsDiff;
|
||||||
|
use crate::vector::error::PossibleEmbeddingMistakes;
|
||||||
use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
use crate::{FieldId, Result, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
|
||||||
|
|
||||||
/// Extract data for each databases from obkv documents in parallel.
|
/// Extract data for each databases from obkv documents in parallel.
|
||||||
@ -47,6 +48,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
|
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
|
||||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||||
max_positions_per_attributes: Option<u32>,
|
max_positions_per_attributes: Option<u32>,
|
||||||
|
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
|
let (original_pipeline_result, flattened_pipeline_result): (Result<_>, Result<_>) = rayon::join(
|
||||||
|| {
|
|| {
|
||||||
@ -59,6 +61,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
embedders_configs.clone(),
|
embedders_configs.clone(),
|
||||||
settings_diff.clone(),
|
settings_diff.clone(),
|
||||||
|
possible_embedding_mistakes.clone(),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect::<Result<()>>()
|
.collect::<Result<()>>()
|
||||||
@ -227,6 +230,7 @@ fn send_original_documents_data(
|
|||||||
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
lmdb_writer_sx: Sender<Result<TypedChunk>>,
|
||||||
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
|
embedders_configs: Arc<Vec<IndexEmbeddingConfig>>,
|
||||||
settings_diff: Arc<InnerIndexSettingsDiff>,
|
settings_diff: Arc<InnerIndexSettingsDiff>,
|
||||||
|
possible_embedding_mistakes: Arc<PossibleEmbeddingMistakes>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let original_documents_chunk =
|
let original_documents_chunk =
|
||||||
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
|
original_documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
|
||||||
@ -248,7 +252,7 @@ fn send_original_documents_data(
|
|||||||
&embedders_configs,
|
&embedders_configs,
|
||||||
&settings_diff,
|
&settings_diff,
|
||||||
) {
|
) {
|
||||||
Ok(extracted_vectors) => {
|
Ok((extracted_vectors, unused_vectors_distribution)) => {
|
||||||
for ExtractedVectorPoints {
|
for ExtractedVectorPoints {
|
||||||
manual_vectors,
|
manual_vectors,
|
||||||
remove_vectors,
|
remove_vectors,
|
||||||
@ -263,6 +267,9 @@ fn send_original_documents_data(
|
|||||||
prompts,
|
prompts,
|
||||||
indexer,
|
indexer,
|
||||||
embedder.clone(),
|
embedder.clone(),
|
||||||
|
&embedder_name,
|
||||||
|
&possible_embedding_mistakes,
|
||||||
|
&unused_vectors_distribution,
|
||||||
request_threads(),
|
request_threads(),
|
||||||
) {
|
) {
|
||||||
Ok(results) => Some(results),
|
Ok(results) => Some(results),
|
||||||
|
@ -427,6 +427,9 @@ where
|
|||||||
let settings_diff = Arc::new(settings_diff);
|
let settings_diff = Arc::new(settings_diff);
|
||||||
let embedders_configs = Arc::new(self.index.embedding_configs(self.wtxn)?);
|
let embedders_configs = Arc::new(self.index.embedding_configs(self.wtxn)?);
|
||||||
|
|
||||||
|
let possible_embedding_mistakes =
|
||||||
|
crate::vector::error::PossibleEmbeddingMistakes::new(&field_distribution);
|
||||||
|
|
||||||
let backup_pool;
|
let backup_pool;
|
||||||
let pool = match self.indexer_config.thread_pool {
|
let pool = match self.indexer_config.thread_pool {
|
||||||
Some(ref pool) => pool,
|
Some(ref pool) => pool,
|
||||||
@ -542,6 +545,7 @@ where
|
|||||||
embedders_configs.clone(),
|
embedders_configs.clone(),
|
||||||
settings_diff_cloned,
|
settings_diff_cloned,
|
||||||
max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
|
Arc::new(possible_embedding_mistakes)
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -1574,7 +1574,6 @@ pub fn validate_embedding_settings(
|
|||||||
EmbedderSource::OpenAi => {
|
EmbedderSource::OpenAi => {
|
||||||
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
|
||||||
|
|
||||||
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
|
|
||||||
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
|
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
|
||||||
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
|
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
|
||||||
check_unset(
|
check_unset(
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
|
use std::collections::BTreeMap;
|
||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
|
|
||||||
use hf_hub::api::sync::ApiError;
|
use hf_hub::api::sync::ApiError;
|
||||||
|
|
||||||
|
use super::parsed_vectors::ParsedVectorsDiff;
|
||||||
use crate::error::FaultSource;
|
use crate::error::FaultSource;
|
||||||
use crate::PanicCatched;
|
use crate::{FieldDistribution, PanicCatched};
|
||||||
|
|
||||||
#[derive(Debug, thiserror::Error)]
|
#[derive(Debug, thiserror::Error)]
|
||||||
#[error("Error while generating embeddings: {inner}")]
|
#[error("Error while generating embeddings: {inner}")]
|
||||||
@ -310,3 +312,68 @@ pub enum NewEmbedderErrorKind {
|
|||||||
#[error("loading model failed: {0}")]
|
#[error("loading model failed: {0}")]
|
||||||
LoadModel(candle_core::Error),
|
LoadModel(candle_core::Error),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct PossibleEmbeddingMistakes {
|
||||||
|
vectors_mistakes: BTreeMap<String, u64>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl PossibleEmbeddingMistakes {
|
||||||
|
pub fn new(field_distribution: &FieldDistribution) -> Self {
|
||||||
|
let mut vectors_mistakes = BTreeMap::new();
|
||||||
|
let builder = levenshtein_automata::LevenshteinAutomatonBuilder::new(2, true);
|
||||||
|
let automata = builder.build_dfa("_vectors");
|
||||||
|
for (field, count) in field_distribution {
|
||||||
|
if *count == 0 {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if field.contains('.') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
match automata.eval(field) {
|
||||||
|
levenshtein_automata::Distance::Exact(0) => continue,
|
||||||
|
levenshtein_automata::Distance::Exact(_) => {
|
||||||
|
vectors_mistakes.insert(field.to_string(), *count);
|
||||||
|
}
|
||||||
|
levenshtein_automata::Distance::AtLeast(_) => continue,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Self { vectors_mistakes }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn vector_mistakes(&self) -> impl Iterator<Item = (&str, u64)> {
|
||||||
|
self.vectors_mistakes.iter().map(|(misspelling, count)| (misspelling.as_str(), *count))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn embedder_mistakes<'a>(
|
||||||
|
&'a self,
|
||||||
|
embedder_name: &'a str,
|
||||||
|
unused_vectors_distributions: &'a UnusedVectorsDistribution,
|
||||||
|
) -> impl Iterator<Item = (&'a str, u64)> + 'a {
|
||||||
|
let builder = levenshtein_automata::LevenshteinAutomatonBuilder::new(2, true);
|
||||||
|
let automata = builder.build_dfa(embedder_name);
|
||||||
|
|
||||||
|
unused_vectors_distributions.0.iter().filter_map(move |(field, count)| {
|
||||||
|
match automata.eval(field) {
|
||||||
|
levenshtein_automata::Distance::Exact(0) => None,
|
||||||
|
levenshtein_automata::Distance::Exact(_) => Some((field.as_str(), *count)),
|
||||||
|
levenshtein_automata::Distance::AtLeast(_) => None,
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct UnusedVectorsDistribution(BTreeMap<String, u64>);
|
||||||
|
|
||||||
|
impl UnusedVectorsDistribution {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn append(&mut self, parsed_vectors_diff: ParsedVectorsDiff) {
|
||||||
|
for name in parsed_vectors_diff.into_new_vectors_keys_iter() {
|
||||||
|
*self.0.entry(name).or_default() += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -20,7 +20,7 @@ impl Embedder {
|
|||||||
|
|
||||||
pub fn embed(&self, mut texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
pub fn embed(&self, mut texts: Vec<String>) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
let Some(text) = texts.pop() else { return Ok(Default::default()) };
|
let Some(text) = texts.pop() else { return Ok(Default::default()) };
|
||||||
Err(EmbedError::embed_on_manual_embedder(text))
|
Err(EmbedError::embed_on_manual_embedder(text.chars().take(250).collect()))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn dimensions(&self) -> usize {
|
pub fn dimensions(&self) -> usize {
|
||||||
|
@ -10,6 +10,7 @@ use crate::ThreadPoolNoAbort;
|
|||||||
|
|
||||||
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
|
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
|
||||||
pub struct EmbedderOptions {
|
pub struct EmbedderOptions {
|
||||||
|
pub url: Option<String>,
|
||||||
pub api_key: Option<String>,
|
pub api_key: Option<String>,
|
||||||
pub embedding_model: EmbeddingModel,
|
pub embedding_model: EmbeddingModel,
|
||||||
pub dimensions: Option<usize>,
|
pub dimensions: Option<usize>,
|
||||||
@ -146,11 +147,13 @@ pub const OPENAI_EMBEDDINGS_URL: &str = "https://api.openai.com/v1/embeddings";
|
|||||||
|
|
||||||
impl EmbedderOptions {
|
impl EmbedderOptions {
|
||||||
pub fn with_default_model(api_key: Option<String>) -> Self {
|
pub fn with_default_model(api_key: Option<String>) -> Self {
|
||||||
Self { api_key, embedding_model: Default::default(), dimensions: None, distribution: None }
|
Self {
|
||||||
}
|
api_key,
|
||||||
|
embedding_model: Default::default(),
|
||||||
pub fn with_embedding_model(api_key: Option<String>, embedding_model: EmbeddingModel) -> Self {
|
dimensions: None,
|
||||||
Self { api_key, embedding_model, dimensions: None, distribution: None }
|
distribution: None,
|
||||||
|
url: None,
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -175,11 +178,13 @@ impl Embedder {
|
|||||||
&inferred_api_key
|
&inferred_api_key
|
||||||
});
|
});
|
||||||
|
|
||||||
|
let url = options.url.as_deref().unwrap_or(OPENAI_EMBEDDINGS_URL).to_owned();
|
||||||
|
|
||||||
let rest_embedder = RestEmbedder::new(RestEmbedderOptions {
|
let rest_embedder = RestEmbedder::new(RestEmbedderOptions {
|
||||||
api_key: Some(api_key.clone()),
|
api_key: Some(api_key.clone()),
|
||||||
distribution: None,
|
distribution: None,
|
||||||
dimensions: Some(options.dimensions()),
|
dimensions: Some(options.dimensions()),
|
||||||
url: OPENAI_EMBEDDINGS_URL.to_owned(),
|
url,
|
||||||
query: options.query(),
|
query: options.query(),
|
||||||
input_field: vec!["input".to_owned()],
|
input_field: vec!["input".to_owned()],
|
||||||
input_type: crate::vector::rest::InputType::TextArray,
|
input_type: crate::vector::rest::InputType::TextArray,
|
||||||
@ -205,7 +210,6 @@ impl Embedder {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn try_embed_tokenized(&self, text: &[String]) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
fn try_embed_tokenized(&self, text: &[String]) -> Result<Vec<Embeddings<f32>>, EmbedError> {
|
||||||
pub const OVERLAP_SIZE: usize = 200;
|
|
||||||
let mut all_embeddings = Vec::with_capacity(text.len());
|
let mut all_embeddings = Vec::with_capacity(text.len());
|
||||||
for text in text {
|
for text in text {
|
||||||
let max_token_count = self.options.embedding_model.max_token();
|
let max_token_count = self.options.embedding_model.max_token();
|
||||||
@ -216,21 +220,10 @@ impl Embedder {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut tokens = encoded.as_slice();
|
let tokens = &encoded.as_slice()[0..max_token_count];
|
||||||
let mut embeddings_for_prompt = Embeddings::new(self.dimensions());
|
let mut embeddings_for_prompt = Embeddings::new(self.dimensions());
|
||||||
while tokens.len() > max_token_count {
|
|
||||||
let window = &tokens[..max_token_count];
|
|
||||||
let embedding = self.rest_embedder.embed_tokens(window)?;
|
|
||||||
embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| {
|
|
||||||
EmbedError::openai_unexpected_dimension(self.dimensions(), got.len())
|
|
||||||
})?;
|
|
||||||
|
|
||||||
tokens = &tokens[max_token_count - OVERLAP_SIZE..];
|
|
||||||
}
|
|
||||||
|
|
||||||
// end of text
|
|
||||||
let embedding = self.rest_embedder.embed_tokens(tokens)?;
|
let embedding = self.rest_embedder.embed_tokens(tokens)?;
|
||||||
|
|
||||||
embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| {
|
embeddings_for_prompt.append(embedding.into_inner()).map_err(|got| {
|
||||||
EmbedError::openai_unexpected_dimension(self.dimensions(), got.len())
|
EmbedError::openai_unexpected_dimension(self.dimensions(), got.len())
|
||||||
})?;
|
})?;
|
||||||
|
@ -179,6 +179,15 @@ impl ParsedVectorsDiff {
|
|||||||
|
|
||||||
(old, new)
|
(old, new)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn into_new_vectors_keys_iter(self) -> impl Iterator<Item = String> {
|
||||||
|
let maybe_it = match self.new {
|
||||||
|
VectorsState::NoVectorsFid => None,
|
||||||
|
VectorsState::NoVectorsFieldInDocument => None,
|
||||||
|
VectorsState::Vectors(vectors) => Some(vectors.into_keys()),
|
||||||
|
};
|
||||||
|
maybe_it.into_iter().flatten()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ParsedVectors(pub BTreeMap<String, Vectors>);
|
pub struct ParsedVectors(pub BTreeMap<String, Vectors>);
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use deserr::Deserr;
|
use deserr::Deserr;
|
||||||
|
use rand::Rng;
|
||||||
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
@ -264,7 +265,7 @@ where
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
for attempt in 0..7 {
|
for attempt in 0..10 {
|
||||||
let response = request.clone().send_json(&body);
|
let response = request.clone().send_json(&body);
|
||||||
let result = check_response(response);
|
let result = check_response(response);
|
||||||
|
|
||||||
@ -277,6 +278,11 @@ where
|
|||||||
}?;
|
}?;
|
||||||
|
|
||||||
let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
|
let retry_duration = retry_duration.min(std::time::Duration::from_secs(60)); // don't wait more than a minute
|
||||||
|
|
||||||
|
// randomly up to double the retry duration
|
||||||
|
let retry_duration = retry_duration
|
||||||
|
+ rand::thread_rng().gen_range(std::time::Duration::ZERO..retry_duration);
|
||||||
|
|
||||||
tracing::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis());
|
tracing::warn!("Attempt #{}, retrying after {}ms.", attempt, retry_duration.as_millis());
|
||||||
std::thread::sleep(retry_duration);
|
std::thread::sleep(retry_duration);
|
||||||
}
|
}
|
||||||
|
@ -166,7 +166,16 @@ impl SettingsDiff {
|
|||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
}
|
}
|
||||||
if url.apply(new_url) {
|
if url.apply(new_url) {
|
||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
match source {
|
||||||
|
// do not regenerate on an url change in OpenAI
|
||||||
|
Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => {}
|
||||||
|
_ => {
|
||||||
|
ReindexAction::push_action(
|
||||||
|
&mut reindex_action,
|
||||||
|
ReindexAction::FullReindex,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if query.apply(new_query) {
|
if query.apply(new_query) {
|
||||||
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex);
|
||||||
@ -271,7 +280,7 @@ fn apply_default_for_source(
|
|||||||
*model = Setting::Reset;
|
*model = Setting::Reset;
|
||||||
*revision = Setting::NotSet;
|
*revision = Setting::NotSet;
|
||||||
*dimensions = Setting::NotSet;
|
*dimensions = Setting::NotSet;
|
||||||
*url = Setting::NotSet;
|
*url = Setting::Reset;
|
||||||
*query = Setting::NotSet;
|
*query = Setting::NotSet;
|
||||||
*input_field = Setting::NotSet;
|
*input_field = Setting::NotSet;
|
||||||
*path_to_embeddings = Setting::NotSet;
|
*path_to_embeddings = Setting::NotSet;
|
||||||
@ -364,7 +373,7 @@ impl EmbeddingSettings {
|
|||||||
EmbedderSource::Ollama,
|
EmbedderSource::Ollama,
|
||||||
EmbedderSource::Rest,
|
EmbedderSource::Rest,
|
||||||
],
|
],
|
||||||
Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest],
|
Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest, EmbedderSource::OpenAi],
|
||||||
Self::QUERY => &[EmbedderSource::Rest],
|
Self::QUERY => &[EmbedderSource::Rest],
|
||||||
Self::INPUT_FIELD => &[EmbedderSource::Rest],
|
Self::INPUT_FIELD => &[EmbedderSource::Rest],
|
||||||
Self::PATH_TO_EMBEDDINGS => &[EmbedderSource::Rest],
|
Self::PATH_TO_EMBEDDINGS => &[EmbedderSource::Rest],
|
||||||
@ -390,6 +399,7 @@ impl EmbeddingSettings {
|
|||||||
Self::DOCUMENT_TEMPLATE,
|
Self::DOCUMENT_TEMPLATE,
|
||||||
Self::DIMENSIONS,
|
Self::DIMENSIONS,
|
||||||
Self::DISTRIBUTION,
|
Self::DISTRIBUTION,
|
||||||
|
Self::URL,
|
||||||
],
|
],
|
||||||
EmbedderSource::HuggingFace => &[
|
EmbedderSource::HuggingFace => &[
|
||||||
Self::SOURCE,
|
Self::SOURCE,
|
||||||
@ -494,6 +504,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
distribution: distribution.map(Setting::Set).unwrap_or_default(),
|
||||||
},
|
},
|
||||||
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions {
|
||||||
|
url,
|
||||||
api_key,
|
api_key,
|
||||||
embedding_model,
|
embedding_model,
|
||||||
dimensions,
|
dimensions,
|
||||||
@ -505,7 +516,7 @@ impl From<EmbeddingConfig> for EmbeddingSettings {
|
|||||||
api_key: api_key.map(Setting::Set).unwrap_or_default(),
|
api_key: api_key.map(Setting::Set).unwrap_or_default(),
|
||||||
dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
|
dimensions: dimensions.map(Setting::Set).unwrap_or_default(),
|
||||||
document_template: Setting::Set(prompt.template),
|
document_template: Setting::Set(prompt.template),
|
||||||
url: Setting::NotSet,
|
url: url.map(Setting::Set).unwrap_or_default(),
|
||||||
query: Setting::NotSet,
|
query: Setting::NotSet,
|
||||||
input_field: Setting::NotSet,
|
input_field: Setting::NotSet,
|
||||||
path_to_embeddings: Setting::NotSet,
|
path_to_embeddings: Setting::NotSet,
|
||||||
@ -608,6 +619,9 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
|
|||||||
options.embedding_model = model;
|
options.embedding_model = model;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if let Some(url) = url.set() {
|
||||||
|
options.url = Some(url);
|
||||||
|
}
|
||||||
if let Some(api_key) = api_key.set() {
|
if let Some(api_key) = api_key.set() {
|
||||||
options.api_key = Some(api_key);
|
options.api_key = Some(api_key);
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user