mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Various changes
- fixed seed for arroy - check vector dimensions as soon as it is provided to search - don't embed whitespace
This commit is contained in:
parent
217105b7da
commit
87bba98bd8
9 changed files with 148 additions and 51 deletions
|
@ -7,7 +7,7 @@ use hf_hub::{Repo, RepoType};
|
|||
use tokenizers::{PaddingParams, Tokenizer};
|
||||
|
||||
pub use super::error::{EmbedError, Error, NewEmbedderError};
|
||||
use super::{Embedding, Embeddings};
|
||||
use super::{DistributionShift, Embedding, Embeddings};
|
||||
|
||||
#[derive(
|
||||
Debug,
|
||||
|
@ -184,4 +184,12 @@ impl Embedder {
|
|||
pub fn dimensions(&self) -> usize {
|
||||
self.dimensions
|
||||
}
|
||||
|
||||
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||
if self.options.model == "BAAI/bge-base-en-v1.5" {
|
||||
Some(DistributionShift { current_mean: 0.85, current_sigma: 0.1 })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -202,6 +202,14 @@ impl Embedder {
|
|||
Embedder::UserProvided(embedder) => embedder.dimensions(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||
match self {
|
||||
Embedder::HuggingFace(embedder) => embedder.distribution(),
|
||||
Embedder::OpenAi(embedder) => embedder.distribution(),
|
||||
Embedder::UserProvided(_embedder) => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
|
|
|
@ -4,7 +4,7 @@ use reqwest::StatusCode;
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use super::error::{EmbedError, NewEmbedderError};
|
||||
use super::{Embedding, Embeddings};
|
||||
use super::{DistributionShift, Embedding, Embeddings};
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Embedder {
|
||||
|
@ -65,6 +65,14 @@ impl EmbeddingModel {
|
|||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
fn distribution(&self) -> Option<DistributionShift> {
|
||||
match self {
|
||||
EmbeddingModel::TextEmbeddingAda002 => {
|
||||
Some(DistributionShift { current_mean: 0.90, current_sigma: 0.08 })
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub const OPENAI_EMBEDDINGS_URL: &str = "https://api.openai.com/v1/embeddings";
|
||||
|
@ -326,6 +334,10 @@ impl Embedder {
|
|||
pub fn dimensions(&self) -> usize {
|
||||
self.options.embedding_model.dimensions()
|
||||
}
|
||||
|
||||
pub fn distribution(&self) -> Option<DistributionShift> {
|
||||
self.options.embedding_model.distribution()
|
||||
}
|
||||
}
|
||||
|
||||
// retrying in case of failure
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue