Various changes

- fixed seed for arroy
- check the dimensions of a vector as soon as it is provided to search
- don't embed whitespace
This commit is contained in:
Louis Dureuil 2023-12-14 16:01:35 +01:00
parent 217105b7da
commit 87bba98bd8
No known key found for this signature in database
9 changed files with 148 additions and 51 deletions

View file

@ -4,7 +4,7 @@ use reqwest::StatusCode;
use serde::{Deserialize, Serialize};
use super::error::{EmbedError, NewEmbedderError};
use super::{Embedding, Embeddings};
use super::{DistributionShift, Embedding, Embeddings};
#[derive(Debug)]
pub struct Embedder {
@ -65,6 +65,14 @@ impl EmbeddingModel {
_ => None,
}
}
/// Returns the [`DistributionShift`] parameters associated with this embedding model.
///
/// Every variant currently handled yields `Some`; the `Option` return type
/// leaves room for models without known parameters.
fn distribution(&self) -> Option<DistributionShift> {
    // The match is kept exhaustive with no catch-all, so adding a new model
    // variant forces a decision here at compile time.
    let shift = match self {
        // NOTE(review): presumably empirically measured values for this model — confirm.
        EmbeddingModel::TextEmbeddingAda002 => {
            DistributionShift { current_mean: 0.90, current_sigma: 0.08 }
        }
    };
    Some(shift)
}
}
pub const OPENAI_EMBEDDINGS_URL: &str = "https://api.openai.com/v1/embeddings";
@ -326,6 +334,10 @@ impl Embedder {
/// Returns the number of dimensions reported by the configured embedding model.
pub fn dimensions(&self) -> usize {
    let model = &self.options.embedding_model;
    model.dimensions()
}
/// Returns the [`DistributionShift`] reported by the configured embedding model, if any.
pub fn distribution(&self) -> Option<DistributionShift> {
    let model = &self.options.embedding_model;
    model.distribution()
}
}
// retrying in case of failure