Various changes

- fixed seed for arroy
- check the vector's dimensions as soon as the vector is provided to search
- don't embed whitespace
This commit is contained in:
Louis Dureuil 2023-12-14 16:01:35 +01:00
parent 217105b7da
commit 87bba98bd8
No known key found for this signature in database
9 changed files with 148 additions and 51 deletions

View file

@ -7,7 +7,7 @@ use hf_hub::{Repo, RepoType};
use tokenizers::{PaddingParams, Tokenizer};
pub use super::error::{EmbedError, Error, NewEmbedderError};
use super::{Embedding, Embeddings};
use super::{DistributionShift, Embedding, Embeddings};
#[derive(
Debug,
@ -184,4 +184,12 @@ impl Embedder {
/// Returns the value stored in this embedder's `dimensions` field.
///
/// NOTE(review): presumably the length of the embedding vectors produced by
/// the loaded model — confirm against where the field is initialized.
pub fn dimensions(&self) -> usize {
self.dimensions
}
/// Returns the hard-coded [`DistributionShift`] for the configured model, if any.
///
/// Only the `BAAI/bge-base-en-v1.5` model has a known shift
/// (`current_mean: 0.85`, `current_sigma: 0.1`); any other model name
/// yields `None`.
pub fn distribution(&self) -> Option<DistributionShift> {
    // The shift values are only known for this specific model name.
    let is_bge_base_en = self.options.model == "BAAI/bge-base-en-v1.5";
    let bge_base_en_shift = DistributionShift { current_mean: 0.85, current_sigma: 0.1 };

    is_bge_base_en.then_some(bge_base_en_shift)
}
}