Disable the cache by default and make it experimental

This commit is contained in:
Louis Dureuil 2025-03-13 14:54:31 +01:00
parent 1876132172
commit e2d372823a
No known key found for this signature in database
14 changed files with 101 additions and 34 deletions

View file

@@ -2806,8 +2806,9 @@ mod tests {
embedding_configs.pop().unwrap();
insta::assert_snapshot!(embedder_name, @"manual");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[0, 1, 2]>");
let embedder =
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
let embedder = std::sync::Arc::new(
crate::vector::Embedder::new(embedder.embedder_options, 0).unwrap(),
);
let res = index
.search(&rtxn)
.semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))

View file

@@ -1628,7 +1628,8 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
let embedder = Arc::new(
Embedder::new(embedder_options.clone())
// cache_cap: no cache needed for indexing purposes
Embedder::new(embedder_options.clone(), 0)
.map_err(crate::vector::Error::from)
.map_err(crate::Error::from)?,
);

View file

@@ -59,9 +59,11 @@ pub struct EmbedderOptions {
impl Embedder {
pub fn new(
EmbedderOptions { search, index }: EmbedderOptions,
cache_cap: usize,
) -> Result<Self, NewEmbedderError> {
let search = SubEmbedder::new(search)?;
let index = SubEmbedder::new(index)?;
let search = SubEmbedder::new(search, cache_cap)?;
// cache is only used at search
let index = SubEmbedder::new(index, 0)?;
// check dimensions
if search.dimensions() != index.dimensions() {
@@ -119,19 +121,28 @@ impl Embedder {
}
impl SubEmbedder {
pub fn new(options: SubEmbedderOptions) -> std::result::Result<Self, NewEmbedderError> {
pub fn new(
options: SubEmbedderOptions,
cache_cap: usize,
) -> std::result::Result<Self, NewEmbedderError> {
Ok(match options {
SubEmbedderOptions::HuggingFace(options) => {
Self::HuggingFace(hf::Embedder::new(options)?)
Self::HuggingFace(hf::Embedder::new(options, cache_cap)?)
}
SubEmbedderOptions::OpenAi(options) => {
Self::OpenAi(openai::Embedder::new(options, cache_cap)?)
}
SubEmbedderOptions::Ollama(options) => {
Self::Ollama(ollama::Embedder::new(options, cache_cap)?)
}
SubEmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?),
SubEmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?),
SubEmbedderOptions::UserProvided(options) => {
Self::UserProvided(manual::Embedder::new(options))
}
SubEmbedderOptions::Rest(options) => {
Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?)
}
SubEmbedderOptions::Rest(options) => Self::Rest(rest::Embedder::new(
options,
cache_cap,
rest::ConfigurationSource::User,
)?),
})
}

View file

@@ -150,7 +150,10 @@ impl From<PoolingConfig> for Pooling {
}
impl Embedder {
pub fn new(options: EmbedderOptions) -> std::result::Result<Self, NewEmbedderError> {
pub fn new(
options: EmbedderOptions,
cache_cap: usize,
) -> std::result::Result<Self, NewEmbedderError> {
let device = match candle_core::Device::cuda_if_available(0) {
Ok(device) => device,
Err(error) => {
@@ -252,7 +255,7 @@ impl Embedder {
options,
dimensions: 0,
pooling,
cache: EmbeddingCache::new(super::CACHE_CAP),
cache: EmbeddingCache::new(cache_cap),
};
let embeddings = this

View file

@@ -560,8 +560,8 @@ struct EmbeddingCache {
impl EmbeddingCache {
const MAX_TEXT_LEN: usize = 2000;
pub fn new(cap: u16) -> Self {
let data = NonZeroUsize::new(cap.into()).map(lru::LruCache::new).map(Mutex::new);
pub fn new(cap: usize) -> Self {
let data = NonZeroUsize::new(cap).map(lru::LruCache::new).map(Mutex::new);
Self { data }
}
@@ -584,14 +584,14 @@ impl EmbeddingCache {
if text.len() > Self::MAX_TEXT_LEN {
return;
}
tracing::trace!(text, "embedding added to cache");
let mut cache = data.lock().unwrap();
cache.put(text, embedding);
}
}
pub const CACHE_CAP: u16 = 150;
/// Configuration for an embedder.
#[derive(Debug, Clone, Default, serde::Deserialize, serde::Serialize)]
pub struct EmbeddingConfig {
@@ -670,19 +670,30 @@ impl Default for EmbedderOptions {
impl Embedder {
/// Spawns a new embedder built from its options.
pub fn new(options: EmbedderOptions) -> std::result::Result<Self, NewEmbedderError> {
pub fn new(
options: EmbedderOptions,
cache_cap: usize,
) -> std::result::Result<Self, NewEmbedderError> {
Ok(match options {
EmbedderOptions::HuggingFace(options) => Self::HuggingFace(hf::Embedder::new(options)?),
EmbedderOptions::OpenAi(options) => Self::OpenAi(openai::Embedder::new(options)?),
EmbedderOptions::Ollama(options) => Self::Ollama(ollama::Embedder::new(options)?),
EmbedderOptions::HuggingFace(options) => {
Self::HuggingFace(hf::Embedder::new(options, cache_cap)?)
}
EmbedderOptions::OpenAi(options) => {
Self::OpenAi(openai::Embedder::new(options, cache_cap)?)
}
EmbedderOptions::Ollama(options) => {
Self::Ollama(ollama::Embedder::new(options, cache_cap)?)
}
EmbedderOptions::UserProvided(options) => {
Self::UserProvided(manual::Embedder::new(options))
}
EmbedderOptions::Rest(options) => {
Self::Rest(rest::Embedder::new(options, rest::ConfigurationSource::User)?)
}
EmbedderOptions::Rest(options) => Self::Rest(rest::Embedder::new(
options,
cache_cap,
rest::ConfigurationSource::User,
)?),
EmbedderOptions::Composite(options) => {
Self::Composite(composite::Embedder::new(options)?)
Self::Composite(composite::Embedder::new(options, cache_cap)?)
}
})
}
@@ -718,7 +729,6 @@ impl Embedder {
}?;
if let Some(cache) = self.cache() {
tracing::trace!(text, "embedding added to cache");
cache.put(text.to_owned(), embedding.clone());
}

View file

@@ -75,9 +75,10 @@ impl EmbedderOptions {
}
impl Embedder {
pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
pub fn new(options: EmbedderOptions, cache_cap: usize) -> Result<Self, NewEmbedderError> {
let rest_embedder = match RestEmbedder::new(
options.into_rest_embedder_config()?,
cache_cap,
super::rest::ConfigurationSource::Ollama,
) {
Ok(embedder) => embedder,

View file

@@ -176,7 +176,7 @@ pub struct Embedder {
}
impl Embedder {
pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
pub fn new(options: EmbedderOptions, cache_cap: usize) -> Result<Self, NewEmbedderError> {
let mut inferred_api_key = Default::default();
let api_key = options.api_key.as_ref().unwrap_or_else(|| {
inferred_api_key = infer_api_key();
@@ -201,6 +201,7 @@ impl Embedder {
}),
headers: Default::default(),
},
cache_cap,
super::rest::ConfigurationSource::OpenAi,
)?;

View file

@@ -10,8 +10,7 @@ use serde::{Deserialize, Serialize};
use super::error::EmbedErrorKind;
use super::json_template::ValueTemplate;
use super::{
DistributionShift, EmbedError, Embedding, EmbeddingCache, NewEmbedderError, CACHE_CAP,
REQUEST_PARALLELISM,
DistributionShift, EmbedError, Embedding, EmbeddingCache, NewEmbedderError, REQUEST_PARALLELISM,
};
use crate::error::FaultSource;
use crate::ThreadPoolNoAbort;
@@ -127,6 +126,7 @@ enum InputType {
impl Embedder {
pub fn new(
options: EmbedderOptions,
cache_cap: usize,
configuration_source: ConfigurationSource,
) -> Result<Self, NewEmbedderError> {
let bearer = options.api_key.as_deref().map(|api_key| format!("Bearer {api_key}"));
@@ -160,7 +160,7 @@ impl Embedder {
data,
dimensions,
distribution: options.distribution,
cache: EmbeddingCache::new(CACHE_CAP),
cache: EmbeddingCache::new(cache_cap),
})
}