mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
wraps the index embedding config in a struct
This commit is contained in:
parent
04f6523f3c
commit
9eb6f522ea
7 changed files with 112 additions and 75 deletions
|
@ -785,6 +785,7 @@ mod tests {
|
|||
use super::*;
|
||||
use crate::documents::documents_batch_reader_from_objects;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::search::TermsMatchingStrategy;
|
||||
use crate::update::Setting;
|
||||
use crate::{db_snap, Filter, Search};
|
||||
|
@ -2620,7 +2621,8 @@ mod tests {
|
|||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let mut embedding_configs = index.embedding_configs(&rtxn).unwrap();
|
||||
let (embedder_name, embedder, user_defined) = embedding_configs.pop().unwrap();
|
||||
let IndexEmbeddingConfig { name: embedder_name, config: embedder, user_defined } =
|
||||
embedding_configs.pop().unwrap();
|
||||
insta::assert_snapshot!(embedder_name, @"manual");
|
||||
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0, 1, 2]>");
|
||||
let embedder =
|
||||
|
|
|
@ -20,6 +20,7 @@ use super::MergeFn;
|
|||
use crate::external_documents_ids::{DocumentOperation, DocumentOperationKind};
|
||||
use crate::facet::FacetType;
|
||||
use crate::index::db_name::DOCUMENTS;
|
||||
use crate::index::IndexEmbeddingConfig;
|
||||
use crate::proximity::MAX_DISTANCE;
|
||||
use crate::update::del_add::{deladd_serialize_add_side, DelAdd, KvReaderDelAdd};
|
||||
use crate::update::facet::FacetsUpdate;
|
||||
|
@ -156,8 +157,11 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||
let mut docids = index.documents_ids(wtxn)?;
|
||||
let mut iter = merger.into_stream_merger_iter()?;
|
||||
|
||||
let embedders: BTreeSet<_> =
|
||||
index.embedding_configs(wtxn)?.into_iter().map(|(name, _, _)| name).collect();
|
||||
let embedders: BTreeSet<_> = index
|
||||
.embedding_configs(wtxn)?
|
||||
.into_iter()
|
||||
.map(|IndexEmbeddingConfig { name, .. }| name)
|
||||
.collect();
|
||||
let mut vectors_buffer = Vec::new();
|
||||
while let Some((key, reader)) = iter.next()? {
|
||||
let mut writer: KvWriter<_, FieldId> = KvWriter::memory();
|
||||
|
@ -653,10 +657,12 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||
let Some((expected_dimension, embedder_name)) = params else { unreachable!() };
|
||||
|
||||
let mut embedding_configs = index.embedding_configs(&wtxn)?;
|
||||
let (_name, _conf, ud) =
|
||||
embedding_configs.iter_mut().find(|config| config.0 == embedder_name).unwrap();
|
||||
*ud -= remove_from_user_defined;
|
||||
*ud |= user_defined;
|
||||
let index_embedder_config = embedding_configs
|
||||
.iter_mut()
|
||||
.find(|IndexEmbeddingConfig { name, .. }| name == &embedder_name)
|
||||
.unwrap();
|
||||
index_embedder_config.user_defined -= remove_from_user_defined;
|
||||
index_embedder_config.user_defined |= user_defined;
|
||||
|
||||
index.put_embedding_configs(wtxn, embedding_configs)?;
|
||||
|
||||
|
|
|
@ -15,7 +15,9 @@ use super::index_documents::{IndexDocumentsConfig, Transform};
|
|||
use super::IndexerConfig;
|
||||
use crate::criterion::Criterion;
|
||||
use crate::error::UserError;
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::index::{
|
||||
IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
|
||||
};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
use crate::update::index_documents::IndexDocumentsMethod;
|
||||
|
@ -930,8 +932,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
let old_configs: BTreeMap<String, (Setting<EmbeddingSettings>, RoaringBitmap)> =
|
||||
old_configs
|
||||
.into_iter()
|
||||
.map(|(name, setting, user_defined)| {
|
||||
(name, (Setting::Set(setting.into()), user_defined))
|
||||
.map(|IndexEmbeddingConfig { name, config, user_defined }| {
|
||||
(name, (Setting::Set(config.into()), user_defined))
|
||||
})
|
||||
.collect();
|
||||
|
||||
|
@ -975,23 +977,27 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
}
|
||||
}
|
||||
}
|
||||
let new_configs: Vec<(String, EmbeddingConfig, RoaringBitmap)> = new_configs
|
||||
let new_configs: Vec<IndexEmbeddingConfig> = new_configs
|
||||
.into_iter()
|
||||
.filter_map(|(name, (setting, user_defined))| match setting {
|
||||
Setting::Set(settings) => Some((name, settings.into(), user_defined)),
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => {
|
||||
Some((name, EmbeddingSettings::default().into(), user_defined))
|
||||
.filter_map(|(name, (config, user_defined))| match config {
|
||||
Setting::Set(config) => {
|
||||
Some(IndexEmbeddingConfig { name, config: config.into(), user_defined })
|
||||
}
|
||||
Setting::Reset => None,
|
||||
Setting::NotSet => Some(IndexEmbeddingConfig {
|
||||
name,
|
||||
config: EmbeddingSettings::default().into(),
|
||||
user_defined,
|
||||
}),
|
||||
})
|
||||
.collect();
|
||||
|
||||
self.index.embedder_category_id.clear(self.wtxn)?;
|
||||
for (index, (embedder_name, _, _)) in new_configs.iter().enumerate() {
|
||||
for (index, index_embedding_config) in new_configs.iter().enumerate() {
|
||||
self.index.embedder_category_id.put_with_flags(
|
||||
self.wtxn,
|
||||
heed::PutFlags::APPEND,
|
||||
embedder_name,
|
||||
&index_embedding_config.name,
|
||||
&index
|
||||
.try_into()
|
||||
.map_err(|_| UserError::TooManyEmbedders(new_configs.len()))?,
|
||||
|
@ -1371,21 +1377,25 @@ impl InnerIndexSettings {
|
|||
}
|
||||
}
|
||||
|
||||
fn embedders(
|
||||
embedding_configs: Vec<(String, EmbeddingConfig, RoaringBitmap)>,
|
||||
) -> Result<EmbeddingConfigs> {
|
||||
fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingConfigs> {
|
||||
let res: Result<_> = embedding_configs
|
||||
.into_iter()
|
||||
.map(|(name, EmbeddingConfig { embedder_options, prompt }, _)| {
|
||||
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
||||
.map(
|
||||
|IndexEmbeddingConfig {
|
||||
name,
|
||||
config: EmbeddingConfig { embedder_options, prompt },
|
||||
..
|
||||
}| {
|
||||
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
|
||||
|
||||
let embedder = Arc::new(
|
||||
Embedder::new(embedder_options.clone())
|
||||
.map_err(crate::vector::Error::from)
|
||||
.map_err(crate::Error::from)?,
|
||||
);
|
||||
Ok((name, (embedder, prompt)))
|
||||
})
|
||||
let embedder = Arc::new(
|
||||
Embedder::new(embedder_options.clone())
|
||||
.map_err(crate::vector::Error::from)
|
||||
.map_err(crate::Error::from)?,
|
||||
);
|
||||
Ok((name, (embedder, prompt)))
|
||||
},
|
||||
)
|
||||
.collect();
|
||||
res.map(EmbeddingConfigs::new)
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue