Merge branch 'main' into indexer-edition-2024

This commit is contained in:
ManyTheFish 2024-09-25 07:37:32 +02:00
commit 974272f2e9
94 changed files with 8510 additions and 4616 deletions

View file

@ -15,14 +15,14 @@ use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::error::FaultSource;
use crate::index::IndexEmbeddingConfig;
use crate::prompt::Prompt;
use crate::prompt::{FieldsIdsMapWithMetadata, Prompt};
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
use crate::vector::parsed_vectors::{ParsedVectorsDiff, VectorState, RESERVED_VECTORS_FIELD_NAME};
use crate::vector::settings::{EmbedderAction, ReindexAction};
use crate::vector::settings::ReindexAction;
use crate::vector::{Embedder, Embeddings};
use crate::{try_split_array_at, DocumentId, FieldId, FieldsIdsMap, Result, ThreadPoolNoAbort};
use crate::{try_split_array_at, DocumentId, FieldId, Result, ThreadPoolNoAbort};
/// The length of the elements that are always in the buffer when inserting new values.
const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
@ -189,7 +189,13 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
let reindex_vectors = settings_diff.reindex_vectors();
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
let old_fields_ids_map =
FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids);
let new_fields_ids_map = &settings_diff.new.fields_ids_map;
let new_fields_ids_map =
FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids);
// the vector field id may have changed
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
@ -202,65 +208,65 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
if reindex_vectors {
for (name, action) in settings_diff.embedding_config_updates.iter() {
match action {
EmbedderAction::WriteBackToDocuments(_) => continue, // already deleted
EmbedderAction::Reindex(action) => {
let Some((embedder_name, (embedder, prompt))) = configs.remove_entry(name)
else {
tracing::error!(embedder = name, "Requested embedder config not found");
continue;
};
if let Some(action) = action.reindex() {
let Some((embedder_name, (embedder, prompt, _quantized))) =
configs.remove_entry(name)
else {
tracing::error!(embedder = name, "Requested embedder config not found");
continue;
};
// (docid, _index) -> KvWriterDelAdd -> Vector
let manual_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid, _index) -> KvWriterDelAdd -> Vector
let manual_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> (prompt)
let prompts_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> (prompt)
let prompts_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> ()
let remove_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
// (docid) -> ()
let remove_vectors_writer = create_writer(
indexer.chunk_compression_type,
indexer.chunk_compression_level,
tempfile::tempfile()?,
);
let action = match action {
ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
ReindexAction::RegeneratePrompts => {
let Some((_, old_prompt)) = old_configs.get(name) else {
tracing::error!(embedder = name, "Old embedder config not found");
continue;
};
let action = match action {
ReindexAction::FullReindex => ExtractionAction::SettingsFullReindex,
ReindexAction::RegeneratePrompts => {
let Some((_, old_prompt, _quantized)) = old_configs.get(name) else {
tracing::error!(embedder = name, "Old embedder config not found");
continue;
};
ExtractionAction::SettingsRegeneratePrompts { old_prompt }
}
};
ExtractionAction::SettingsRegeneratePrompts { old_prompt }
}
};
extractors.push(EmbedderVectorExtractor {
embedder_name,
embedder,
prompt,
prompts_writer,
remove_vectors_writer,
manual_vectors_writer,
add_to_user_provided: RoaringBitmap::new(),
action,
});
}
extractors.push(EmbedderVectorExtractor {
embedder_name,
embedder,
prompt,
prompts_writer,
remove_vectors_writer,
manual_vectors_writer,
add_to_user_provided: RoaringBitmap::new(),
action,
});
} else {
continue;
}
}
} else {
// document operation
for (embedder_name, (embedder, prompt)) in configs.into_iter() {
for (embedder_name, (embedder, prompt, _quantized)) in configs.into_iter() {
// (docid, _index) -> KvWriterDelAdd -> Vector
let manual_vectors_writer = create_writer(
indexer.chunk_compression_type,
@ -376,7 +382,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
);
continue;
}
regenerate_prompt(obkv, prompt, new_fields_ids_map)?
regenerate_prompt(obkv, prompt, &new_fields_ids_map)?
}
},
// prompt regeneration is only triggered for existing embedders
@ -393,7 +399,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
regenerate_if_prompt_changed(
obkv,
(old_prompt, prompt),
(old_fields_ids_map, new_fields_ids_map),
(&old_fields_ids_map, &new_fields_ids_map),
)?
} else {
// we can simply ignore user provided vectors as they are not regenerated and are
@ -409,7 +415,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
prompt,
(add_to_user_provided, remove_from_user_provided),
(old, new),
(old_fields_ids_map, new_fields_ids_map),
(&old_fields_ids_map, &new_fields_ids_map),
document_id,
embedder_name,
embedder_is_manual,
@ -479,7 +485,10 @@ fn extract_vector_document_diff(
prompt: &Prompt,
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
(old, new): (VectorState, VectorState),
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
document_id: impl Fn() -> Value,
embedder_name: &str,
embedder_is_manual: bool,
@ -599,7 +608,10 @@ fn extract_vector_document_diff(
fn regenerate_if_prompt_changed(
obkv: &obkv::KvReader<FieldId>,
(old_prompt, new_prompt): (&Prompt, &Prompt),
(old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
) -> Result<VectorStateDelta> {
let old_prompt =
old_prompt.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or(Default::default());
@ -614,7 +626,7 @@ fn regenerate_if_prompt_changed(
fn regenerate_prompt(
obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt,
new_fields_ids_map: &FieldsIdsMap,
new_fields_ids_map: &FieldsIdsMapWithMetadata,
) -> Result<VectorStateDelta> {
let prompt = prompt.render(obkv, DelAdd::Addition, new_fields_ids_map)?;

View file

@ -37,7 +37,7 @@ use crate::update::index_documents::parallel::ImmutableObkvs;
use crate::update::{
IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
};
use crate::vector::EmbeddingConfigs;
use crate::vector::{ArroyWrapper, EmbeddingConfigs};
use crate::{CboRoaringBitmapCodec, Index, Object, Result};
static MERGED_DATABASE_COUNT: usize = 7;
@ -673,6 +673,24 @@ where
let number_of_documents = self.index.number_of_documents(self.wtxn)?;
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
// If an embedder wasn't used in the typedchunk but must be binary quantized
// we should insert it in `dimension`
for (name, action) in settings_diff.embedding_config_updates.iter() {
if action.is_being_quantized && !dimension.contains_key(name.as_str()) {
let index = self.index.embedder_category_id.get(self.wtxn, name)?.ok_or(
InternalError::DatabaseMissingEntry {
db_name: "embedder_category_id",
key: None,
},
)?;
let first_id = crate::vector::arroy_db_range_for_embedder(index).next().unwrap();
let reader =
ArroyWrapper::new(self.index.vector_arroy, first_id, action.was_quantized);
let dim = reader.dimensions(self.wtxn)?;
dimension.insert(name.to_string(), dim);
}
}
for (embedder_name, dimension) in dimension {
let wtxn = &mut *self.wtxn;
let vector_arroy = self.index.vector_arroy;
@ -680,13 +698,23 @@ where
let embedder_index = self.index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
)?;
let embedder_config = settings_diff.embedding_config_updates.get(&embedder_name);
let was_quantized = settings_diff
.old
.embedding_configs
.get(&embedder_name)
.map_or(false, |conf| conf.2);
let is_quantizing = embedder_config.map_or(false, |action| action.is_being_quantized);
pool.install(|| {
for k in crate::vector::arroy_db_range_for_embedder(embedder_index) {
let writer = arroy::Writer::new(vector_arroy, k, dimension);
if writer.need_build(wtxn)? {
writer.build(wtxn, &mut rng, None)?;
} else if writer.is_empty(wtxn)? {
let mut writer = ArroyWrapper::new(vector_arroy, k, was_quantized);
if is_quantizing {
writer.quantize(wtxn, k, dimension)?;
}
if writer.need_build(wtxn, dimension)? {
writer.build(wtxn, &mut rng, dimension)?;
} else if writer.is_empty(wtxn, dimension)? {
break;
}
}
@ -2734,11 +2762,13 @@ mod tests {
api_key: Setting::NotSet,
dimensions: Setting::Set(3),
document_template: Setting::NotSet,
document_template_max_bytes: Setting::NotSet,
url: Setting::NotSet,
request: Setting::NotSet,
response: Setting::NotSet,
distribution: Setting::NotSet,
headers: Setting::NotSet,
binary_quantized: Setting::NotSet,
}),
);
settings.set_embedder_settings(embedders);
@ -2767,7 +2797,7 @@ mod tests {
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
let res = index
.search(&rtxn)
.semantic(embedder_name, embedder, Some([0.0, 1.0, 2.0].to_vec()))
.semantic(embedder_name, embedder, false, Some([0.0, 1.0, 2.0].to_vec()))
.execute()
.unwrap();
assert_eq!(res.documents_ids.len(), 3);

View file

@ -29,7 +29,8 @@ use crate::update::index_documents::GrenadParameters;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::{EmbedderAction, WriteBackToDocuments};
use crate::vector::settings::WriteBackToDocuments;
use crate::vector::ArroyWrapper;
use crate::{
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
};
@ -992,19 +993,17 @@ impl<'a, 'i> Transform<'a, 'i> {
None
};
let readers: Result<
BTreeMap<&str, (Vec<arroy::Reader<'_, arroy::distances::Angular>>, &RoaringBitmap)>,
> = settings_diff
let readers: Result<BTreeMap<&str, (Vec<ArroyWrapper>, &RoaringBitmap)>> = settings_diff
.embedding_config_updates
.iter()
.filter_map(|(name, action)| {
if let EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
embedder_id,
user_provided,
}) = action
if let Some(WriteBackToDocuments { embedder_id, user_provided }) =
action.write_back()
{
let readers: Result<Vec<_>> =
self.index.arroy_readers(wtxn, *embedder_id).collect();
let readers: Result<Vec<_>> = self
.index
.arroy_readers(wtxn, *embedder_id, action.was_quantized)
.collect();
match readers {
Ok(readers) => Some(Ok((name.as_str(), (readers, user_provided)))),
Err(error) => Some(Err(error)),
@ -1107,23 +1106,14 @@ impl<'a, 'i> Transform<'a, 'i> {
}
}
let mut writers = Vec::new();
// delete all vectors from the embedders that need removal
for (_, (readers, _)) in readers {
for reader in readers {
let dimensions = reader.dimensions();
let arroy_index = reader.index();
drop(reader);
let writer = arroy::Writer::new(self.index.vector_arroy, arroy_index, dimensions);
writers.push(writer);
let dimensions = reader.dimensions(wtxn)?;
reader.clear(wtxn, dimensions)?;
}
}
for writer in writers {
writer.clear(wtxn)?;
}
let grenad_params = GrenadParameters {
chunk_compression_type: self.indexer_settings.chunk_compression_type,
chunk_compression_level: self.indexer_settings.chunk_compression_level,

View file

@ -26,6 +26,7 @@ use crate::update::index_documents::helpers::{
as_cloneable_grenad, try_split_array_at, KeepLatestObkv,
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::ArroyWrapper;
use crate::{
lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, InternalError,
Result, SerializationError, U8StrStrCodec,
@ -661,9 +662,14 @@ pub(crate) fn write_typed_chunk_into_index(
let embedder_index = index.embedder_category_id.get(wtxn, &embedder_name)?.ok_or(
InternalError::DatabaseMissingEntry { db_name: "embedder_category_id", key: None },
)?;
let binary_quantized = settings_diff
.old
.embedding_configs
.get(&embedder_name)
.map_or(false, |conf| conf.2);
// FIXME: allow customizing distance
let writers: Vec<_> = crate::vector::arroy_db_range_for_embedder(embedder_index)
.map(|k| arroy::Writer::new(index.vector_arroy, k, expected_dimension))
.map(|k| ArroyWrapper::new(index.vector_arroy, k, binary_quantized))
.collect();
// remove vectors for the docids we want removed
@ -674,7 +680,7 @@ pub(crate) fn write_typed_chunk_into_index(
for writer in &writers {
// Uses invariant: vectors are packed in the first writers.
if !writer.del_item(wtxn, docid)? {
if !writer.del_item(wtxn, expected_dimension, docid)? {
break;
}
}
@ -706,7 +712,7 @@ pub(crate) fn write_typed_chunk_into_index(
)));
}
for (embedding, writer) in embeddings.iter().zip(&writers) {
writer.add_item(wtxn, docid, embedding)?;
writer.add_item(wtxn, expected_dimension, docid, embedding)?;
}
}
@ -729,7 +735,7 @@ pub(crate) fn write_typed_chunk_into_index(
break;
};
if candidate == vector {
writer.del_item(wtxn, docid)?;
writer.del_item(wtxn, expected_dimension, docid)?;
deleted_index = Some(index);
}
}
@ -746,8 +752,13 @@ pub(crate) fn write_typed_chunk_into_index(
if let Some((last_index, vector)) = last_index_with_a_vector {
// unwrap: computed the index from the list of writers
let writer = writers.get(last_index).unwrap();
writer.del_item(wtxn, docid)?;
writers.get(deleted_index).unwrap().add_item(wtxn, docid, &vector)?;
writer.del_item(wtxn, expected_dimension, docid)?;
writers.get(deleted_index).unwrap().add_item(
wtxn,
expected_dimension,
docid,
&vector,
)?;
}
}
}
@ -757,8 +768,8 @@ pub(crate) fn write_typed_chunk_into_index(
// overflow was detected during vector extraction.
for writer in &writers {
if !writer.contains_item(wtxn, docid)? {
writer.add_item(wtxn, docid, &vector)?;
if !writer.contains_item(wtxn, expected_dimension, docid)? {
writer.add_item(wtxn, expected_dimension, docid, &vector)?;
break;
}
}

View file

@ -1,5 +1,6 @@
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::convert::TryInto;
use std::num::NonZeroUsize;
use std::result::Result as StdResult;
use std::sync::Arc;
@ -19,6 +20,7 @@ use crate::index::{
IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
};
use crate::order_by_map::OrderByMap;
use crate::prompt::default_max_bytes;
use crate::proximity::ProximityPrecision;
use crate::update::index_documents::IndexDocumentsMethod;
use crate::update::{IndexDocuments, UpdateIndexingStep};
@ -952,7 +954,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
let old_configs = self.index.embedding_configs(self.wtxn)?;
let remove_all: Result<BTreeMap<String, EmbedderAction>> = old_configs
.into_iter()
.map(|IndexEmbeddingConfig { name, config: _, user_provided }| -> Result<_> {
.map(|IndexEmbeddingConfig { name, config, user_provided }| -> Result<_> {
let embedder_id =
self.index.embedder_category_id.get(self.wtxn, &name)?.ok_or(
crate::InternalError::DatabaseMissingEntry {
@ -962,10 +964,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
)?;
Ok((
name,
EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
embedder_id,
user_provided,
}),
EmbedderAction::with_write_back(
WriteBackToDocuments { embedder_id, user_provided },
config.quantized(),
),
))
})
.collect();
@ -1002,7 +1004,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
match joined {
// updated config
EitherOrBoth::Both((name, (old, user_provided)), (_, new)) => {
let settings_diff = SettingsDiff::from_settings(old, new);
let was_quantized = old.binary_quantized.set().unwrap_or_default();
let settings_diff = SettingsDiff::from_settings(&name, old, new)?;
match settings_diff {
SettingsDiff::Remove => {
tracing::debug!(
@ -1021,25 +1024,29 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.index.embedder_category_id.delete(self.wtxn, &name)?;
embedder_actions.insert(
name,
EmbedderAction::WriteBackToDocuments(WriteBackToDocuments {
embedder_id,
user_provided,
}),
EmbedderAction::with_write_back(
WriteBackToDocuments { embedder_id, user_provided },
was_quantized,
),
);
}
SettingsDiff::Reindex { action, updated_settings } => {
SettingsDiff::Reindex { action, updated_settings, quantize } => {
tracing::debug!(
embedder = name,
user_provided = user_provided.len(),
?action,
"reindex embedder"
);
embedder_actions.insert(name.clone(), EmbedderAction::Reindex(action));
embedder_actions.insert(
name.clone(),
EmbedderAction::with_reindex(action, was_quantized)
.with_is_being_quantized(quantize),
);
let new =
validate_embedding_settings(Setting::Set(updated_settings), &name)?;
updated_configs.insert(name, (new, user_provided));
}
SettingsDiff::UpdateWithoutReindex { updated_settings } => {
SettingsDiff::UpdateWithoutReindex { updated_settings, quantize } => {
tracing::debug!(
embedder = name,
user_provided = user_provided.len(),
@ -1047,6 +1054,12 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
);
let new =
validate_embedding_settings(Setting::Set(updated_settings), &name)?;
if quantize {
embedder_actions.insert(
name.clone(),
EmbedderAction::default().with_is_being_quantized(true),
);
}
updated_configs.insert(name, (new, user_provided));
}
}
@ -1065,8 +1078,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
&mut setting,
);
let setting = validate_embedding_settings(setting, &name)?;
embedder_actions
.insert(name.clone(), EmbedderAction::Reindex(ReindexAction::FullReindex));
embedder_actions.insert(
name.clone(),
EmbedderAction::with_reindex(ReindexAction::FullReindex, false),
);
updated_configs.insert(name, (setting, RoaringBitmap::new()));
}
}
@ -1080,19 +1095,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
let mut find_free_index =
move || free_indices.find(|(_, free)| **free).map(|(index, _)| index as u8);
for (name, action) in embedder_actions.iter() {
match action {
EmbedderAction::Reindex(ReindexAction::RegeneratePrompts) => {
/* cannot be a new embedder, so has to have an id already */
}
EmbedderAction::Reindex(ReindexAction::FullReindex) => {
if self.index.embedder_category_id.get(self.wtxn, name)?.is_none() {
let id = find_free_index()
.ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
tracing::debug!(embedder = name, id, "assigning free id to new embedder");
self.index.embedder_category_id.put(self.wtxn, name, &id)?;
}
}
EmbedderAction::WriteBackToDocuments(_) => { /* already removed */ }
// ignore actions that are not possible for a new embedder
if matches!(action.reindex(), Some(ReindexAction::FullReindex))
&& self.index.embedder_category_id.get(self.wtxn, name)?.is_none()
{
let id =
find_free_index().ok_or(UserError::TooManyEmbedders(updated_configs.len()))?;
tracing::debug!(embedder = name, id, "assigning free id to new embedder");
self.index.embedder_category_id.put(self.wtxn, name, &id)?;
}
}
let updated_configs: Vec<IndexEmbeddingConfig> = updated_configs
@ -1238,7 +1248,7 @@ impl InnerIndexSettingsDiff {
old_settings: InnerIndexSettings,
new_settings: InnerIndexSettings,
primary_key_id: Option<FieldId>,
embedding_config_updates: BTreeMap<String, EmbedderAction>,
mut embedding_config_updates: BTreeMap<String, EmbedderAction>,
settings_update_only: bool,
) -> Self {
let only_additional_fields = match (
@ -1273,6 +1283,39 @@ impl InnerIndexSettingsDiff {
let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
!= new_settings.user_defined_searchable_fields;
// if the user-defined searchables changed, then we need to reindex prompts.
if cache_user_defined_searchables {
for (embedder_name, (config, _, _quantized)) in
new_settings.embedding_configs.inner_as_ref()
{
let was_quantized =
old_settings.embedding_configs.get(embedder_name).map_or(false, |conf| conf.2);
// skip embedders that don't use document templates
if !config.uses_document_template() {
continue;
}
// note: this could currently be entry.or_insert(..), but we're future-proofing with an explicit match
// this also makes the code clearer by explicitly handling the cases
match embedding_config_updates.entry(embedder_name.clone()) {
std::collections::btree_map::Entry::Vacant(entry) => {
entry.insert(EmbedderAction::with_reindex(
ReindexAction::RegeneratePrompts,
was_quantized,
));
}
std::collections::btree_map::Entry::Occupied(entry) => {
let EmbedderAction {
was_quantized: _,
is_being_quantized: _,
write_back: _, // We are deleting this embedder, so no point in regeneration
reindex: _, // We are already fully reindexing
} = entry.get();
}
};
}
}
InnerIndexSettingsDiff {
old: old_settings,
new: new_settings,
@ -1518,7 +1561,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
.map(
|IndexEmbeddingConfig {
name,
config: EmbeddingConfig { embedder_options, prompt },
config: EmbeddingConfig { embedder_options, prompt, quantized },
..
}| {
let prompt = Arc::new(prompt.try_into().map_err(crate::Error::from)?);
@ -1528,7 +1571,7 @@ fn embedders(embedding_configs: Vec<IndexEmbeddingConfig>) -> Result<EmbeddingCo
.map_err(crate::vector::Error::from)
.map_err(crate::Error::from)?,
);
Ok((name, (embedder, prompt)))
Ok((name, (embedder, prompt, quantized.unwrap_or_default())))
},
)
.collect();
@ -1547,16 +1590,31 @@ fn validate_prompt(
api_key,
dimensions,
document_template: Setting::Set(template),
document_template_max_bytes,
url,
request,
response,
distribution,
headers,
binary_quantized: binary_quantize,
}) => {
let max_bytes = match document_template_max_bytes.set() {
Some(max_bytes) => NonZeroUsize::new(max_bytes).ok_or_else(|| {
crate::error::UserError::InvalidSettingsDocumentTemplateMaxBytes {
embedder_name: name.to_owned(),
}
})?,
None => default_max_bytes(),
};
// validate
let template = crate::prompt::Prompt::new(template)
.map(|prompt| crate::prompt::PromptData::from(prompt).template)
.map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
let template = crate::prompt::Prompt::new(
template,
// always specify a max_bytes
Some(max_bytes),
)
.map(|prompt| crate::prompt::PromptData::from(prompt).template)
.map_err(|inner| UserError::InvalidPromptForEmbeddings(name.to_owned(), inner))?;
Ok(Setting::Set(EmbeddingSettings {
source,
@ -1565,11 +1623,13 @@ fn validate_prompt(
api_key,
dimensions,
document_template: Setting::Set(template),
document_template_max_bytes,
url,
request,
response,
distribution,
headers,
binary_quantized: binary_quantize,
}))
}
new => Ok(new),
@ -1589,11 +1649,13 @@ pub fn validate_embedding_settings(
api_key,
dimensions,
document_template,
document_template_max_bytes,
url,
request,
response,
distribution,
headers,
binary_quantized: binary_quantize,
} = settings;
if let Some(0) = dimensions.set() {
@ -1628,11 +1690,13 @@ pub fn validate_embedding_settings(
api_key,
dimensions,
document_template,
document_template_max_bytes,
url,
request,
response,
distribution,
headers,
binary_quantized: binary_quantize,
}));
};
match inferred_source {
@ -1700,6 +1764,12 @@ pub fn validate_embedding_settings(
inferred_source,
name,
)?;
check_unset(
&document_template_max_bytes,
EmbeddingSettings::DOCUMENT_TEMPLATE_MAX_BYTES,
inferred_source,
name,
)?;
check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
@ -1722,11 +1792,13 @@ pub fn validate_embedding_settings(
api_key,
dimensions,
document_template,
document_template_max_bytes,
url,
request,
response,
distribution,
headers,
binary_quantized: binary_quantize,
}))
}