rename all occurrences of user_defined to user_provided for consistency

This commit is contained in:
Tamo 2024-06-05 15:38:49 +02:00
parent b7349910d9
commit d85ab23b82
8 changed files with 42 additions and 37 deletions

View File

@ -955,7 +955,7 @@ impl IndexScheduler {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_defined.contains(id));
.is_some_and(|conf| conf.user_provided.contains(id));
let embeddings = ExplicitVectors {
embeddings: VectorOrArrayOfVectors::from_array_of_vectors(

View File

@ -3062,9 +3062,9 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let configs = index.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name, config, user_defined } = configs.first().unwrap();
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
insta::assert_snapshot!(name, @"default");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(config.embedder_options);
}
@ -5029,17 +5029,17 @@ mod tests {
let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: fakerest_config, user_defined } =
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(fakerest_config.embedder_options);
let fakerest_name = name.clone();
let IndexEmbeddingConfig { name, config: simple_hf_config, user_defined } =
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone();
@ -5111,13 +5111,14 @@ mod tests {
let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_defined } = configs.get(0).unwrap();
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
let IndexEmbeddingConfig { name, config: _, user_defined } = configs.get(1).unwrap();
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
let embeddings = index.embeddings(&rtxn, 0).unwrap();
@ -5185,15 +5186,15 @@ mod tests {
let configs = index.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_defined } =
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
let IndexEmbeddingConfig { name, config: _, user_defined } =
let IndexEmbeddingConfig { name, config: _, user_provided } =
configs.get(1).unwrap();
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
let embeddings = index.embeddings(&rtxn, 0).unwrap();

View File

@ -1067,12 +1067,12 @@ fn make_hits(
if retrieve_vectors {
let mut vectors = serde_json::Map::new();
for (name, mut vector) in index.embeddings(rtxn, id)? {
let user_defined = embedding_configs
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == name)
.is_some_and(|conf| conf.user_defined.contains(id));
.is_some_and(|conf| conf.user_provided.contains(id));
let mut embedding = serde_json::Map::new();
embedding.insert("userDefined".to_string(), user_defined.into());
embedding.insert("userProvided".to_string(), user_provided.into());
match vector.as_mut_slice() {
[one] => embedding.insert("embedding".to_string(), std::mem::take(one).into()),
_ => embedding.insert("embedding".to_string(), vector.into()),

View File

@ -1672,7 +1672,7 @@ impl Index {
pub struct IndexEmbeddingConfig {
pub name: String,
pub config: EmbeddingConfig,
pub user_defined: RoaringBitmap,
pub user_provided: RoaringBitmap,
}
#[cfg(test)]

View File

@ -2623,10 +2623,10 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let mut embedding_configs = index.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name: embedder_name, config: embedder, user_defined } =
let IndexEmbeddingConfig { name: embedder_name, config: embedder, user_provided } =
embedding_configs.pop().unwrap();
insta::assert_snapshot!(embedder_name, @"manual");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0, 1, 2]>");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[0, 1, 2]>");
let embedder =
std::sync::Arc::new(crate::vector::Embedder::new(embedder.embedder_options).unwrap());
let res = index

View File

@ -625,8 +625,8 @@ pub(crate) fn write_typed_chunk_into_index(
let mut remove_vectors_builder = MergerBuilder::new(keep_first as MergeFn);
let mut manual_vectors_builder = MergerBuilder::new(keep_first as MergeFn);
let mut embeddings_builder = MergerBuilder::new(keep_first as MergeFn);
let mut user_defined = RoaringBitmap::new();
let mut remove_from_user_defined = RoaringBitmap::new();
let mut user_provided = RoaringBitmap::new();
let mut remove_from_user_provided = RoaringBitmap::new();
let mut params = None;
for typed_chunk in typed_chunks {
let TypedChunk::VectorPoints {
@ -649,8 +649,8 @@ pub(crate) fn write_typed_chunk_into_index(
if let Some(embeddings) = embeddings {
embeddings_builder.push(embeddings.into_cursor()?);
}
user_defined |= ud;
remove_from_user_defined |= rud;
user_provided |= ud;
remove_from_user_provided |= rud;
}
// typed chunks has always at least 1 chunk.
@ -661,8 +661,8 @@ pub(crate) fn write_typed_chunk_into_index(
.iter_mut()
.find(|IndexEmbeddingConfig { name, .. }| name == &embedder_name)
.unwrap();
index_embedder_config.user_defined -= remove_from_user_defined;
index_embedder_config.user_defined |= user_defined;
index_embedder_config.user_provided -= remove_from_user_provided;
index_embedder_config.user_provided |= user_provided;
index.put_embedding_configs(wtxn, embedding_configs)?;

View File

@ -932,9 +932,11 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
let old_configs: BTreeMap<String, (Setting<EmbeddingSettings>, RoaringBitmap)> =
old_configs
.into_iter()
.map(|IndexEmbeddingConfig { name, config, user_defined }| {
.map(
|IndexEmbeddingConfig { name, config, user_provided: user_defined }| {
(name, (Setting::Set(config.into()), user_defined))
})
},
)
.collect();
let mut new_configs = BTreeMap::new();
@ -944,19 +946,19 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
{
match joined {
// updated config
EitherOrBoth::Both((name, (mut old, user_defined)), (_, new)) => {
EitherOrBoth::Both((name, (mut old, user_provided)), (_, new)) => {
changed |= EmbeddingSettings::apply_and_need_reindex(&mut old, new);
if changed {
tracing::debug!(
embedder = name,
documents = user_defined.len(),
user_provided = user_provided.len(),
"need reindex"
);
} else {
tracing::debug!(embedder = name, "skip reindex");
}
let new = validate_embedding_settings(old, &name)?;
new_configs.insert(name, (new, user_defined));
new_configs.insert(name, (new, user_provided));
}
// unchanged config
EitherOrBoth::Left((name, setting)) => {
@ -979,15 +981,17 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
}
let new_configs: Vec<IndexEmbeddingConfig> = new_configs
.into_iter()
.filter_map(|(name, (config, user_defined))| match config {
Setting::Set(config) => {
Some(IndexEmbeddingConfig { name, config: config.into(), user_defined })
}
.filter_map(|(name, (config, user_provided))| match config {
Setting::Set(config) => Some(IndexEmbeddingConfig {
name,
config: config.into(),
user_provided,
}),
Setting::Reset => None,
Setting::NotSet => Some(IndexEmbeddingConfig {
name,
config: EmbeddingSettings::default().into(),
user_defined,
user_provided,
}),
})
.collect();

View File

@ -73,7 +73,7 @@ impl ParsedVectorsDiff {
}
.flatten().map_or(BTreeMap::default(), |del| del.into_iter().map(|(name, vec)| (name, Some(vec))).collect());
for embedding_config in embedders_configs {
if embedding_config.user_defined.contains(docid) {
if embedding_config.user_provided.contains(docid) {
old.entry(embedding_config.name.to_string()).or_insert(None);
}
}