Introducing a new into_del_add_obkv_conditional_operation function

This commit is contained in:
Clément Renault 2024-05-28 14:53:45 +02:00
parent 99211eb375
commit 9af103a88e
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
2 changed files with 53 additions and 9 deletions

View File

@ -40,11 +40,26 @@ pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
operation: DelAddOperation,
buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> {
into_del_add_obkv_conditional_operation(reader, buffer, |_| operation)
}
/// Akin to the [into_del_add_obkv] function but lets you
/// conditionally define the `DelAdd` variant based on the obkv key.
pub fn into_del_add_obkv_conditional_operation<K, F>(
reader: obkv::KvReader<K>,
buffer: &mut Vec<u8>,
operation: F,
) -> std::io::Result<()>
where
K: obkv::Key + PartialOrd,
F: Fn(K) -> DelAddOperation,
{
let mut writer = obkv::KvWriter::new(buffer);
let mut value_buffer = Vec::new();
for (key, value) in reader.iter() {
value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let operation = operation(key);
if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) {
value_writer.insert(DelAdd::Deletion, value)?;
}

View File

@ -20,7 +20,10 @@ use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::index::{db_name, main_key};
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd};
use crate::update::del_add::{
into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
KvReaderDelAdd,
};
use crate::update::index_documents::GrenadParameters;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
@ -841,13 +844,28 @@ impl<'a, 'i> Transform<'a, 'i> {
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
// If only the `searchableAttributes` has been changed, keep only the searchable fields.
// However, if only new searchable attributes are added, this function will
// return false has fields do not need to be reindexed.
let must_reindex_searchables = settings_diff.reindex_searchable();
let necessary_searchable_field = |id: FieldId| -> bool {
must_reindex_searchables
let must_index_only_additional_searchables = &settings_diff.only_additional_fields();
let necessary_searchable_field_to_reindex = move |id: FieldId| -> bool {
must_index_only_additional_searchables.is_none()
&& must_reindex_searchables
&& (settings_diff.old.searchable_fields_ids.contains(&id)
|| settings_diff.new.searchable_fields_ids.contains(&id))
};
// If only new `searchableAttributes` are present, keep only those ones.
let additional_searchable_field_only = move |id: FieldId| -> bool {
match must_index_only_additional_searchables {
Some(additional_fields) => {
let additional_field = settings_diff.new.fields_ids_map.name(id).unwrap();
additional_fields.contains(additional_field)
}
None => false,
}
};
// If only a faceted field has been added, keep only this field.
let must_reindex_facets = settings_diff.reindex_facets();
let necessary_faceted_field = |id: FieldId| -> bool {
@ -862,14 +880,21 @@ impl<'a, 'i> Transform<'a, 'i> {
// we need the fields for the prompt/templating.
let reindex_vectors = settings_diff.reindex_vectors();
// The set of additional searchable fields only,
// the only purpose of these fields is to be indexed from scratch.
let mut additional_searchables_only = HashSet::new();
let mut obkv_writer = KvWriter::<_, FieldId>::memory();
for (id, val) in old_obkv.iter() {
if is_primary_key(id)
|| necessary_searchable_field(id)
|| necessary_searchable_field_to_reindex(id)
|| necessary_faceted_field(id)
|| reindex_vectors
{
obkv_writer.insert(id, val)?;
} else if additional_searchable_field_only(id) {
additional_searchables_only.insert(id);
obkv_writer.insert(id, val)?;
}
}
let data = obkv_writer.into_inner()?;
@ -887,11 +912,15 @@ impl<'a, 'i> Transform<'a, 'i> {
let flattened = flattened.as_deref().map_or(obkv, KvReader::new);
flattened_obkv_buffer.clear();
into_del_add_obkv(
flattened,
DelAddOperation::DeletionAndAddition,
flattened_obkv_buffer,
)?;
into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| {
// If the field is only required because it is an additional
// searchable field only define it as an DelAdd::Addition only.
if additional_searchables_only.contains(&id) {
DelAddOperation::Addition
} else {
DelAddOperation::DeletionAndAddition
}
})?;
}
Ok(())