Introducing a new into_del_add_obkv_conditional_operation function

This commit is contained in:
Clément Renault 2024-05-28 14:53:45 +02:00
parent 99211eb375
commit 9af103a88e
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
2 changed files with 53 additions and 9 deletions

View File

@ -40,11 +40,26 @@ pub fn into_del_add_obkv<K: obkv::Key + PartialOrd>(
operation: DelAddOperation, operation: DelAddOperation,
buffer: &mut Vec<u8>, buffer: &mut Vec<u8>,
) -> Result<(), std::io::Error> { ) -> Result<(), std::io::Error> {
into_del_add_obkv_conditional_operation(reader, buffer, |_| operation)
}
/// Akin to the [`into_del_add_obkv`] function but lets you
/// conditionally define the `DelAdd` variant based on the obkv key.
pub fn into_del_add_obkv_conditional_operation<K, F>(
reader: obkv::KvReader<K>,
buffer: &mut Vec<u8>,
operation: F,
) -> std::io::Result<()>
where
K: obkv::Key + PartialOrd,
F: Fn(K) -> DelAddOperation,
{
let mut writer = obkv::KvWriter::new(buffer); let mut writer = obkv::KvWriter::new(buffer);
let mut value_buffer = Vec::new(); let mut value_buffer = Vec::new();
for (key, value) in reader.iter() { for (key, value) in reader.iter() {
value_buffer.clear(); value_buffer.clear();
let mut value_writer = KvWriterDelAdd::new(&mut value_buffer); let mut value_writer = KvWriterDelAdd::new(&mut value_buffer);
let operation = operation(key);
if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) { if matches!(operation, DelAddOperation::Deletion | DelAddOperation::DeletionAndAddition) {
value_writer.insert(DelAdd::Deletion, value)?; value_writer.insert(DelAdd::Deletion, value)?;
} }

View File

@ -20,7 +20,10 @@ use super::{IndexDocumentsMethod, IndexerConfig};
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader}; use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError}; use crate::error::{Error, InternalError, UserError};
use crate::index::{db_name, main_key}; use crate::index::{db_name, main_key};
use crate::update::del_add::{into_del_add_obkv, DelAdd, DelAddOperation, KvReaderDelAdd}; use crate::update::del_add::{
into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
KvReaderDelAdd,
};
use crate::update::index_documents::GrenadParameters; use crate::update::index_documents::GrenadParameters;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep}; use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
@ -841,13 +844,28 @@ impl<'a, 'i> Transform<'a, 'i> {
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) }; let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
// If only the `searchableAttributes` has been changed, keep only the searchable fields. // If only the `searchableAttributes` has been changed, keep only the searchable fields.
// However, if only new searchable attributes are added, this function will
// return false as fields do not need to be reindexed.
let must_reindex_searchables = settings_diff.reindex_searchable(); let must_reindex_searchables = settings_diff.reindex_searchable();
let necessary_searchable_field = |id: FieldId| -> bool { let must_index_only_additional_searchables = &settings_diff.only_additional_fields();
must_reindex_searchables let necessary_searchable_field_to_reindex = move |id: FieldId| -> bool {
must_index_only_additional_searchables.is_none()
&& must_reindex_searchables
&& (settings_diff.old.searchable_fields_ids.contains(&id) && (settings_diff.old.searchable_fields_ids.contains(&id)
|| settings_diff.new.searchable_fields_ids.contains(&id)) || settings_diff.new.searchable_fields_ids.contains(&id))
}; };
// If only new `searchableAttributes` are present, keep only those ones.
let additional_searchable_field_only = move |id: FieldId| -> bool {
match must_index_only_additional_searchables {
Some(additional_fields) => {
let additional_field = settings_diff.new.fields_ids_map.name(id).unwrap();
additional_fields.contains(additional_field)
}
None => false,
}
};
// If only a faceted field has been added, keep only this field. // If only a faceted field has been added, keep only this field.
let must_reindex_facets = settings_diff.reindex_facets(); let must_reindex_facets = settings_diff.reindex_facets();
let necessary_faceted_field = |id: FieldId| -> bool { let necessary_faceted_field = |id: FieldId| -> bool {
@ -862,14 +880,21 @@ impl<'a, 'i> Transform<'a, 'i> {
// we need the fields for the prompt/templating. // we need the fields for the prompt/templating.
let reindex_vectors = settings_diff.reindex_vectors(); let reindex_vectors = settings_diff.reindex_vectors();
// The set of additional searchable fields only,
// the only purpose of these fields is to be indexed from scratch.
let mut additional_searchables_only = HashSet::new();
let mut obkv_writer = KvWriter::<_, FieldId>::memory(); let mut obkv_writer = KvWriter::<_, FieldId>::memory();
for (id, val) in old_obkv.iter() { for (id, val) in old_obkv.iter() {
if is_primary_key(id) if is_primary_key(id)
|| necessary_searchable_field(id) || necessary_searchable_field_to_reindex(id)
|| necessary_faceted_field(id) || necessary_faceted_field(id)
|| reindex_vectors || reindex_vectors
{ {
obkv_writer.insert(id, val)?; obkv_writer.insert(id, val)?;
} else if additional_searchable_field_only(id) {
additional_searchables_only.insert(id);
obkv_writer.insert(id, val)?;
} }
} }
let data = obkv_writer.into_inner()?; let data = obkv_writer.into_inner()?;
@ -887,11 +912,15 @@ impl<'a, 'i> Transform<'a, 'i> {
let flattened = flattened.as_deref().map_or(obkv, KvReader::new); let flattened = flattened.as_deref().map_or(obkv, KvReader::new);
flattened_obkv_buffer.clear(); flattened_obkv_buffer.clear();
into_del_add_obkv( into_del_add_obkv_conditional_operation(flattened, flattened_obkv_buffer, |id| {
flattened, // If the field is only required because it is an additional
DelAddOperation::DeletionAndAddition, // searchable field only, define it as a `DelAdd::Addition` only.
flattened_obkv_buffer, if additional_searchables_only.contains(&id) {
)?; DelAddOperation::Addition
} else {
DelAddOperation::DeletionAndAddition
}
})?;
} }
Ok(()) Ok(())