Refactor Document indexing process (Facets)

**Changes:**
The Documents changes now take a selector closure instead of a list of fields to match the fields to extract.
The seek_leaf_values_in_object function now uses a selector closure instead of a list of fields to match the fields to extract.
The facet database extraction now relies on the FilterableAttributesRule to match the fields to extract.
The facet-search database extraction now relies on the FieldIdMapWithMetadata to select the fields to index.
The facet level database extraction now relies on the FieldIdMapWithMetadata to select the fields to index.

**Important:**
Because the filterable attributes are now patterns,
the fieldIdMap will only register the fields that exist in at least one document.
If a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields.

**Impact:**
- Document Addition/modification facet indexing
- Document deletion facet indexing
This commit is contained in:
ManyTheFish 2025-03-03 10:30:42 +01:00
parent 659855c88e
commit 95bccaf5f5
8 changed files with 233 additions and 179 deletions

View file

@ -33,10 +33,8 @@ where
{
let index = indexing_context.index;
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
if index.facet_search(wtxn)? {
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
}
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
compute_facet_level_database(index, wtxn, facet_field_ids_delta, &global_fields_ids_map)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?;
@ -116,10 +114,18 @@ fn compute_facet_search_database(
global_fields_ids_map: GlobalFieldsIdsMap,
) -> Result<()> {
let rtxn = index.read_txn()?;
// if the facet search is not enabled, we can skip the rest of the function
if !index.facet_search(wtxn)? {
return Ok(());
}
let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?;
let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?;
let mut facet_search_builder = FacetSearchBuilder::new(
global_fields_ids_map,
localized_attributes_rules.unwrap_or_default(),
filterable_attributes_rules,
);
let previous_facet_id_string_docids = index
@ -164,8 +170,19 @@ fn compute_facet_level_database(
index: &Index,
wtxn: &mut RwTxn,
mut facet_field_ids_delta: FacetFieldIdsDelta,
global_fields_ids_map: &GlobalFieldsIdsMap,
) -> Result<()> {
let rtxn = index.read_txn()?;
let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?;
for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() {
// skip field ids that should not be facet leveled
let Some(metadata) = global_fields_ids_map.metadata(fid) else {
continue;
};
if !metadata.require_facet_level_database(&filterable_attributes_rules) {
continue;
}
let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string");
let _entered = span.enter();
match delta {

View file

@ -137,7 +137,6 @@ pub(super) fn update_index(
index.put_primary_key(wtxn, new_primary_key.name())?;
}
let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?;
inner_index_settings.recompute_facets(wtxn, index)?;
inner_index_settings.recompute_searchables(wtxn, index)?;
index.put_field_distribution(wtxn, &field_distribution)?;
index.put_documents_ids(wtxn, &document_ids)?;