mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Refactor Document indexing process (Facets)
**Changes:** The Documents changes now take a selector closure instead of a list of field to match the field to extract. The seek_leaf_values_in_object function now uses a selector closure of a list of field to match the field to extract The facet database extraction is now relying on the FilterableAttributesRule to match the field to extract. The facet-search database extraction is now relying on the FieldIdMapWithMetadata to select the field to index. The facet level database extraction is now relying on the FieldIdMapWithMetadata to select the field to index. **Important:** Because the filterable attributes are patterns now, the fieldIdMap will only register the fields that exists in at least one document. if a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields. **Impact:** - Document Addition/modification facet indexing - Document deletion facet indexing
This commit is contained in:
parent
659855c88e
commit
95bccaf5f5
8 changed files with 233 additions and 179 deletions
|
@ -9,12 +9,14 @@ use heed::{BytesDecode, BytesEncode, RoTxn, RwTxn};
|
|||
|
||||
use super::fst_merger_builder::FstMergerBuilder;
|
||||
use super::KvReaderDelAdd;
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::heed_codec::facet::FacetGroupKey;
|
||||
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
|
||||
use crate::update::{create_sorter, MergeDeladdBtreesetString};
|
||||
use crate::{
|
||||
BEU16StrCodec, FieldId, GlobalFieldsIdsMap, Index, LocalizedAttributesRule, Result,
|
||||
MAX_FACET_VALUE_LENGTH,
|
||||
BEU16StrCodec, FieldId, FieldIdMapMissingEntry, FilterableAttributesFeatures,
|
||||
FilterableAttributesRule, GlobalFieldsIdsMap, Index, InternalError, LocalizedAttributesRule,
|
||||
Result, MAX_FACET_VALUE_LENGTH,
|
||||
};
|
||||
|
||||
pub struct FacetSearchBuilder<'indexer> {
|
||||
|
@ -22,6 +24,7 @@ pub struct FacetSearchBuilder<'indexer> {
|
|||
normalized_facet_string_docids_sorter: Sorter<MergeDeladdBtreesetString>,
|
||||
global_fields_ids_map: GlobalFieldsIdsMap<'indexer>,
|
||||
localized_attributes_rules: Vec<LocalizedAttributesRule>,
|
||||
filterable_attributes_rules: Vec<FilterableAttributesRule>,
|
||||
// Buffered data below
|
||||
buffer: Vec<u8>,
|
||||
localized_field_ids: HashMap<FieldId, Option<Vec<Language>>>,
|
||||
|
@ -31,6 +34,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
|
|||
pub fn new(
|
||||
global_fields_ids_map: GlobalFieldsIdsMap<'indexer>,
|
||||
localized_attributes_rules: Vec<LocalizedAttributesRule>,
|
||||
filterable_attributes_rules: Vec<FilterableAttributesRule>,
|
||||
) -> Self {
|
||||
let registered_facets = HashMap::new();
|
||||
let normalized_facet_string_docids_sorter = create_sorter(
|
||||
|
@ -49,6 +53,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
|
|||
buffer: Vec::new(),
|
||||
global_fields_ids_map,
|
||||
localized_attributes_rules,
|
||||
filterable_attributes_rules,
|
||||
localized_field_ids: HashMap::new(),
|
||||
}
|
||||
}
|
||||
|
@ -60,6 +65,13 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
|
|||
) -> Result<()> {
|
||||
let FacetGroupKey { field_id, level: _level, left_bound } = facet_key;
|
||||
|
||||
let filterable_attributes_features = self.filterable_attributes_features(field_id)?;
|
||||
|
||||
// if facet search is disabled, we don't need to register the facet
|
||||
if !filterable_attributes_features.is_facet_searchable() {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
if deladd == DelAdd::Addition {
|
||||
self.registered_facets.entry(field_id).and_modify(|count| *count += 1).or_insert(1);
|
||||
}
|
||||
|
@ -83,6 +95,24 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn filterable_attributes_features(
|
||||
&mut self,
|
||||
field_id: u16,
|
||||
) -> Result<FilterableAttributesFeatures> {
|
||||
let Some(filterable_attributes_features) =
|
||||
self.global_fields_ids_map.metadata(field_id).map(|metadata| {
|
||||
metadata.filterable_attributes_features(&self.filterable_attributes_rules)
|
||||
})
|
||||
else {
|
||||
return Err(InternalError::FieldIdMapMissingEntry(FieldIdMapMissingEntry::FieldId {
|
||||
field_id,
|
||||
process: "facet_search_builder::register_from_key",
|
||||
})
|
||||
.into());
|
||||
};
|
||||
Ok(filterable_attributes_features)
|
||||
}
|
||||
|
||||
fn locales(&mut self, field_id: FieldId) -> Option<&[Language]> {
|
||||
if let Entry::Vacant(e) = self.localized_field_ids.entry(field_id) {
|
||||
let Some(field_name) = self.global_fields_ids_map.name(field_id) else {
|
||||
|
@ -92,7 +122,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
|
|||
let locales = self
|
||||
.localized_attributes_rules
|
||||
.iter()
|
||||
.find(|rule| rule.match_str(field_name))
|
||||
.find(|rule| rule.match_str(field_name) == PatternMatch::Match)
|
||||
.map(|rule| rule.locales.clone());
|
||||
|
||||
e.insert(locales);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue