Refactor the FieldIdMapWithMetadata

**Changes:**
The FieldIdMapWithMetadata structure now stores more information about fields.
The metadata_for_field function computes all the needed information relying on the user provided data instead of the enriched data (searchable/sortable)
which may solve an indexing bug on sortable attributes that was not matching the nested fields.

The FieldIdMapWithMetadata structure was duplicated in the embeddings as FieldsIdsMapWithMetadata,
so the FieldsIdsMapWithMetadata has been removed in favor of FieldIdMapWithMetadata.

The Facet distribution is now relying on the FieldIdMapWithMetadata with metadata to match is a field can be faceted.

**Impact:**
- searchable attributes matching
- searchable attributes weight computation
- sortable attributes matching
- faceted fields matching
- prompt computing
- facet distribution
This commit is contained in:
ManyTheFish 2025-03-03 10:29:33 +01:00
parent 967033579d
commit 4f7ece2411
6 changed files with 281 additions and 143 deletions

View file

@ -105,6 +105,11 @@ impl<'indexing> GlobalFieldsIdsMap<'indexing> {
self.local.name(id)
}
/// Get the metadata of a field based on its id.
pub fn metadata(&self, id: FieldId) -> Option<Metadata> {
self.local.metadata(id).or_else(|| self.global.read().unwrap().metadata(id))
}
}
impl<'indexing> MutFieldIdMapper for GlobalFieldsIdsMap<'indexing> {

View file

@ -5,14 +5,29 @@ use charabia::Language;
use heed::RoTxn;
use super::FieldsIdsMap;
use crate::{FieldId, Index, LocalizedAttributesRule, Result};
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::{
is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
LocalizedAttributesRule, Result, Weight,
};
#[derive(Debug, Clone, Copy)]
pub struct Metadata {
pub searchable: bool,
pub filterable: bool,
/// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
pub searchable: Option<Weight>,
/// The field is part of the sortable attributes.
pub sortable: bool,
localized_attributes_rule_id: Option<NonZeroU16>,
/// The field is defined as the distinct attribute.
pub distinct: bool,
/// The field has been defined as asc/desc in the ranking rules.
pub asc_desc: bool,
/// The field is a geo field.
pub geo: bool,
/// The id of the localized attributes rule if the field is localized.
pub localized_attributes_rule_id: Option<NonZeroU16>,
/// The id of the filterable attributes rule if the field is filterable.
pub filterable_attributes_rule_id: Option<NonZeroU16>,
}
#[derive(Debug, Clone)]
@ -106,76 +121,215 @@ impl Metadata {
let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap();
Some(rule.locales())
}
pub fn filterable_attributes<'rules>(
&self,
rules: &'rules [FilterableAttributesRule],
) -> Option<&'rules FilterableAttributesRule> {
let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get();
// - 1: `filterable_attributes_rule_id` is NonZero
let rule = rules.get((filterable_attributes_rule_id - 1) as usize).unwrap();
Some(rule)
}
pub fn filterable_attributes_features(
&self,
rules: &[FilterableAttributesRule],
) -> FilterableAttributesFeatures {
self.filterable_attributes(rules)
.map(|rule| rule.features())
// if there is no filterable attributes rule, return no features
.unwrap_or_else(FilterableAttributesFeatures::no_features)
}
pub fn is_sortable(&self) -> bool {
self.sortable
}
pub fn is_searchable(&self) -> bool {
self.searchable.is_some()
}
pub fn searchable_weight(&self) -> Option<Weight> {
self.searchable
}
pub fn is_distinct(&self) -> bool {
self.distinct
}
pub fn is_asc_desc(&self) -> bool {
self.asc_desc
}
pub fn is_geo(&self) -> bool {
self.geo
}
/// Returns `true` if the field is part of the facet databases. (sortable, distinct, asc_desc, filterable or facet searchable)
pub fn is_faceted(&self, rules: &[FilterableAttributesRule]) -> bool {
if self.is_distinct() || self.is_sortable() || self.is_asc_desc() {
return true;
}
let features = self.filterable_attributes_features(rules);
if features.is_filterable() || features.is_facet_searchable() {
return true;
}
false
}
pub fn require_facet_level_database(&self, rules: &[FilterableAttributesRule]) -> bool {
let features = self.filterable_attributes_features(rules);
self.is_sortable() || self.is_asc_desc() || features.is_filterable_comparison()
}
}
#[derive(Debug, Clone)]
pub struct MetadataBuilder {
searchable_attributes: Vec<String>,
filterable_attributes: HashSet<String>,
searchable_attributes: Option<Vec<String>>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
distinct_attribute: Option<String>,
asc_desc_attributes: HashSet<String>,
}
impl MetadataBuilder {
pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
let searchable_attributes =
index.searchable_fields(rtxn)?.into_iter().map(|s| s.to_string()).collect();
let filterable_attributes = index.filterable_fields(rtxn)?;
let searchable_attributes = match index.user_defined_searchable_fields(rtxn)? {
Some(fields) if fields.contains(&"*") => None,
None => None,
Some(fields) => Some(fields.into_iter().map(|s| s.to_string()).collect()),
};
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
let sortable_attributes = index.sortable_fields(rtxn)?;
let localized_attributes = index.localized_attributes_rules(rtxn)?;
let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
Ok(Self {
searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
distinct_attribute,
asc_desc_attributes,
})
}
#[cfg(test)]
/// Build a new `MetadataBuilder` from the given parameters.
///
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
pub fn new(
searchable_attributes: Vec<String>,
filterable_attributes: HashSet<String>,
searchable_attributes: Option<Vec<String>>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
distinct_attribute: Option<String>,
asc_desc_attributes: HashSet<String>,
) -> Self {
let searchable_attributes = match searchable_attributes {
Some(fields) if fields.iter().any(|f| f == "*") => None,
None => None,
Some(fields) => Some(fields),
};
Self {
searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
distinct_attribute,
asc_desc_attributes,
}
}
pub fn metadata_for_field(&self, field: &str) -> Metadata {
let searchable = self
.searchable_attributes
if is_faceted_by(field, RESERVED_VECTORS_FIELD_NAME) {
// Vectors fields are not searchable, filterable, distinct or asc_desc
return Metadata {
searchable: None,
sortable: false,
distinct: false,
asc_desc: false,
geo: false,
localized_attributes_rule_id: None,
filterable_attributes_rule_id: None,
};
}
// A field is sortable if it is faceted by a sortable attribute
let sortable = self
.sortable_attributes
.iter()
.any(|attribute| attribute == "*" || attribute == field);
.any(|pattern| match_field_legacy(pattern, field) == PatternMatch::Match);
let filterable = self.filterable_attributes.contains(field);
let filterable_attributes_rule_id = self
.filterable_attributes
.iter()
.position(|attribute| attribute.match_str(field) == PatternMatch::Match)
// saturating_add(1): make `id` `NonZero`
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
let sortable = self.sortable_attributes.contains(field);
if match_field_legacy(RESERVED_GEO_FIELD_NAME, field) == PatternMatch::Match {
// Geo fields are not searchable, distinct or asc_desc
return Metadata {
searchable: None,
sortable,
distinct: false,
asc_desc: false,
geo: true,
localized_attributes_rule_id: None,
filterable_attributes_rule_id,
};
}
let searchable = match &self.searchable_attributes {
// A field is searchable if it is faceted by a searchable attribute
Some(attributes) => attributes
.iter()
.enumerate()
.find(|(_i, pattern)| is_faceted_by(field, pattern))
.map(|(i, _)| i as u16),
None => Some(0),
};
let distinct =
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
let asc_desc = self.asc_desc_attributes.contains(field);
let localized_attributes_rule_id = self
.localized_attributes
.iter()
.flat_map(|v| v.iter())
.position(|rule| rule.match_str(field))
.position(|rule| rule.match_str(field) == PatternMatch::Match)
// saturating_add(1): make `id` `NonZero`
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
Metadata { searchable, filterable, sortable, localized_attributes_rule_id }
Metadata {
searchable,
sortable,
distinct,
asc_desc,
geo: false,
localized_attributes_rule_id,
filterable_attributes_rule_id,
}
}
pub fn searchable_attributes(&self) -> &[String] {
self.searchable_attributes.as_slice()
pub fn searchable_attributes(&self) -> Option<&[String]> {
self.searchable_attributes.as_deref()
}
pub fn sortable_attributes(&self) -> &HashSet<String> {
&self.sortable_attributes
}
pub fn filterable_attributes(&self) -> &HashSet<String> {
pub fn filterable_attributes(&self) -> &[FilterableAttributesRule] {
&self.filterable_attributes
}