diff --git a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index edd596fef..54d305c26 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -12,7 +12,7 @@ use milli::index::{IndexEmbeddingConfig, PrefixSearch}; use milli::proximity::ProximityPrecision; use milli::update::Setting; use milli::{ - Criterion, CriterionError, FilterableAttributesSettings, Index, DEFAULT_VALUES_PER_FACET, + Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET, }; use serde::{Deserialize, Serialize, Serializer}; use utoipa::ToSchema; @@ -204,8 +204,8 @@ pub struct Settings { /// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters). #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] - #[schema(value_type = Option>, example = json!(["release_date", "genre"]))] - pub filterable_attributes: Setting>, + #[schema(value_type = Option>, example = json!(["release_date", "genre"]))] + pub filterable_attributes: Setting>, /// Attributes to use when sorting search results. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] @@ -793,7 +793,7 @@ pub fn settings( .user_defined_searchable_fields(rtxn)? 
.map(|fields| fields.into_iter().map(String::from).collect()); - let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect(); + let filterable_attributes = index.filterable_attributes_rules(rtxn)?.into_iter().collect(); let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect(); diff --git a/crates/milli/src/fields_ids_map/global.rs b/crates/milli/src/fields_ids_map/global.rs index 2ffc45eb7..e5f1212df 100644 --- a/crates/milli/src/fields_ids_map/global.rs +++ b/crates/milli/src/fields_ids_map/global.rs @@ -105,6 +105,11 @@ impl<'indexing> GlobalFieldsIdsMap<'indexing> { self.local.name(id) } + + /// Get the metadata of a field based on its id. + pub fn metadata(&self, id: FieldId) -> Option { + self.local.metadata(id).or_else(|| self.global.read().unwrap().metadata(id)) + } } impl<'indexing> MutFieldIdMapper for GlobalFieldsIdsMap<'indexing> { diff --git a/crates/milli/src/fields_ids_map/metadata.rs b/crates/milli/src/fields_ids_map/metadata.rs index 05a509baf..2138e8ad3 100644 --- a/crates/milli/src/fields_ids_map/metadata.rs +++ b/crates/milli/src/fields_ids_map/metadata.rs @@ -7,7 +7,7 @@ use heed::RoTxn; use super::FieldsIdsMap; use crate::attribute_patterns::PatternMatch; use crate::{ - FieldId, FilterableAttributesFeatures, FilterableAttributesSettings, Index, + is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index, LocalizedAttributesRule, Result, }; @@ -113,8 +113,8 @@ impl Metadata { pub fn filterable_attributes<'rules>( &self, - rules: &'rules [FilterableAttributesSettings], - ) -> Option<&'rules FilterableAttributesSettings> { + rules: &'rules [FilterableAttributesRule], + ) -> Option<&'rules FilterableAttributesRule> { let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get(); // - 1: `filterable_attributes_rule_id` is NonZero let rule = rules.get((filterable_attributes_rule_id - 1) as usize).unwrap(); @@ -123,7 +123,7 @@ impl Metadata { pub fn 
filterable_attributes_features( &self, - rules: &[FilterableAttributesSettings], + rules: &[FilterableAttributesRule], ) -> FilterableAttributesFeatures { self.filterable_attributes(rules) .map(|rule| rule.features()) @@ -138,21 +138,42 @@ impl Metadata { pub fn is_searchable(&self) -> bool { self.searchable } + + /// Returns `true` if the field is part of the facet databases. (sortable, filterable, or facet searchable) + pub fn is_faceted(&self, rules: &[FilterableAttributesRule]) -> bool { + if self.is_sortable() { + return true; + } + + let features = self.filterable_attributes_features(&rules); + if features.is_filterable() { + return true; + } + + if features.is_facet_searchable() { + return true; + } + + false + } } #[derive(Debug, Clone)] pub struct MetadataBuilder { - searchable_attributes: Vec, - filterable_attributes: Vec, + searchable_attributes: Option>, + filterable_attributes: Vec, sortable_attributes: HashSet, localized_attributes: Option>, } impl MetadataBuilder { pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result { - let searchable_attributes = - index.searchable_fields(rtxn)?.into_iter().map(|s| s.to_string()).collect(); - let filterable_attributes = index.filterable_fields(rtxn)?; + let searchable_attributes = match index.user_defined_searchable_fields(rtxn)? 
{ + Some(fields) if fields.contains(&"*") => None, + None => None, + Some(fields) => Some(fields.into_iter().map(|s| s.to_string()).collect()), + }; + let filterable_attributes = index.filterable_attributes_rules(rtxn)?; let sortable_attributes = index.sortable_fields(rtxn)?; let localized_attributes = index.localized_attributes_rules(rtxn)?; @@ -164,27 +185,35 @@ impl MetadataBuilder { }) } - pub fn new( - searchable_attributes: Vec, - filterable_attributes: Vec, - sortable_attributes: HashSet, - localized_attributes: Option>, - ) -> Self { - Self { - searchable_attributes, - filterable_attributes, - sortable_attributes, - localized_attributes, - } - } + // pub fn new( + // searchable_attributes: Option>, + // filterable_attributes: Vec, + // sortable_attributes: HashSet, + // localized_attributes: Option>, + // ) -> Self { + // let searchable_attributes = match searchable_attributes { + // Some(fields) if fields.iter().any(|f| f == "*") => None, + // None => None, + // Some(fields) => Some(fields), + // }; + + // Self { + // searchable_attributes, + // filterable_attributes, + // sortable_attributes, + // localized_attributes, + // } + // } pub fn metadata_for_field(&self, field: &str) -> Metadata { - let searchable = self - .searchable_attributes - .iter() - .any(|attribute| attribute == "*" || attribute == field); + let searchable = match &self.searchable_attributes { + // A field is searchable if it is faceted by a searchable attribute + Some(attributes) => attributes.iter().any(|pattern| is_faceted_by(field, pattern)), + None => true, + }; - let sortable = self.sortable_attributes.contains(field); + // A field is sortable if it is faceted by a sortable attribute + let sortable = self.sortable_attributes.iter().any(|pattern| is_faceted_by(field, pattern)); let localized_attributes_rule_id = self .localized_attributes @@ -209,15 +238,15 @@ impl MetadataBuilder { } } - pub fn searchable_attributes(&self) -> &[String] { - self.searchable_attributes.as_slice() + 
pub fn searchable_attributes(&self) -> Option<&[String]> { + self.searchable_attributes.as_deref() } pub fn sortable_attributes(&self) -> &HashSet { &self.sortable_attributes } - pub fn filterable_attributes(&self) -> &[FilterableAttributesSettings] { + pub fn filterable_attributes(&self) -> &[FilterableAttributesRule] { &self.filterable_attributes } diff --git a/crates/milli/src/filterable_fields.rs b/crates/milli/src/filterable_attributes_rules.rs similarity index 83% rename from crates/milli/src/filterable_fields.rs rename to crates/milli/src/filterable_attributes_rules.rs index 10329f966..f5291b5b8 100644 --- a/crates/milli/src/filterable_fields.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -11,29 +11,27 @@ use crate::{ #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)] #[serde(untagged)] -pub enum FilterableAttributesSettings { +pub enum FilterableAttributesRule { Field(String), Pattern(FilterableAttributesPatterns), } -impl FilterableAttributesSettings { +impl FilterableAttributesRule { pub fn match_str(&self, field: &str) -> PatternMatch { match self { - FilterableAttributesSettings::Field(pattern) => match_field_legacy(pattern, field), - FilterableAttributesSettings::Pattern(patterns) => patterns.match_str(field), + FilterableAttributesRule::Field(pattern) => match_field_legacy(pattern, field), + FilterableAttributesRule::Pattern(patterns) => patterns.match_str(field), } } pub fn has_geo(&self) -> bool { - matches!(self, FilterableAttributesSettings::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME) + matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME) } pub fn features(&self) -> FilterableAttributesFeatures { match self { - FilterableAttributesSettings::Field(_) => { - FilterableAttributesFeatures::legacy_default() - } - FilterableAttributesSettings::Pattern(patterns) => patterns.features(), + FilterableAttributesRule::Field(_) => 
FilterableAttributesFeatures::legacy_default(), + FilterableAttributesRule::Pattern(patterns) => patterns.features(), } } } @@ -93,6 +91,11 @@ impl FilterableAttributesFeatures { self.filter != FilterFeature::Disabled } + /// Check if `IS EXISTS` is allowed + pub fn is_filterable_exists(&self) -> bool { + self.filter != FilterFeature::Disabled + } + /// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed pub fn is_filterable_order(&self) -> bool { self.filter == FilterFeature::Order @@ -104,7 +107,7 @@ impl FilterableAttributesFeatures { } } -impl Deserr for FilterableAttributesSettings { +impl Deserr for FilterableAttributesRule { fn deserialize_from_value( value: deserr::Value, location: ValuePointerRef, @@ -127,7 +130,7 @@ pub enum FilterFeature { } pub fn matching_field_ids( - filterable_attributes: &[FilterableAttributesSettings], + filterable_attributes: &[FilterableAttributesRule], fields_ids_map: &FieldsIdsMap, ) -> HashSet { let mut result = HashSet::new(); @@ -142,14 +145,14 @@ pub fn matching_field_ids( } pub fn matching_field_names<'fim>( - filterable_attributes: &[FilterableAttributesSettings], + filterable_attributes: &[FilterableAttributesRule], fields_ids_map: &'fim FieldsIdsMap, ) -> BTreeSet<&'fim str> { filtered_matching_field_names(filterable_attributes, fields_ids_map, &|_| true) } pub fn filtered_matching_field_names<'fim>( - filterable_attributes: &[FilterableAttributesSettings], + filterable_attributes: &[FilterableAttributesRule], fields_ids_map: &'fim FieldsIdsMap, filter: &impl Fn(&FilterableAttributesFeatures) -> bool, ) -> BTreeSet<&'fim str> { @@ -169,7 +172,7 @@ pub fn filtered_matching_field_names<'fim>( pub fn matching_features( field_name: &str, - filterable_attributes: &[FilterableAttributesSettings], + filterable_attributes: &[FilterableAttributesRule], ) -> Option { for filterable_attribute in filterable_attributes { if filterable_attribute.match_str(field_name) == PatternMatch::Match { @@ -181,7 +184,7 @@ pub fn 
matching_features( pub fn is_field_filterable( field_name: &str, - filterable_attributes: &[FilterableAttributesSettings], + filterable_attributes: &[FilterableAttributesRule], ) -> bool { matching_features(field_name, filterable_attributes) .map_or(false, |features| features.is_filterable()) @@ -189,7 +192,7 @@ pub fn is_field_filterable( pub fn match_pattern_by_features( field_name: &str, - filterable_attributes: &[FilterableAttributesSettings], + filterable_attributes: &[FilterableAttributesRule], filter: &impl Fn(&FilterableAttributesFeatures) -> bool, ) -> PatternMatch { let mut selection = PatternMatch::NoMatch; diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index aa96a9c7f..52021d919 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -15,7 +15,7 @@ use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; use crate::documents::PrimaryKey; use crate::error::{InternalError, UserError}; use crate::fields_ids_map::FieldsIdsMap; -use crate::filterable_fields::{match_pattern_by_features, matching_field_ids}; +use crate::filterable_attributes_rules::match_pattern_by_features; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec, OrderedF64Codec, @@ -27,7 +27,7 @@ use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, - FieldidsWeightsMap, FilterableAttributesSettings, GeoPoint, LocalizedAttributesRule, ObkvCodec, + FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64, }; @@ -786,11 +786,11 @@ impl Index { /* filterable fields */ - /// Writes the filterable fields names in the 
database. - pub(crate) fn put_filterable_fields( + /// Writes the filterable attributes rules in the database. + pub(crate) fn put_filterable_attributes_rules( &self, wtxn: &mut RwTxn<'_>, - fields: &Vec, + fields: &Vec, ) -> heed::Result<()> { self.main.remap_types::>().put( wtxn, @@ -799,16 +799,19 @@ impl Index { ) } - /// Deletes the filterable fields ids in the database. - pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result { + /// Deletes the filterable attributes rules in the database. + pub(crate) fn delete_filterable_attributes_rules( + &self, + wtxn: &mut RwTxn<'_>, + ) -> heed::Result { self.main.remap_key_type::().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY) } - /// Returns the filterable fields setting value. - pub fn filterable_fields( + /// Returns the filterable attributes rules. + pub fn filterable_attributes_rules( &self, rtxn: &RoTxn<'_>, - ) -> heed::Result> { + ) -> heed::Result> { Ok(self .main .remap_types::>() @@ -817,14 +820,14 @@ impl Index { } /// Returns the filterable fields ids. - pub fn filterable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result> { - let fields = self.filterable_fields(rtxn)?; - let fields_ids_map = self.fields_ids_map(rtxn)?; + // pub fn filterable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result> { + // let fields = self.filterable_attributes_rules(rtxn)?; + // let fields_ids_map = self.fields_ids_map(rtxn)?; - let matching_field_ids = matching_field_ids(&fields, &fields_ids_map); + // let matching_field_ids = matching_field_ids(&fields, &fields_ids_map); - Ok(matching_field_ids) - } + // Ok(matching_field_ids) + // } /* sortable fields */ @@ -865,17 +868,17 @@ impl Index { /* faceted fields */ /// Writes the faceted fields in the database. 
- pub(crate) fn put_faceted_fields( - &self, - wtxn: &mut RwTxn<'_>, - fields: &HashSet, - ) -> heed::Result<()> { - self.main.remap_types::>().put( - wtxn, - main_key::HIDDEN_FACETED_FIELDS_KEY, - fields, - ) - } + // pub(crate) fn put_faceted_fields( + // &self, + // wtxn: &mut RwTxn<'_>, + // fields: &HashSet, + // ) -> heed::Result<()> { + // self.main.remap_types::>().put( + // wtxn, + // main_key::HIDDEN_FACETED_FIELDS_KEY, + // fields, + // ) + // } /// Returns true if the geo feature is activated. pub fn is_geo_activated(&self, rtxn: &RoTxn<'_>) -> Result { @@ -892,25 +895,26 @@ impl Index { /// Returns true if the geo filtering feature is activated. pub fn is_geo_filtering_activated(&self, rtxn: &RoTxn<'_>) -> Result { - let geo_filter = self.filterable_fields(rtxn)?.iter().any(|field| field.has_geo()); + let geo_filter = + self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geo()); Ok(geo_filter) } /// Returns the field ids of the fields that are filterable using the ordering operators or are sortable. 
- pub fn facet_leveled_field_ids(&self, rtxn: &RoTxn<'_>) -> Result> { - let filterable_fields = self.filterable_fields(rtxn)?; + pub fn facet_leveled_field_ids(&self, rtxn: &RoTxn<'_>) -> Result> { + let filterable_fields = self.filterable_attributes_rules(rtxn)?; let sortable_fields = self.sortable_fields(rtxn)?; let fields_ids_map = self.fields_ids_map(rtxn)?; - let mut fields_ids = Vec::new(); + let mut fields_ids = HashSet::new(); for (field_id, field_name) in fields_ids_map.iter() { if match_pattern_by_features(field_name, &filterable_fields, &|features| { features.is_filterable_order() }) == PatternMatch::Match { - fields_ids.push(field_id); + fields_ids.insert(field_id); } else if sortable_fields.contains(field_name) { - fields_ids.push(field_id); + fields_ids.insert(field_id); } } @@ -1778,7 +1782,7 @@ pub(crate) mod tests { use crate::vector::settings::{EmbedderSource, EmbeddingSettings}; use crate::vector::EmbeddingConfigs; use crate::{ - db_snap, obkv_to_json, Filter, FilterableAttributesSettings, Index, Search, SearchResult, + db_snap, obkv_to_json, Filter, FilterableAttributesRule, Index, Search, SearchResult, ThreadPoolNoAbortBuilder, }; @@ -2209,7 +2213,7 @@ pub(crate) mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( RESERVED_GEO_FIELD_NAME.to_string(), )]); }) @@ -2319,7 +2323,7 @@ pub(crate) mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "doggo".to_string(), )]); }) @@ -2363,7 +2367,7 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_owned()); - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "doggo".to_string(), )]); }) 
@@ -2898,7 +2902,7 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_string()); - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( RESERVED_GEO_FIELD_NAME.to_string(), )]); }) @@ -2934,7 +2938,7 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_string()); - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( RESERVED_GEO_FIELD_NAME.to_string(), )]); }) @@ -2969,7 +2973,7 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S("name")]); - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "age".to_string(), )]); }) @@ -2993,7 +2997,7 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S("name"), S("realName")]); - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "age".to_string(), )]); }) @@ -3095,8 +3099,8 @@ pub(crate) mod tests { .update_settings(|settings| { settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]); settings.set_filterable_fields(vec![ - FilterableAttributesSettings::Field("_vectors".to_string()), - FilterableAttributesSettings::Field("_vectors.doggo".to_string()), + FilterableAttributesRule::Field("_vectors".to_string()), + FilterableAttributesRule::Field("_vectors.doggo".to_string()), ]); }) .unwrap(); diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index dc9bc4911..3e90af100 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -15,7 +15,7 @@ mod error; mod external_documents_ids; pub mod facet; mod fields_ids_map; -mod filterable_fields; +mod 
filterable_attributes_rules; pub mod heed_codec; pub mod index; mod localized_attributes_rules; @@ -61,8 +61,8 @@ pub use self::error::{ pub use self::external_documents_ids::ExternalDocumentsIds; pub use self::fieldids_weights_map::FieldidsWeightsMap; pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap}; -pub use self::filterable_fields::{ - FilterableAttributesFeatures, FilterableAttributesPatterns, FilterableAttributesSettings, +pub use self::filterable_attributes_rules::{ + FilterableAttributesFeatures, FilterableAttributesPatterns, FilterableAttributesRule, }; pub use self::heed_codec::{ BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, diff --git a/crates/milli/src/search/facet/facet_distribution.rs b/crates/milli/src/search/facet/facet_distribution.rs index fa9e64a1c..769fa4592 100644 --- a/crates/milli/src/search/facet/facet_distribution.rs +++ b/crates/milli/src/search/facet/facet_distribution.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, HashMap}; use std::fmt::Display; use std::ops::ControlFlow; use std::{fmt, mem}; @@ -9,9 +9,8 @@ use indexmap::IndexMap; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; -use crate::error::UserError; use crate::facet::FacetType; -use crate::filterable_fields::{is_field_filterable, matching_field_names}; +use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata, MetadataBuilder}; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, }; @@ -19,7 +18,7 @@ use crate::heed_codec::{BytesRefCodec, StrRefCodec}; use crate::search::facet::facet_distribution_iter::{ count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, }; -use crate::{FieldId, FieldsIdsMap, Index, Result}; +use crate::{FieldId, FilterableAttributesRule, Index, Result}; /// The default number of values by facets that will /// be fetched 
from the key-value store. @@ -281,18 +280,22 @@ impl<'a> FacetDistribution<'a> { } pub fn compute_stats(&self) -> Result> { - let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let candidates = if let Some(candidates) = self.candidates.clone() { candidates } else { return Ok(Default::default()); }; - let fields = self.faceted_fields_names(&fields_ids_map)?; + let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; + let fields_ids_map = FieldIdMapWithMetadata::new( + fields_ids_map, + MetadataBuilder::from_index(self.index, self.rtxn)?, + ); + let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?; let mut distribution = BTreeMap::new(); - for (fid, name) in fields_ids_map.iter() { - if crate::is_faceted(name, &fields) { + for (fid, name, metadata) in fields_ids_map.iter() { + if self.select_field(name, &metadata, &filterable_attributes_rules) { let min_value = if let Some(min_value) = crate::search::facet::facet_min_value( self.index, self.rtxn, @@ -323,12 +326,15 @@ impl<'a> FacetDistribution<'a> { pub fn execute(&self) -> Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; - - let fields = self.faceted_fields_names(&fields_ids_map)?; + let fields_ids_map = FieldIdMapWithMetadata::new( + fields_ids_map, + MetadataBuilder::from_index(self.index, self.rtxn)?, + ); + let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?; let mut distribution = BTreeMap::new(); - for (fid, name) in fields_ids_map.iter() { - if crate::is_faceted(name, &fields) { + for (fid, name, metadata) in fields_ids_map.iter() { + if self.select_field(name, &metadata, &filterable_attributes_rules) { let order_by = self .facets .as_ref() @@ -342,38 +348,21 @@ impl<'a> FacetDistribution<'a> { Ok(distribution) } - fn faceted_fields_names(&self, fields_ids_map: &FieldsIdsMap) -> Result> { - /// TODO: @many: this is a bit of a mess, we should refactor it - let filterable_fields = 
self.index.filterable_fields(self.rtxn)?; - let fields = match &self.facets { - Some(facets) => { - let invalid_fields: HashSet<_> = facets - .iter() - .map(|(name, _)| name) - .filter(|facet| !is_field_filterable(facet, &filterable_fields)) - .collect(); - if !invalid_fields.is_empty() { - let valid_facets_name = - matching_field_names(&filterable_fields, &fields_ids_map); - return Err(UserError::InvalidFacetsDistribution { - invalid_facets_name: invalid_fields.into_iter().cloned().collect(), - valid_facets_name: valid_facets_name - .into_iter() - .map(String::from) - .collect(), - } - .into()); - } else { - facets.iter().map(|(name, _)| name).cloned().collect() - } - } - None => matching_field_names(&filterable_fields, &fields_ids_map) - .into_iter() - .map(String::from) - .collect(), - }; + /// Select a field if it is faceted and in the facets. + fn select_field( + &self, + name: &str, + metadata: &Metadata, + filterable_attributes_rules: &[FilterableAttributesRule], + ) -> bool { + if !metadata.is_faceted(filterable_attributes_rules) { + return false; + } - Ok(fields) + match &self.facets { + Some(facets) => facets.contains_key(name), + None => true, + } } } @@ -405,7 +394,7 @@ mod tests { use crate::documents::mmap_from_objects; use crate::index::tests::TempIndex; - use crate::{milli_snap, FacetDistribution, FilterableAttributesSettings, OrderBy}; + use crate::{milli_snap, FacetDistribution, FilterableAttributesRule, OrderBy}; #[test] fn few_candidates_few_facet_values() { @@ -416,8 +405,7 @@ mod tests { index .update_settings(|settings| { - settings - .set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))]) + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) }) .unwrap(); @@ -490,8 +478,7 @@ mod tests { index .update_settings(|settings| { - settings - .set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))]) + 
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) }) .unwrap(); @@ -578,8 +565,7 @@ mod tests { index .update_settings(|settings| { - settings - .set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))]) + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) }) .unwrap(); @@ -640,8 +626,7 @@ mod tests { index .update_settings(|settings| { - settings - .set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))]) + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) }) .unwrap(); @@ -694,8 +679,7 @@ mod tests { index .update_settings(|settings| { - settings - .set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))]) + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) }) .unwrap(); @@ -748,8 +732,7 @@ mod tests { index .update_settings(|settings| { - settings - .set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))]) + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) }) .unwrap(); @@ -802,8 +785,7 @@ mod tests { index .update_settings(|settings| { - settings - .set_filterable_fields(vec![FilterableAttributesSettings::Field(S("colour"))]) + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) }) .unwrap(); diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index 97e59fc0a..cf54754bf 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -12,7 +12,7 @@ use serde_json::Value; use super::facet_range_search; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, UserError}; -use crate::filterable_fields::{ +use crate::filterable_attributes_rules::{ filtered_matching_field_names, is_field_filterable, matching_features, }; use crate::heed_codec::facet::{ @@ -21,7 +21,7 @@ use 
crate::heed_codec::facet::{ use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures, - FilterableAttributesSettings, Index, InternalError, Result, SerializationError, + FilterableAttributesRule, Index, InternalError, Result, SerializationError, }; /// The maximum number of filters the filter AST can process. @@ -233,22 +233,22 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { // to avoid doing this for each recursive call we're going to do it ONCE ahead of time - let filterable_fields = index.filterable_fields(rtxn)?; + let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; for fid in self.condition.fids(MAX_FILTER_DEPTH) { let attribute = fid.value(); - if !is_field_filterable(attribute, &filterable_fields) { + if !is_field_filterable(attribute, &filterable_attributes_rules) { let fields_ids_map = index.fields_ids_map(rtxn)?; return Err(fid.as_external_error(FilterError::AttributeNotFilterable { attribute, filterable_fields: filtered_matching_field_names( - &filterable_fields, + &filterable_attributes_rules, &fields_ids_map, &|features| features.is_filterable(), ), }))?; } } - self.inner_evaluate(rtxn, index, &filterable_fields, None) + self.inner_evaluate(rtxn, index, &filterable_attributes_rules, None) } fn evaluate_operator( @@ -278,6 +278,18 @@ impl<'a> Filter<'a> { /// TODO produce an dedicated error for this todo!("filtering on non-ordered fields is not supported, return an error") } + Condition::Empty if !features.is_filterable_empty() => { + /// TODO produce an dedicated error for this + todo!("filtering on non-empty fields is not supported, return an error") + } + Condition::Null if !features.is_filterable_null() => { + /// TODO produce an dedicated error for this + todo!("filtering on non-null fields is not supported, return an error") + } + Condition::Exists if 
!features.is_filterable_exists() => { + /// TODO produce an dedicated error for this + todo!("filtering on non-exists fields is not supported, return an error") + } Condition::GreaterThan(val) => { (Excluded(val.parse_finite_float()?), Included(f64::MAX)) } @@ -430,7 +442,7 @@ impl<'a> Filter<'a> { &self, rtxn: &heed::RoTxn<'_>, index: &Index, - filterable_fields: &[FilterableAttributesSettings], + filterable_fields: &[FilterableAttributesRule], universe: Option<&RoaringBitmap>, ) -> Result { if universe.map_or(false, |u| u.is_empty()) { @@ -742,7 +754,7 @@ mod tests { use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::index::tests::TempIndex; - use crate::{Filter, FilterableAttributesSettings}; + use crate::{Filter, FilterableAttributesRule}; #[test] fn empty_db() { @@ -750,7 +762,7 @@ mod tests { //Set the filterable fields to be the channel. index .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "PrIcE".to_string(), )]); }) @@ -856,7 +868,7 @@ mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S("title")]); - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "title".to_string(), )]); }) @@ -924,7 +936,7 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "monitor_diagonal".to_string(), )]); }) @@ -957,7 +969,7 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field(S( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S( RESERVED_GEO_FIELD_NAME, ))]); }) @@ -1007,8 +1019,8 @@ mod tests { .update_settings(|settings| { 
settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order settings.set_filterable_fields(vec![ - FilterableAttributesSettings::Field(S(RESERVED_GEO_FIELD_NAME)), - FilterableAttributesSettings::Field("price".to_string()), + FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)), + FilterableAttributesRule::Field("price".to_string()), ]); }) .unwrap(); @@ -1060,8 +1072,8 @@ mod tests { .update_settings(|settings| { settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order settings.set_filterable_fields(vec![ - FilterableAttributesSettings::Field(S(RESERVED_GEO_FIELD_NAME)), - FilterableAttributesSettings::Field("price".to_string()), + FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)), + FilterableAttributesRule::Field("price".to_string()), ]); }) .unwrap(); @@ -1172,7 +1184,7 @@ mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S("price")]); // to keep the fields order - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "price".to_string(), )]); }) @@ -1231,9 +1243,9 @@ mod tests { .update_settings(|settings| { settings.set_primary_key("id".to_owned()); settings.set_filterable_fields(vec![ - FilterableAttributesSettings::Field("id".to_string()), - FilterableAttributesSettings::Field("one".to_string()), - FilterableAttributesSettings::Field("two".to_string()), + FilterableAttributesRule::Field("id".to_string()), + FilterableAttributesRule::Field("one".to_string()), + FilterableAttributesRule::Field("two".to_string()), ]); }) .unwrap(); diff --git a/crates/milli/src/search/facet/search.rs b/crates/milli/src/search/facet/search.rs index 4a0e58b33..e562ff232 100644 --- a/crates/milli/src/search/facet/search.rs +++ b/crates/milli/src/search/facet/search.rs @@ -10,7 +10,7 @@ use roaring::RoaringBitmap; use tracing::error; use 
crate::error::UserError; -use crate::filterable_fields::{is_field_filterable, matching_field_names}; +use crate::filterable_attributes_rules::{is_field_filterable, matching_field_names}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::search::build_dfa; use crate::{DocumentId, FieldId, OrderBy, Result, Search}; @@ -74,7 +74,7 @@ impl<'a> SearchForFacetValues<'a> { let index = self.search_query.index; let rtxn = self.search_query.rtxn; - let filterable_fields = index.filterable_fields(rtxn)?; + let filterable_fields = index.filterable_attributes_rules(rtxn)?; if !is_field_filterable(&self.facet, &filterable_fields) { let fields_ids_map = index.fields_ids_map(rtxn)?; let matching_field_names = matching_field_names(&filterable_fields, &fields_ids_map); diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index 80a3ccc2c..891f98557 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -9,7 +9,7 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::{execute_vector_search, PartialSearchResult}; -use crate::filterable_fields::{is_field_filterable, matching_field_names}; +use crate::filterable_attributes_rules::{is_field_filterable, matching_field_names}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::Embedder; use crate::{ @@ -188,7 +188,7 @@ impl<'a> Search<'a> { } if let Some(distinct) = &self.distinct { - let filterable_fields = ctx.index.filterable_fields(ctx.txn)?; + let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?; // check if the distinct field is in the filterable fields if !is_field_filterable(distinct, &filterable_fields) { // if not, remove the hidden fields from the filterable fields to generate the error message diff --git 
a/crates/milli/src/search/new/tests/cutoff.rs b/crates/milli/src/search/new/tests/cutoff.rs index 99cb8f1d3..f2dfb45d6 100644 --- a/crates/milli/src/search/new/tests/cutoff.rs +++ b/crates/milli/src/search/new/tests/cutoff.rs @@ -9,7 +9,7 @@ use meili_snap::snapshot; use crate::index::tests::TempIndex; use crate::score_details::{ScoreDetails, ScoringStrategy}; -use crate::{Criterion, Filter, FilterableAttributesSettings, Search, TimeBudget}; +use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget}; fn create_index() -> TempIndex { let index = TempIndex::new(); @@ -18,7 +18,7 @@ fn create_index() -> TempIndex { .update_settings(|s| { s.set_primary_key("id".to_owned()); s.set_searchable_fields(vec!["text".to_owned()]); - s.set_filterable_fields(vec![FilterableAttributesSettings::Field("id".to_owned())]); + s.set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_owned())]); s.set_criteria(vec![Criterion::Words, Criterion::Typo]); }) .unwrap(); diff --git a/crates/milli/src/search/new/tests/distinct.rs b/crates/milli/src/search/new/tests/distinct.rs index 492f3961d..d3c453957 100644 --- a/crates/milli/src/search/new/tests/distinct.rs +++ b/crates/milli/src/search/new/tests/distinct.rs @@ -20,7 +20,7 @@ use maplit::hashset; use super::collect_field_values; use crate::index::tests::TempIndex; use crate::{ - AscDesc, Criterion, FilterableAttributesSettings, Index, Member, Search, SearchResult, + AscDesc, Criterion, FilterableAttributesRule, Index, Member, Search, SearchResult, TermsMatchingStrategy, }; @@ -239,7 +239,7 @@ fn test_distinct_placeholder_no_ranking_rules() { // Set the letter as filterable and unset the distinct attribute. 
index .update_settings(|s| { - s.set_filterable_fields(vec![FilterableAttributesSettings::Field("letter".to_owned())]); + s.set_filterable_fields(vec![FilterableAttributesRule::Field("letter".to_owned())]); s.reset_distinct_field(); }) .unwrap(); diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index cbed3b79f..1b6e88d47 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -9,7 +9,7 @@ use crate::progress::Progress; use crate::update::new::indexer; use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings}; use crate::vector::EmbeddingConfigs; -use crate::{db_snap, Criterion, FilterableAttributesSettings, Index}; +use crate::{db_snap, Criterion, FilterableAttributesRule, Index}; pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson"); use crate::constants::RESERVED_GEO_FIELD_NAME; @@ -26,12 +26,12 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { builder.set_criteria(criteria.to_vec()); builder.set_filterable_fields(vec![ - FilterableAttributesSettings::Field(S("tag")), - FilterableAttributesSettings::Field(S("asc_desc_rank")), - FilterableAttributesSettings::Field(S(RESERVED_GEO_FIELD_NAME)), - FilterableAttributesSettings::Field(S("opt1")), - FilterableAttributesSettings::Field(S("opt1.opt2")), - FilterableAttributesSettings::Field(S("tag_in")), + FilterableAttributesRule::Field(S("tag")), + FilterableAttributesRule::Field(S("asc_desc_rank")), + FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)), + FilterableAttributesRule::Field(S("opt1")), + FilterableAttributesRule::Field(S("opt1.opt2")), + FilterableAttributesRule::Field(S("tag_in")), ]); builder.set_sortable_fields(hashset! 
{ S("tag"), diff --git a/crates/milli/src/snapshot_tests.rs b/crates/milli/src/snapshot_tests.rs index 6635ab2f4..3e58c44d9 100644 --- a/crates/milli/src/snapshot_tests.rs +++ b/crates/milli/src/snapshot_tests.rs @@ -386,7 +386,7 @@ pub fn snap_settings(index: &Index) -> String { write_setting_to_snap!(criteria); write_setting_to_snap!(displayed_fields); write_setting_to_snap!(distinct_field); - write_setting_to_snap!(filterable_fields); + write_setting_to_snap!(filterable_attributes_rules); write_setting_to_snap!(sortable_fields); write_setting_to_snap!(synonyms); write_setting_to_snap!(authorize_typos); diff --git a/crates/milli/src/update/facet/bulk.rs b/crates/milli/src/update/facet/bulk.rs index dfbdeea77..6e0892b97 100644 --- a/crates/milli/src/update/facet/bulk.rs +++ b/crates/milli/src/update/facet/bulk.rs @@ -374,7 +374,7 @@ mod tests { use crate::heed_codec::StrRefCodec; use crate::index::tests::TempIndex; use crate::update::facet::test_helpers::{ordered_string, FacetIndex}; - use crate::{db_snap, milli_snap, FilterableAttributesSettings}; + use crate::{db_snap, milli_snap, FilterableAttributesRule}; #[test] fn insert() { @@ -474,7 +474,7 @@ mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_owned()); - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "id".to_string(), )]); }) diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index df66961e2..b645ca0bd 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -765,14 +765,14 @@ mod tests { use super::*; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::documents::mmap_from_objects; - use crate::filterable_fields::filtered_matching_field_names; + use crate::filterable_attributes_rules::filtered_matching_field_names; use crate::index::tests::TempIndex; use 
crate::index::IndexEmbeddingConfig; use crate::progress::Progress; use crate::search::TermsMatchingStrategy; use crate::update::new::indexer; use crate::update::Setting; - use crate::{db_snap, Filter, FilterableAttributesSettings, Search, UserError}; + use crate::{db_snap, Filter, FilterableAttributesRule, Search, UserError}; #[test] fn simple_document_replacement() { @@ -1002,7 +1002,7 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( RESERVED_GEO_FIELD_NAME.to_string(), )]); }) @@ -1016,7 +1016,7 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( RESERVED_GEO_FIELD_NAME.to_string(), )]); }) @@ -1235,9 +1235,9 @@ mod tests { settings.set_searchable_fields(searchable_fields); let faceted_fields = vec![ - FilterableAttributesSettings::Field("title".to_string()), - FilterableAttributesSettings::Field("nested.object".to_string()), - FilterableAttributesSettings::Field("nested.machin".to_string()), + FilterableAttributesRule::Field("title".to_string()), + FilterableAttributesRule::Field("nested.object".to_string()), + FilterableAttributesRule::Field("nested.machin".to_string()), ]; settings.set_filterable_fields(faceted_fields); }) @@ -1245,7 +1245,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_fields(&rtxn).unwrap(); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { @@ -1447,7 +1447,7 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + 
settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "dog".to_string(), )]); }) @@ -1468,7 +1468,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_fields(&rtxn).unwrap(); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { @@ -1496,7 +1496,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_fields(&rtxn).unwrap(); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { @@ -1527,7 +1527,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_fields(&rtxn).unwrap(); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { @@ -1743,7 +1743,7 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_fields(&rtxn).unwrap(); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { @@ -1769,7 +1769,7 @@ mod tests { assert_eq!(bitmap_colour_blue.into_iter().collect::>(), vec![7]); }; - let faceted_fields = vec![FilterableAttributesSettings::Field("colour".to_string())]; + let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())]; let index = TempIndex::new(); index.add_documents(content()).unwrap(); @@ -1854,7 +1854,7 @@ mod tests { let check_ok = 
|index: &Index| { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_fields(&rtxn).unwrap(); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { @@ -1880,7 +1880,7 @@ mod tests { assert_eq!(bitmap_colour_blue.into_iter().collect::>(), vec![3]); }; - let faceted_fields = vec![FilterableAttributesSettings::Field("colour".to_string())]; + let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())]; let index = TempIndex::new(); index.add_documents(content()).unwrap(); @@ -1923,7 +1923,7 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_fields(&rtxn).unwrap(); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { @@ -1948,7 +1948,7 @@ mod tests { assert_eq!(bitmap_tags_blue.into_iter().collect::>(), vec![12]); }; - let faceted_fields = vec![FilterableAttributesSettings::Field("tags".to_string())]; + let faceted_fields = vec![FilterableAttributesRule::Field("tags".to_string())]; let index = TempIndex::new(); index.add_documents(content()).unwrap(); @@ -2132,7 +2132,7 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( "title".to_string(), )]); }) @@ -2990,8 +2990,8 @@ mod tests { .update_settings_using_wtxn(&mut wtxn, |settings| { settings.set_primary_key(S("docid")); settings.set_filterable_fields(vec![ - FilterableAttributesSettings::Field("label".to_string()), - FilterableAttributesSettings::Field("label2".to_string()), + 
FilterableAttributesRule::Field("label".to_string()), + FilterableAttributesRule::Field("label2".to_string()), ]); }) .unwrap(); @@ -3171,7 +3171,7 @@ mod tests { index .update_settings_using_wtxn(&mut wtxn, |settings| { settings.set_primary_key(S("id")); - settings.set_filterable_fields(vec![FilterableAttributesSettings::Field( + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( RESERVED_GEO_FIELD_NAME.to_string(), )]); settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 2c74238de..4433c6e75 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -24,7 +24,7 @@ use crate::update::new::thread_local::{FullySend, ThreadLocal}; use crate::update::new::DocumentChange; use crate::update::GrenadParameters; use crate::{ - DocumentId, FieldId, FilterableAttributesFeatures, FilterableAttributesSettings, Result, + DocumentId, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Result, MAX_FACET_VALUE_LENGTH, }; @@ -32,7 +32,7 @@ pub struct FacetedExtractorData<'a, 'b> { sender: &'a FieldIdDocidFacetSender<'a, 'b>, grenad_parameters: &'a GrenadParameters, buckets: usize, - filterable_attributes: Vec, + filterable_attributes: Vec, sortable_fields: HashSet, } @@ -71,7 +71,7 @@ pub struct FacetedDocidsExtractor; impl FacetedDocidsExtractor { fn extract_document_change( context: &DocumentChangeContext>, - filterable_attributes: &[FilterableAttributesSettings], + filterable_attributes: &[FilterableAttributesRule], sortable_fields: &HashSet, document_change: DocumentChange, sender: &FieldIdDocidFacetSender, @@ -424,7 +424,7 @@ impl FacetedDocidsExtractor { { let index = indexing_context.index; let rtxn = index.read_txn()?; - let filterable_attributes = index.filterable_fields(&rtxn)?; + let 
filterable_attributes = index.filterable_attributes_rules(&rtxn)?; let sortable_fields = index.sortable_fields(&rtxn)?; let datastore = ThreadLocal::new(); diff --git a/crates/milli/src/update/new/extract/geo/mod.rs b/crates/milli/src/update/new/extract/geo/mod.rs index 0d64e4b23..1cc78aab3 100644 --- a/crates/milli/src/update/new/extract/geo/mod.rs +++ b/crates/milli/src/update/new/extract/geo/mod.rs @@ -29,7 +29,7 @@ impl GeoExtractor { grenad_parameters: GrenadParameters, ) -> Result> { if index.is_geo_activated(rtxn)? { - Ok(Some(GeoExtractor { grenad_parameters })) + Ok(Some(GeoExtractor { grenad_parameters })) } else { Ok(None) } diff --git a/crates/milli/src/update/new/facet_search_builder.rs b/crates/milli/src/update/new/facet_search_builder.rs index 84de420f7..6e9ffa1ed 100644 --- a/crates/milli/src/update/new/facet_search_builder.rs +++ b/crates/milli/src/update/new/facet_search_builder.rs @@ -14,8 +14,9 @@ use crate::heed_codec::facet::FacetGroupKey; use crate::update::del_add::{DelAdd, KvWriterDelAdd}; use crate::update::{create_sorter, MergeDeladdBtreesetString}; use crate::{ - BEU16StrCodec, FieldId, GlobalFieldsIdsMap, Index, LocalizedAttributesRule, Result, - MAX_FACET_VALUE_LENGTH, + BEU16StrCodec, FieldId, FieldIdMapMissingEntry, FilterableAttributesFeatures, + FilterableAttributesRule, GlobalFieldsIdsMap, Index, InternalError, LocalizedAttributesRule, + Result, MAX_FACET_VALUE_LENGTH, }; pub struct FacetSearchBuilder<'indexer> { @@ -23,6 +24,7 @@ pub struct FacetSearchBuilder<'indexer> { normalized_facet_string_docids_sorter: Sorter, global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, localized_attributes_rules: Vec, + filterable_attributes_rules: Vec, // Buffered data below buffer: Vec, localized_field_ids: HashMap>>, @@ -32,6 +34,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { pub fn new( global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, localized_attributes_rules: Vec, + filterable_attributes_rules: Vec, ) -> Self { let 
registered_facets = HashMap::new(); let normalized_facet_string_docids_sorter = create_sorter( @@ -50,6 +53,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { buffer: Vec::new(), global_fields_ids_map, localized_attributes_rules, + filterable_attributes_rules, localized_field_ids: HashMap::new(), } } @@ -61,6 +65,13 @@ impl<'indexer> FacetSearchBuilder<'indexer> { ) -> Result<()> { let FacetGroupKey { field_id, level: _level, left_bound } = facet_key; + let filterable_attributes_features = self.filterable_attributes_features(field_id)?; + + // if facet search is disabled, we don't need to register the facet + if !filterable_attributes_features.is_facet_searchable() { + return Ok(()); + }; + if deladd == DelAdd::Addition { self.registered_facets.entry(field_id).and_modify(|count| *count += 1).or_insert(1); } @@ -84,6 +95,24 @@ impl<'indexer> FacetSearchBuilder<'indexer> { Ok(()) } + fn filterable_attributes_features( + &mut self, + field_id: u16, + ) -> Result { + let Some(filterable_attributes_features) = + self.global_fields_ids_map.metadata(field_id).map(|metadata| { + metadata.filterable_attributes_features(&self.filterable_attributes_rules) + }) + else { + return Err(InternalError::FieldIdMapMissingEntry(FieldIdMapMissingEntry::FieldId { + field_id, + process: "facet_search_builder::register_from_key", + }) + .into()); + }; + Ok(filterable_attributes_features) + } + fn locales(&mut self, field_id: FieldId) -> Option<&[Language]> { if let Entry::Vacant(e) = self.localized_field_ids.entry(field_id) { let Some(field_name) = self.global_fields_ids_map.name(field_id) else { diff --git a/crates/milli/src/update/new/indexer/post_processing.rs b/crates/milli/src/update/new/indexer/post_processing.rs index 201ab9ec9..edb372063 100644 --- a/crates/milli/src/update/new/indexer/post_processing.rs +++ b/crates/milli/src/update/new/indexer/post_processing.rs @@ -117,9 +117,11 @@ fn compute_facet_search_database( ) -> Result<()> { let rtxn = index.read_txn()?; let 
localized_attributes_rules = index.localized_attributes_rules(&rtxn)?; + let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?; let mut facet_search_builder = FacetSearchBuilder::new( global_fields_ids_map, localized_attributes_rules.unwrap_or_default(), + filterable_attributes_rules, ); let previous_facet_id_string_docids = index @@ -165,7 +167,13 @@ fn compute_facet_level_database( wtxn: &mut RwTxn, mut facet_field_ids_delta: FacetFieldIdsDelta, ) -> Result<()> { + let facet_leveled_field_ids = index.facet_leveled_field_ids(&*wtxn)?; for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() { + // skip field ids that should not be facet leveled + if !facet_leveled_field_ids.contains(&fid) { + continue; + } + let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string"); let _entered = span.enter(); match delta {