From 0200c65ebf24924c6077e52b994c51f8cf1691fd Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 10:22:02 +0100 Subject: [PATCH 01/35] Change the filterableAttributes setting API **Changes:** The filterableAttributes type has been changed from a `BTreeSet<String>` to a `Vec<FilterableAttributesRule>`, which is a list of rules defining patterns to match the documents' fields and a set of features to apply on the matching fields. The rule order given by the user is now significant: the features applied on a filterable field will be chosen based on the rule order, as we do for the LocalizedAttributesRules. This means that the list will not be reordered anymore and will keep the user-defined order; moreover, if there are any duplicates, they will not be de-duplicated anymore. **Impact:** - Settings API - the database format of the filterable attributes changed - may impact the LocalizedAttributesRules due to the AttributePatterns factorization - OpenAPI generator --- crates/meilisearch-types/src/locales.rs | 4 +- crates/meilisearch-types/src/settings.rs | 8 +- .../src/routes/indexes/settings.rs | 2 +- .../src/routes/indexes/settings_analytics.rs | 15 +- crates/meilisearch/src/routes/mod.rs | 6 +- crates/milli/src/attribute_patterns.rs | 128 +++++++++++ .../milli/src/filterable_attributes_rules.rs | 204 ++++++++++++++++++ crates/milli/src/index.rs | 35 ++- crates/milli/src/lib.rs | 9 +- .../milli/src/localized_attributes_rules.rs | 54 +---- 10 files changed, 386 insertions(+), 79 deletions(-) create mode 100644 crates/milli/src/attribute_patterns.rs create mode 100644 crates/milli/src/filterable_attributes_rules.rs diff --git a/crates/meilisearch-types/src/locales.rs b/crates/meilisearch-types/src/locales.rs index 945c38cc3..b3fb90493 100644 --- a/crates/meilisearch-types/src/locales.rs +++ b/crates/meilisearch-types/src/locales.rs @@ -1,5 +1,5 @@ use deserr::Deserr; -use milli::LocalizedAttributesRule; +use milli::{AttributePatterns, LocalizedAttributesRule}; use 
serde::{Deserialize, Serialize}; use utoipa::ToSchema; @@ -7,7 +7,7 @@ use utoipa::ToSchema; #[deserr(rename_all = camelCase)] #[serde(rename_all = "camelCase")] pub struct LocalizedAttributesRuleView { - pub attribute_patterns: Vec, + pub attribute_patterns: AttributePatterns, pub locales: Vec, } diff --git a/crates/meilisearch-types/src/settings.rs b/crates/meilisearch-types/src/settings.rs index e501d7359..7b5807d06 100644 --- a/crates/meilisearch-types/src/settings.rs +++ b/crates/meilisearch-types/src/settings.rs @@ -11,7 +11,7 @@ use fst::IntoStreamer; use milli::index::{IndexEmbeddingConfig, PrefixSearch}; use milli::proximity::ProximityPrecision; use milli::update::Setting; -use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET}; +use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET}; use serde::{Deserialize, Serialize, Serializer}; use utoipa::ToSchema; @@ -202,8 +202,8 @@ pub struct Settings { /// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters). #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] - #[schema(value_type = Option>, example = json!(["release_date", "genre"]))] - pub filterable_attributes: Setting>, + #[schema(value_type = Option>, example = json!(["release_date", "genre"]))] + pub filterable_attributes: Setting>, /// Attributes to use when sorting search results. #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] @@ -791,7 +791,7 @@ pub fn settings( .user_defined_searchable_fields(rtxn)? 
.map(|fields| fields.into_iter().map(String::from).collect()); - let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect(); + let filterable_attributes = index.filterable_attributes_rules(rtxn)?.into_iter().collect(); let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect(); diff --git a/crates/meilisearch/src/routes/indexes/settings.rs b/crates/meilisearch/src/routes/indexes/settings.rs index ad76b3f42..6ecc77ec3 100644 --- a/crates/meilisearch/src/routes/indexes/settings.rs +++ b/crates/meilisearch/src/routes/indexes/settings.rs @@ -291,7 +291,7 @@ make_setting_routes!( { route: "/filterable-attributes", update_verb: put, - value_type: std::collections::BTreeSet, + value_type: Vec, err_type: meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes, >, diff --git a/crates/meilisearch/src/routes/indexes/settings_analytics.rs b/crates/meilisearch/src/routes/indexes/settings_analytics.rs index ffeadcab6..627f9103e 100644 --- a/crates/meilisearch/src/routes/indexes/settings_analytics.rs +++ b/crates/meilisearch/src/routes/indexes/settings_analytics.rs @@ -8,6 +8,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet}; use meilisearch_types::facet_values_sort::FacetValuesSort; use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView}; use meilisearch_types::milli::update::Setting; +use meilisearch_types::milli::FilterableAttributesRule; use meilisearch_types::settings::{ FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView, RankingRuleView, SettingEmbeddingSettings, TypoSettings, @@ -89,6 +90,10 @@ impl Aggregate for SettingsAnalytics { filterable_attributes: FilterableAttributesAnalytics { total: new.filterable_attributes.total.or(self.filterable_attributes.total), has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo), + has_patterns: new + .filterable_attributes + .has_patterns + 
.or(self.filterable_attributes.has_patterns), }, distinct_attribute: DistinctAttributeAnalytics { set: self.distinct_attribute.set | new.distinct_attribute.set, @@ -328,13 +333,19 @@ impl SortableAttributesAnalytics { pub struct FilterableAttributesAnalytics { pub total: Option, pub has_geo: Option, + pub has_patterns: Option, } impl FilterableAttributesAnalytics { - pub fn new(setting: Option<&BTreeSet>) -> Self { + pub fn new(setting: Option<&Vec>) -> Self { Self { total: setting.as_ref().map(|filter| filter.len()), - has_geo: setting.as_ref().map(|filter| filter.contains("_geo")), + has_geo: setting + .as_ref() + .map(|filter| filter.iter().any(FilterableAttributesRule::has_geo)), + has_patterns: setting.as_ref().map(|filter| { + filter.iter().any(|rule| matches!(rule, FilterableAttributesRule::Pattern(_))) + }), } } diff --git a/crates/meilisearch/src/routes/mod.rs b/crates/meilisearch/src/routes/mod.rs index 02cb4130a..cc9aeb7d2 100644 --- a/crates/meilisearch/src/routes/mod.rs +++ b/crates/meilisearch/src/routes/mod.rs @@ -9,6 +9,10 @@ use meilisearch_types::batches::BatchStats; use meilisearch_types::error::{Code, ErrorType, ResponseError}; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::keys::CreateApiKey; +use meilisearch_types::milli::{ + AttributePatterns, FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns, + FilterableAttributesRule, +}; use meilisearch_types::settings::{ Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings, Unchecked, @@ -88,7 +92,7 @@ pub mod tasks; url = "/", description = "Local server", )), - components(schemas(PaginationView, PaginationView, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, 
SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings, Settings, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote)) + components(schemas(PaginationView, PaginationView, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings, Settings, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures)) )] pub struct MeilisearchApi; diff --git a/crates/milli/src/attribute_patterns.rs b/crates/milli/src/attribute_patterns.rs new file mode 100644 index 000000000..baf239c3f --- /dev/null +++ b/crates/milli/src/attribute_patterns.rs @@ -0,0 +1,128 @@ +use deserr::Deserr; +use serde::{Deserialize, Serialize}; +use utoipa::ToSchema; + +use crate::is_faceted_by; + +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[repr(transparent)] +#[serde(transparent)] +pub struct AttributePatterns { + #[schema(value_type = Vec)] + pub patterns: Vec, +} + 
+impl Deserr for AttributePatterns { + fn deserialize_from_value( + value: deserr::Value, + location: deserr::ValuePointerRef, + ) -> Result { + Vec::::deserialize_from_value(value, location).map(|patterns| Self { patterns }) + } +} + +impl From> for AttributePatterns { + fn from(patterns: Vec) -> Self { + Self { patterns } + } +} + +impl AttributePatterns { + pub fn match_str(&self, str: &str) -> PatternMatch { + let mut pattern_match = PatternMatch::NoMatch; + for pattern in &self.patterns { + match match_pattern(pattern, str) { + PatternMatch::Match => return PatternMatch::Match, + PatternMatch::Parent => pattern_match = PatternMatch::Parent, + PatternMatch::NoMatch => {} + } + } + pattern_match + } +} + +fn match_pattern(pattern: &str, str: &str) -> PatternMatch { + if pattern == "*" { + return PatternMatch::Match; + } else if pattern.starts_with('*') && pattern.ends_with('*') { + if str.contains(&pattern[1..pattern.len() - 1]) { + return PatternMatch::Match; + } + } else if let Some(pattern) = pattern.strip_prefix('*') { + if str.ends_with(pattern) { + return PatternMatch::Match; + } + } else if let Some(pattern) = pattern.strip_suffix('*') { + if str.starts_with(pattern) { + return PatternMatch::Match; + } + } else if pattern == str { + return PatternMatch::Match; + } + + // If the field is a parent field of the pattern, return Parent + if is_faceted_by(pattern, str) { + PatternMatch::Parent + } else { + PatternMatch::NoMatch + } +} + +pub fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch { + if is_faceted_by(field, pattern) { + // If the field matches the pattern or is a nested field of the pattern, return Match (legacy behavior) + PatternMatch::Match + } else if is_faceted_by(pattern, field) { + // If the field is a parent field of the pattern, return Parent + PatternMatch::Parent + } else { + // If the field does not match the pattern and is not a parent of a nested field that matches the pattern, return NoMatch + PatternMatch::NoMatch + } 
+} + +/// Match a field against a distinct field. +pub fn match_distinct_field(distinct_field: Option<&str>, field: &str) -> PatternMatch { + if let Some(distinct_field) = distinct_field { + if field == distinct_field { + // If the field matches exactly the distinct field, return Match + return PatternMatch::Match; + } else if is_faceted_by(distinct_field, field) { + // If the field is a parent field of the distinct field, return Parent + return PatternMatch::Parent; + } + } + // If the field does not match the distinct field and is not a parent of a nested field that matches the distinct field, return NoMatch + PatternMatch::NoMatch +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PatternMatch { + /// The field is a parent of the of a nested field that matches the pattern + Parent, + /// The field matches the pattern + Match, + /// The field does not match the pattern + NoMatch, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_match_pattern() { + assert_eq!(match_pattern("*", "test"), PatternMatch::Match); + assert_eq!(match_pattern("test*", "test"), PatternMatch::Match); + assert_eq!(match_pattern("test*", "testa"), PatternMatch::Match); + assert_eq!(match_pattern("*test", "test"), PatternMatch::Match); + assert_eq!(match_pattern("*test", "atest"), PatternMatch::Match); + assert_eq!(match_pattern("*test*", "test"), PatternMatch::Match); + assert_eq!(match_pattern("*test*", "atesta"), PatternMatch::Match); + assert_eq!(match_pattern("*test*", "atest"), PatternMatch::Match); + assert_eq!(match_pattern("*test*", "testa"), PatternMatch::Match); + assert_eq!(match_pattern("test*test", "test"), PatternMatch::NoMatch); + assert_eq!(match_pattern("*test", "testa"), PatternMatch::NoMatch); + assert_eq!(match_pattern("test*", "atest"), PatternMatch::NoMatch); + } +} diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs new file mode 100644 index 000000000..fe603c1c2 --- /dev/null +++ 
b/crates/milli/src/filterable_attributes_rules.rs @@ -0,0 +1,204 @@ +use deserr::{DeserializeError, Deserr, ValuePointerRef}; +use serde::{Deserialize, Serialize}; +use std::collections::{BTreeSet, HashSet}; +use utoipa::ToSchema; + +use crate::{ + attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch}, + constants::RESERVED_GEO_FIELD_NAME, + AttributePatterns, FieldsIdsMap, +}; + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)] +#[serde(untagged)] +pub enum FilterableAttributesRule { + Field(String), + Pattern(FilterableAttributesPatterns), +} + +impl FilterableAttributesRule { + pub fn match_str(&self, field: &str) -> PatternMatch { + match self { + FilterableAttributesRule::Field(pattern) => match_field_legacy(pattern, field), + FilterableAttributesRule::Pattern(patterns) => patterns.match_str(field), + } + } + + pub fn has_geo(&self) -> bool { + matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME) + } + + pub fn features(&self) -> FilterableAttributesFeatures { + match self { + FilterableAttributesRule::Field(_) => FilterableAttributesFeatures::legacy_default(), + FilterableAttributesRule::Pattern(patterns) => patterns.features(), + } + } +} + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +#[deserr(rename_all = camelCase, deny_unknown_fields)] +pub struct FilterableAttributesPatterns { + pub patterns: AttributePatterns, + #[serde(default)] + #[deserr(default)] + pub features: FilterableAttributesFeatures, +} + +impl FilterableAttributesPatterns { + pub fn match_str(&self, field: &str) -> PatternMatch { + self.patterns.match_str(field) + } + + pub fn features(&self) -> FilterableAttributesFeatures { + self.features.clone() + } +} + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] 
+#[deserr(rename_all = camelCase, deny_unknown_fields)] +#[derive(Default)] +pub struct FilterableAttributesFeatures { + facet_search: bool, + filter: FilterFeatures, +} + +impl FilterableAttributesFeatures { + pub fn legacy_default() -> Self { + Self { facet_search: true, filter: FilterFeatures::legacy_default() } + } + + pub fn no_features() -> Self { + Self { facet_search: false, filter: FilterFeatures::no_features() } + } + + pub fn is_filterable(&self) -> bool { + self.filter.is_filterable() + } + + /// Check if `IS EMPTY` is allowed + pub fn is_filterable_empty(&self) -> bool { + self.filter.is_filterable_empty() + } + + /// Check if `=` and `IN` are allowed + pub fn is_filterable_equality(&self) -> bool { + self.filter.is_filterable_equality() + } + + /// Check if `IS NULL` is allowed + pub fn is_filterable_null(&self) -> bool { + self.filter.is_filterable_null() + } + + /// Check if `IS EXISTS` is allowed + pub fn is_filterable_exists(&self) -> bool { + self.filter.is_filterable_exists() + } + + /// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed + pub fn is_filterable_comparison(&self) -> bool { + self.filter.is_filterable_comparison() + } + + /// Check if the facet search is allowed + pub fn is_facet_searchable(&self) -> bool { + self.facet_search + } + + pub fn allowed_filter_operators(&self) -> Vec { + self.filter.allowed_operators() + } +} + +impl Deserr for FilterableAttributesRule { + fn deserialize_from_value( + value: deserr::Value, + location: ValuePointerRef, + ) -> Result { + if value.kind() == deserr::ValueKind::Map { + Ok(Self::Pattern(FilterableAttributesPatterns::deserialize_from_value( + value, location, + )?)) + } else { + Ok(Self::Field(String::deserialize_from_value(value, location)?)) + } + } +} + +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)] +pub struct FilterFeatures { + equality: bool, + comparison: bool, +} + +impl FilterFeatures { + pub fn allowed_operators(&self) -> Vec { + if 
!self.is_filterable() { + return vec![]; + } + + let mut operators = vec!["OR", "AND", "NOT"]; + if self.is_filterable_equality() { + operators.extend_from_slice(&["=", "!=", "IN"]); + } + if self.is_filterable_comparison() { + operators.extend_from_slice(&["<", ">", "<=", ">=", "TO"]); + } + if self.is_filterable_empty() { + operators.push("IS EMPTY"); + } + if self.is_filterable_null() { + operators.push("IS NULL"); + } + if self.is_filterable_exists() { + operators.push("EXISTS"); + } + + operators.into_iter().map(String::from).collect() + } + + pub fn is_filterable(&self) -> bool { + self.equality || self.comparison + } + + pub fn is_filterable_equality(&self) -> bool { + self.equality + } + + /// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed + pub fn is_filterable_comparison(&self) -> bool { + self.comparison + } + + /// Check if `IS EMPTY` is allowed + pub fn is_filterable_empty(&self) -> bool { + self.is_filterable() + } + + /// Check if `IS EXISTS` is allowed + pub fn is_filterable_exists(&self) -> bool { + self.is_filterable() + } + + /// Check if `IS NULL` is allowed + pub fn is_filterable_null(&self) -> bool { + self.is_filterable() + } + + pub fn legacy_default() -> Self { + Self { equality: true, comparison: true } + } + + pub fn no_features() -> Self { + Self { equality: false, comparison: false } + } +} + +impl Default for FilterFeatures { + fn default() -> Self { + Self { equality: true, comparison: false } + } +} diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index c748324ae..d40ddb15d 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -876,11 +876,11 @@ impl Index { /* filterable fields */ - /// Writes the filterable fields names in the database. - pub(crate) fn put_filterable_fields( + /// Writes the filterable attributes rules in the database. 
+ pub(crate) fn put_filterable_attributes_rules( &self, wtxn: &mut RwTxn<'_>, - fields: &HashSet, + #[allow(clippy::ptr_arg)] fields: &Vec, ) -> heed::Result<()> { self.main.remap_types::>().put( wtxn, @@ -889,13 +889,19 @@ impl Index { ) } - /// Deletes the filterable fields ids in the database. - pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result { + /// Deletes the filterable attributes rules in the database. + pub(crate) fn delete_filterable_attributes_rules( + &self, + wtxn: &mut RwTxn<'_>, + ) -> heed::Result { self.main.remap_key_type::().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY) } - /// Returns the filterable fields names. - pub fn filterable_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result> { + /// Returns the filterable attributes rules. + pub fn filterable_attributes_rules( + &self, + rtxn: &RoTxn<'_>, + ) -> heed::Result> { Ok(self .main .remap_types::>() @@ -903,21 +909,6 @@ impl Index { .unwrap_or_default()) } - /// Identical to `filterable_fields`, but returns ids instead. - pub fn filterable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result> { - let fields = self.filterable_fields(rtxn)?; - let fields_ids_map = self.fields_ids_map(rtxn)?; - - let mut fields_ids = HashSet::new(); - for name in fields { - if let Some(field_id) = fields_ids_map.id(&name) { - fields_ids.insert(field_id); - } - } - - Ok(fields_ids) - } - /* sortable fields */ /// Writes the sortable fields names in the database. 
diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index 1d6d04fc7..85540c82e 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -9,12 +9,14 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; pub mod documents; mod asc_desc; +mod attribute_patterns; mod criterion; pub mod database_stats; mod error; mod external_documents_ids; pub mod facet; mod fields_ids_map; +mod filterable_attributes_rules; pub mod heed_codec; pub mod index; mod localized_attributes_rules; @@ -52,6 +54,8 @@ pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbor pub use {charabia as tokenizer, heed, rhai}; pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError}; +pub use self::attribute_patterns::AttributePatterns; +pub use self::attribute_patterns::PatternMatch; pub use self::criterion::{default_criteria, Criterion, CriterionError}; pub use self::error::{ Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError, @@ -59,6 +63,10 @@ pub use self::error::{ pub use self::external_documents_ids::ExternalDocumentsIds; pub use self::fieldids_weights_map::FieldidsWeightsMap; pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap}; +pub use self::filterable_attributes_rules::{ + FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns, + FilterableAttributesRule, +}; pub use self::heed_codec::{ BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec, CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec, @@ -67,7 +75,6 @@ pub use self::heed_codec::{ }; pub use self::index::Index; pub use self::localized_attributes_rules::LocalizedAttributesRule; -use self::localized_attributes_rules::LocalizedFieldIds; pub use self::search::facet::{FacetValueHit, SearchForFacetValues}; pub use self::search::similar::Similar; pub use self::search::{ diff --git a/crates/milli/src/localized_attributes_rules.rs 
b/crates/milli/src/localized_attributes_rules.rs index 2b9bf099c..81015c458 100644 --- a/crates/milli/src/localized_attributes_rules.rs +++ b/crates/milli/src/localized_attributes_rules.rs @@ -4,8 +4,9 @@ use charabia::Language; use serde::{Deserialize, Serialize}; use utoipa::ToSchema; +use crate::attribute_patterns::PatternMatch; use crate::fields_ids_map::FieldsIdsMap; -use crate::FieldId; +use crate::{AttributePatterns, FieldId}; /// A rule that defines which locales are supported for a given attribute. /// @@ -17,18 +18,18 @@ use crate::FieldId; /// The pattern `*attribute_name*` matches any attribute name that contains `attribute_name`. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] pub struct LocalizedAttributesRule { - pub attribute_patterns: Vec, + pub attribute_patterns: AttributePatterns, #[schema(value_type = Vec)] pub locales: Vec, } impl LocalizedAttributesRule { pub fn new(attribute_patterns: Vec, locales: Vec) -> Self { - Self { attribute_patterns, locales } + Self { attribute_patterns: AttributePatterns::from(attribute_patterns), locales } } - pub fn match_str(&self, str: &str) -> bool { - self.attribute_patterns.iter().any(|pattern| match_pattern(pattern.as_str(), str)) + pub fn match_str(&self, str: &str) -> PatternMatch { + self.attribute_patterns.match_str(str) } pub fn locales(&self) -> &[Language] { @@ -36,20 +37,6 @@ impl LocalizedAttributesRule { } } -fn match_pattern(pattern: &str, str: &str) -> bool { - if pattern == "*" { - true - } else if pattern.starts_with('*') && pattern.ends_with('*') { - str.contains(&pattern[1..pattern.len() - 1]) - } else if let Some(pattern) = pattern.strip_prefix('*') { - str.ends_with(pattern) - } else if let Some(pattern) = pattern.strip_suffix('*') { - str.starts_with(pattern) - } else { - pattern == str - } -} - #[derive(Debug, Clone, PartialEq, Eq)] pub struct LocalizedFieldIds { field_id_to_locales: HashMap>, @@ -65,13 +52,13 @@ impl LocalizedFieldIds { if let Some(rules) = 
rules { let fields = fields_ids.filter_map(|field_id| { - fields_ids_map.name(field_id).map(|field_name| (field_id, field_name)) + fields_ids_map.name(field_id).map(|field_name: &str| (field_id, field_name)) }); for (field_id, field_name) in fields { let mut locales = Vec::new(); for rule in rules { - if rule.match_str(field_name) { + if rule.match_str(field_name) == PatternMatch::Match { locales.extend(rule.locales.iter()); // Take the first rule that matches break; @@ -89,10 +76,6 @@ impl LocalizedFieldIds { Self { field_id_to_locales } } - pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> { - self.field_id_to_locales.get(&fields_id).map(Vec::as_slice) - } - pub fn all_locales(&self) -> Vec { let mut locales = Vec::new(); for field_locales in self.field_id_to_locales.values() { @@ -108,24 +91,3 @@ impl LocalizedFieldIds { locales } } - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_match_pattern() { - assert!(match_pattern("*", "test")); - assert!(match_pattern("test*", "test")); - assert!(match_pattern("test*", "testa")); - assert!(match_pattern("*test", "test")); - assert!(match_pattern("*test", "atest")); - assert!(match_pattern("*test*", "test")); - assert!(match_pattern("*test*", "atesta")); - assert!(match_pattern("*test*", "atest")); - assert!(match_pattern("*test*", "testa")); - assert!(!match_pattern("test*test", "test")); - assert!(!match_pattern("*test", "testa")); - assert!(!match_pattern("test*", "atest")); - } -} From 967033579dd408c71b7056457336b324adea40ad Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 10:25:32 +0100 Subject: [PATCH 02/35] Refactor search and facet-search **Changes:** The search filters are now using the FilterableAttributesFeatures from the FilterableAttributesRules to know if a field is filterable. Moreover, the FilterableAttributesFeatures is more precise and an error will be returned if an operator is used on a field that doesn't have the related feature. 
The facet-search is now checking if the feature is allowed in the FilterableAttributesFeatures and an error will be returned if the field doesn't have the related feature. **Impact:** - facet-search is now relying on AttributePatterns to match the locales - search using filters is now relying on FilterableAttributesFeatures - distinct attribute is now relying on FilterableAttributesRules --- crates/filter-parser/src/condition.rs | 19 ++ crates/meilisearch-types/src/error.rs | 1 + crates/meilisearch/src/search/mod.rs | 9 +- crates/milli/src/error.rs | 2 + .../milli/src/filterable_attributes_rules.rs | 104 +++++++ crates/milli/src/index.rs | 90 ++---- crates/milli/src/search/facet/filter.rs | 263 +++++++++++++----- crates/milli/src/search/facet/search.rs | 15 +- crates/milli/src/search/mod.rs | 16 +- 9 files changed, 365 insertions(+), 154 deletions(-) diff --git a/crates/filter-parser/src/condition.rs b/crates/filter-parser/src/condition.rs index 04b6dc266..0fc007bf1 100644 --- a/crates/filter-parser/src/condition.rs +++ b/crates/filter-parser/src/condition.rs @@ -30,6 +30,25 @@ pub enum Condition<'a> { StartsWith { keyword: Token<'a>, word: Token<'a> }, } +impl Condition<'_> { + pub fn operator(&self) -> &str { + match self { + Condition::GreaterThan(_) => ">", + Condition::GreaterThanOrEqual(_) => ">=", + Condition::Equal(_) => "=", + Condition::NotEqual(_) => "!=", + Condition::Null => "IS NULL", + Condition::Empty => "IS EMPTY", + Condition::Exists => "EXISTS", + Condition::LowerThan(_) => "<", + Condition::LowerThanOrEqual(_) => "<=", + Condition::Between { .. } => "TO", + Condition::Contains { .. } => "CONTAINS", + Condition::StartsWith { .. } => "STARTS WITH", + } + } +} + /// condition = value ("==" | ">" ...) 
value pub fn parse_condition(input: Span) -> IResult { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index f64301b8c..7db4f9d9a 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -414,6 +414,7 @@ impl ErrorCode for milli::Error { UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded, UserError::InvalidFilter(_) => Code::InvalidSearchFilter, UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter, + UserError::FilterOperatorNotAllowed { .. } => Code::InvalidSearchFilter, UserError::MissingDocumentId { .. } => Code::MissingDocumentId, UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => { Code::InvalidDocumentId diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 2091047fc..a16f4eb6a 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -20,7 +20,7 @@ use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; use meilisearch_types::milli::vector::Embedder; use meilisearch_types::milli::{ - FacetValueHit, InternalError, OrderBy, SearchForFacetValues, TimeBudget, + FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, TimeBudget, }; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; @@ -1538,8 +1538,9 @@ pub fn perform_facet_search( // If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale. // If the user does not provide locales, we use the locales of the facet string. 
let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default(); - let localized_attributes_locales = - localized_attributes.into_iter().find(|attr| attr.match_str(&facet_name)); + let localized_attributes_locales = localized_attributes + .into_iter() + .find(|attr| attr.match_str(&facet_name) == PatternMatch::Match); let locales = localized_attributes_locales.map(|attr| { attr.locales .into_iter() @@ -1885,7 +1886,7 @@ fn format_fields( let locales = locales.or_else(|| { localized_attributes .iter() - .find(|rule| rule.match_str(key)) + .find(|rule| rule.match_str(key) == PatternMatch::Match) .map(LocalizedAttributesRule::locales) }); diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index c8ed1912f..857a812cd 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -138,6 +138,8 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidFilter(String), #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)] InvalidFilterExpression(&'static [&'static str], Value), + #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`, allowed operators: {}.", allowed_operators.join(", "))] + FilterOperatorNotAllowed { field: String, allowed_operators: Vec, operator: String }, #[error("Attribute `{}` is not sortable. 
{}", .field, match .valid_fields.is_empty() { diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index fe603c1c2..0b7c9092b 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -202,3 +202,107 @@ impl Default for FilterFeatures { Self { equality: true, comparison: false } } } + +pub fn filtered_matching_field_names<'fim>( + filterable_attributes: &[FilterableAttributesRule], + fields_ids_map: &'fim FieldsIdsMap, + filter: &impl Fn(&FilterableAttributesFeatures) -> bool, +) -> BTreeSet<&'fim str> { + let mut result = BTreeSet::new(); + for (_, field_name) in fields_ids_map.iter() { + for filterable_attribute in filterable_attributes { + if filterable_attribute.match_str(field_name) == PatternMatch::Match { + let features = filterable_attribute.features(); + if filter(&features) { + result.insert(field_name); + } + } + } + } + result +} + +pub fn matching_features( + field_name: &str, + filterable_attributes: &[FilterableAttributesRule], +) -> Option { + for filterable_attribute in filterable_attributes { + if filterable_attribute.match_str(field_name) == PatternMatch::Match { + return Some(filterable_attribute.features()); + } + } + None +} + +pub fn is_field_filterable( + field_name: &str, + filterable_attributes: &[FilterableAttributesRule], +) -> bool { + matching_features(field_name, filterable_attributes) + .map_or(false, |features| features.is_filterable()) +} + +pub fn is_field_facet_searchable( + field_name: &str, + filterable_attributes: &[FilterableAttributesRule], +) -> bool { + matching_features(field_name, filterable_attributes) + .map_or(false, |features| features.is_facet_searchable()) +} + +/// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields. 
+pub fn match_faceted_field( + field_name: &str, + filterable_fields: &[FilterableAttributesRule], + sortable_fields: &HashSet, + asc_desc_fields: &HashSet, + distinct_field: &Option, +) -> PatternMatch { + // Check if the field matches any filterable or facet searchable field + let mut selection = match_pattern_by_features(field_name, filterable_fields, &|features| { + features.is_facet_searchable() || features.is_filterable() + }); + + // If the field matches the pattern, return Match + if selection == PatternMatch::Match { + return selection; + } + + match match_distinct_field(distinct_field.as_deref(), field_name) { + PatternMatch::Match => return PatternMatch::Match, + PatternMatch::Parent => selection = PatternMatch::Parent, + PatternMatch::NoMatch => (), + } + + // Otherwise, check if the field matches any sortable/asc_desc field + for pattern in sortable_fields.iter().chain(asc_desc_fields.iter()) { + match match_field_legacy(pattern, field_name) { + PatternMatch::Match => return PatternMatch::Match, + PatternMatch::Parent => selection = PatternMatch::Parent, + PatternMatch::NoMatch => (), + } + } + + selection +} + +fn match_pattern_by_features( + field_name: &str, + filterable_attributes: &[FilterableAttributesRule], + filter: &impl Fn(&FilterableAttributesFeatures) -> bool, +) -> PatternMatch { + let mut selection = PatternMatch::NoMatch; + // Check if the field name matches any pattern that is facet searchable or filterable + for pattern in filterable_attributes { + let features = pattern.features(); + if filter(&features) { + match pattern.match_str(field_name) { + PatternMatch::Match => return PatternMatch::Match, + PatternMatch::Parent => selection = PatternMatch::Parent, + PatternMatch::NoMatch => (), + } + } + } + + selection +} diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index d40ddb15d..186d55809 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -945,83 +945,37 @@ impl Index { 
Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect()) } - /* faceted fields */ - - /// Writes the faceted fields in the database. - pub(crate) fn put_faceted_fields( - &self, - wtxn: &mut RwTxn<'_>, - fields: &HashSet, - ) -> heed::Result<()> { - self.main.remap_types::>().put( - wtxn, - main_key::HIDDEN_FACETED_FIELDS_KEY, - fields, - ) + /// Returns true if the geo feature is enabled. + pub fn is_geo_enabled(&self, rtxn: &RoTxn<'_>) -> Result { + let geo_filter = self.is_geo_filtering_enabled(rtxn)?; + let geo_sortable = self.is_geo_sorting_enabled(rtxn)?; + Ok(geo_filter || geo_sortable) } - /// Returns the faceted fields names. - pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result> { - Ok(self - .main - .remap_types::>() - .get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)? - .unwrap_or_default()) + /// Returns true if the geo sorting feature is enabled. + pub fn is_geo_sorting_enabled(&self, rtxn: &RoTxn<'_>) -> Result { + let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME); + Ok(geo_sortable) } - /// Identical to `faceted_fields`, but returns ids instead. - pub fn faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result> { - let fields = self.faceted_fields(rtxn)?; - let fields_ids_map = self.fields_ids_map(rtxn)?; - - let mut fields_ids = HashSet::new(); - for name in fields { - if let Some(field_id) = fields_ids_map.id(&name) { - fields_ids.insert(field_id); - } - } - - Ok(fields_ids) + /// Returns true if the geo filtering feature is enabled. + pub fn is_geo_filtering_enabled(&self, rtxn: &RoTxn<'_>) -> Result { + let geo_filter = + self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geo()); + Ok(geo_filter) } - /* faceted documents ids */ - - /// Returns the user defined faceted fields names. - /// - /// The user faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields. 
- pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result> { - let filterable_fields = self.filterable_fields(rtxn)?; - let sortable_fields = self.sortable_fields(rtxn)?; - let distinct_field = self.distinct_field(rtxn)?; - let asc_desc_fields = - self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion { + pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result> { + let asc_desc_fields = self + .criteria(rtxn)? + .into_iter() + .filter_map(|criterion| match criterion { Criterion::Asc(field) | Criterion::Desc(field) => Some(field), _otherwise => None, - }); + }) + .collect(); - let mut faceted_fields = filterable_fields; - faceted_fields.extend(sortable_fields); - faceted_fields.extend(asc_desc_fields); - if let Some(field) = distinct_field { - faceted_fields.insert(field.to_owned()); - } - - Ok(faceted_fields) - } - - /// Identical to `user_defined_faceted_fields`, but returns ids instead. - pub fn user_defined_faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result> { - let fields = self.user_defined_faceted_fields(rtxn)?; - let fields_ids_map = self.fields_ids_map(rtxn)?; - - let mut fields_ids = HashSet::new(); - for name in fields { - if let Some(field_id) = fields_ids_map.id(&name) { - fields_ids.insert(field_id); - } - } - - Ok(fields_ids) + Ok(asc_desc_fields) } /* faceted documents ids */ diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index 76f9ed6ff..fa3e4ea28 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::BTreeSet; use std::fmt::{Debug, Display}; use std::ops::Bound::{self, Excluded, Included}; @@ -12,13 +12,16 @@ use serde_json::Value; use super::facet_range_search; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, UserError}; +use crate::filterable_attributes_rules::{ + filtered_matching_field_names, is_field_filterable, 
matching_features, +}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ - distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result, - SerializationError, + distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures, + FilterableAttributesRule, Index, InternalError, Result, SerializationError, }; /// The maximum number of filters the filter AST can process. @@ -60,7 +63,7 @@ impl Display for BadGeoError { #[derive(Debug)] enum FilterError<'a> { - AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet }, + AttributeNotFilterable { attribute: &'a str, filterable_fields: BTreeSet<&'a str> }, ParseGeoError(BadGeoError), TooDeep, } @@ -230,17 +233,22 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { // to avoid doing this for each recursive call we're going to do it ONCE ahead of time - let filterable_fields = index.filterable_fields(rtxn)?; + let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; for fid in self.condition.fids(MAX_FILTER_DEPTH) { let attribute = fid.value(); - if !crate::is_faceted(attribute, &filterable_fields) { + if !is_field_filterable(attribute, &filterable_attributes_rules) { + let fields_ids_map = index.fields_ids_map(rtxn)?; return Err(fid.as_external_error(FilterError::AttributeNotFilterable { attribute, - filterable_fields, + filterable_fields: filtered_matching_field_names( + &filterable_attributes_rules, + &fields_ids_map, + &|features| features.is_filterable(), + ), }))?; } } - self.inner_evaluate(rtxn, index, &filterable_fields, None) + self.inner_evaluate(rtxn, index, &filterable_attributes_rules, None) } fn evaluate_operator( @@ -249,6 +257,7 @@ impl<'a> Filter<'a> { field_id: FieldId, universe: Option<&RoaringBitmap>, operator: 
&Condition<'a>, + features: &FilterableAttributesFeatures, ) -> Result { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; @@ -258,6 +267,28 @@ impl<'a> Filter<'a> { // field id and the level. let (left, right) = match operator { + // return an error if the filter is not allowed for this field + Condition::GreaterThan(_) + | Condition::GreaterThanOrEqual(_) + | Condition::LowerThan(_) + | Condition::LowerThanOrEqual(_) + | Condition::Between { .. } + if !features.is_filterable_comparison() => + { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } + Condition::Empty if !features.is_filterable_empty() => { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } + Condition::Null if !features.is_filterable_null() => { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } + Condition::Exists if !features.is_filterable_exists() => { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } + Condition::Equal(_) | Condition::NotEqual(_) if !features.is_filterable_equality() => { + return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + } Condition::GreaterThan(val) => { (Excluded(val.parse_finite_float()?), Included(f64::MAX)) } @@ -307,7 +338,8 @@ impl<'a> Filter<'a> { } Condition::NotEqual(val) => { let operator = Condition::Equal(val.clone()); - let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?; + let docids = + Self::evaluate_operator(rtxn, index, field_id, None, &operator, features)?; let all_ids = index.documents_ids(rtxn)?; return Ok(all_ids - docids); } @@ -409,7 +441,7 @@ impl<'a> Filter<'a> { &self, rtxn: &heed::RoTxn<'_>, index: &Index, - filterable_fields: &HashSet, + filterable_fields: &[FilterableAttributesRule], universe: Option<&RoaringBitmap>, ) -> Result { if universe.map_or(false, |u| u.is_empty()) { @@ -434,36 +466,56 @@ impl<'a> Filter<'a> { } 
} FilterCondition::In { fid, els } => { - if crate::is_faceted(fid.value(), filterable_fields) { - let field_ids_map = index.fields_ids_map(rtxn)?; - if let Some(fid) = field_ids_map.id(fid.value()) { - els.iter() - .map(|el| Condition::Equal(el.clone())) - .map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op)) - .union() - } else { - Ok(RoaringBitmap::new()) + match matching_features(fid.value(), filterable_fields) { + Some(features) if features.is_filterable() => { + let field_ids_map = index.fields_ids_map(rtxn)?; + if let Some(fid) = field_ids_map.id(fid.value()) { + els.iter() + .map(|el| Condition::Equal(el.clone())) + .map(|op| { + Self::evaluate_operator( + rtxn, index, fid, universe, &op, &features, + ) + }) + .union() + } else { + Ok(RoaringBitmap::new()) + } + } + _ => { + let field_ids_map = index.fields_ids_map(rtxn)?; + Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields: filtered_matching_field_names( + filterable_fields, + &field_ids_map, + &|features| features.is_filterable(), + ), + }))? } - } else { - Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filterable_fields.clone(), - }))? 
} } FilterCondition::Condition { fid, op } => { - if crate::is_faceted(fid.value(), filterable_fields) { - let field_ids_map = index.fields_ids_map(rtxn)?; - if let Some(fid) = field_ids_map.id(fid.value()) { - Self::evaluate_operator(rtxn, index, fid, universe, op) - } else { - Ok(RoaringBitmap::new()) + match matching_features(fid.value(), filterable_fields) { + Some(features) if features.is_filterable() => { + let field_ids_map = index.fields_ids_map(rtxn)?; + if let Some(fid) = field_ids_map.id(fid.value()) { + Self::evaluate_operator(rtxn, index, fid, universe, op, &features) + } else { + Ok(RoaringBitmap::new()) + } + } + _ => { + let field_ids_map = index.fields_ids_map(rtxn)?; + Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields: filtered_matching_field_names( + filterable_fields, + &field_ids_map, + &|features| features.is_filterable(), + ), + }))? } - } else { - Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filterable_fields.clone(), - }))? } } FilterCondition::Or(subfilters) => subfilters @@ -502,7 +554,7 @@ impl<'a> Filter<'a> { } } FilterCondition::GeoLowerThan { point, radius } => { - if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) { + if index.is_geo_filtering_enabled(rtxn)? { let base_point: [f64; 2] = [point[0].parse_finite_float()?, point[1].parse_finite_float()?]; if !(-90.0..=90.0).contains(&base_point[0]) { @@ -530,14 +582,19 @@ impl<'a> Filter<'a> { Ok(result) } else { + let field_ids_map = index.fields_ids_map(rtxn)?; Err(point[0].as_external_error(FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, - filterable_fields: filterable_fields.clone(), + filterable_fields: filtered_matching_field_names( + filterable_fields, + &field_ids_map, + &|features| features.is_filterable(), + ), }))? 
} } FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => { - if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) { + if index.is_geo_filtering_enabled(rtxn)? { let top_right: [f64; 2] = [ top_right_point[0].parse_finite_float()?, top_right_point[1].parse_finite_float()?, @@ -662,10 +719,15 @@ impl<'a> Filter<'a> { Ok(selected_lat & selected_lng) } else { + let field_ids_map = index.fields_ids_map(rtxn)?; Err(top_right_point[0].as_external_error( FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, - filterable_fields: filterable_fields.clone(), + filterable_fields: filtered_matching_field_names( + filterable_fields, + &field_ids_map, + &|features| features.is_filterable(), + ), }, ))? } @@ -674,6 +736,26 @@ impl<'a> Filter<'a> { } } +fn generate_filter_error( + rtxn: &heed::RoTxn<'_>, + index: &Index, + field_id: FieldId, + operator: &Condition<'_>, + features: &FilterableAttributesFeatures, +) -> Error { + match index.fields_ids_map(rtxn) { + Ok(fields_ids_map) => { + let field = fields_ids_map.name(field_id).unwrap_or_default(); + Error::UserError(UserError::FilterOperatorNotAllowed { + field: field.to_string(), + allowed_operators: features.allowed_filter_operators(), + operator: operator.operator().to_string(), + }) + } + Err(e) => e.into(), + } +} + impl<'a> From> for Filter<'a> { fn from(fc: FilterCondition<'a>) -> Self { Self { condition: fc } @@ -687,12 +769,12 @@ mod tests { use big_s::S; use either::Either; - use maplit::hashset; + use meili_snap::snapshot; use roaring::RoaringBitmap; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::index::tests::TempIndex; - use crate::Filter; + use crate::{Filter, FilterableAttributesRule}; #[test] fn empty_db() { @@ -700,7 +782,9 @@ mod tests { //Set the filterable fields to be the channel. index .update_settings(|settings| { - settings.set_filterable_fields(hashset! 
{ S("PrIcE") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "PrIcE".to_string(), + )]); }) .unwrap(); @@ -784,27 +868,32 @@ mod tests { let rtxn = index.read_txn().unwrap(); let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." - )); + snapshot!(error.to_string(), @r###" + Attribute `_geo` is not filterable. This index does not have configured filterable attributes. + 12:14 _geoRadius(42, 150, 10) + "###); let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." - )); + snapshot!(error.to_string(), @r###" + Attribute `_geo` is not filterable. This index does not have configured filterable attributes. + 18:20 _geoBoundingBox([42, 150], [30, 10]) + "###); let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `dog` is not filterable. This index does not have configured filterable attributes." - )); + snapshot!(error.to_string(), @r###" + Attribute `dog` is not filterable. This index does not have configured filterable attributes. + 1:4 dog = "bernese mountain" + "###); drop(rtxn); index .update_settings(|settings| { settings.set_searchable_fields(vec![S("title")]); - settings.set_filterable_fields(hashset! 
{ S("title") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "title".to_string(), + )]); }) .unwrap(); @@ -812,39 +901,45 @@ mod tests { let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `_geo` is not filterable. This index does not have configured filterable attributes. + 12:16 _geoRadius(-100, 150, 10) + "###); let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `_geo` is not filterable. This index does not have configured filterable attributes. + 18:20 _geoBoundingBox([42, 150], [30, 10]) + "###); let filter = Filter::from_str("name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `name` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `name` is not filterable. This index does not have configured filterable attributes. + 1:5 name = 12 + "###); let filter = Filter::from_str("title = \"test\" AND name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `name` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `name` is not filterable. This index does not have configured filterable attributes. 
+ 20:24 title = "test" AND name = 12 + "###); let filter = Filter::from_str("title = \"test\" AND name IN [12]").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `name` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `name` is not filterable. This index does not have configured filterable attributes. + 20:24 title = "test" AND name IN [12] + "###); let filter = Filter::from_str("title = \"test\" AND name != 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); - assert!(error.to_string().starts_with( - "Attribute `name` is not filterable. Available filterable attributes are: `title`." - )); + snapshot!(error.to_string(), @r###" + Attribute `name` is not filterable. This index does not have configured filterable attributes. + 20:24 title = "test" AND name != 12 + "###); } #[test] @@ -870,7 +965,9 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset!(S("monitor_diagonal"))); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "monitor_diagonal".to_string(), + )]); }) .unwrap(); @@ -901,7 +998,9 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S( + RESERVED_GEO_FIELD_NAME, + ))]); }) .unwrap(); @@ -948,7 +1047,10 @@ mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order - settings.set_filterable_fields(hashset! 
{ S(RESERVED_GEO_FIELD_NAME), S("price") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)), + FilterableAttributesRule::Field("price".to_string()), + ]); }) .unwrap(); @@ -998,7 +1100,10 @@ mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order - settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)), + FilterableAttributesRule::Field("price".to_string()), + ]); }) .unwrap(); @@ -1108,7 +1213,9 @@ mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S("price")]); // to keep the fields order - settings.set_filterable_fields(hashset! { S("price") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "price".to_string(), + )]); }) .unwrap(); index @@ -1164,7 +1271,11 @@ mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_owned()); - settings.set_filterable_fields(hashset! 
{ S("id"), S("one"), S("two") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field("id".to_string()), + FilterableAttributesRule::Field("one".to_string()), + FilterableAttributesRule::Field("two".to_string()), + ]); }) .unwrap(); diff --git a/crates/milli/src/search/facet/search.rs b/crates/milli/src/search/facet/search.rs index cdba7ee16..a11e5cd49 100644 --- a/crates/milli/src/search/facet/search.rs +++ b/crates/milli/src/search/facet/search.rs @@ -10,6 +10,9 @@ use roaring::RoaringBitmap; use tracing::error; use crate::error::UserError; +use crate::filterable_attributes_rules::{ + filtered_matching_field_names, is_field_facet_searchable, +}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::search::build_dfa; use crate::{DocumentId, FieldId, OrderBy, Result, Search}; @@ -73,10 +76,16 @@ impl<'a> SearchForFacetValues<'a> { let index = self.search_query.index; let rtxn = self.search_query.rtxn; - let filterable_fields = index.filterable_fields(rtxn)?; - if !filterable_fields.contains(&self.facet) { + let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; + if !is_field_facet_searchable(&self.facet, &filterable_attributes_rules) { + let fields_ids_map = index.fields_ids_map(rtxn)?; + let matching_field_names = filtered_matching_field_names( + &filterable_attributes_rules, + &fields_ids_map, + &|features| features.is_facet_searchable(), + ); let (valid_fields, hidden_fields) = - index.remove_hidden_fields(rtxn, filterable_fields)?; + index.remove_hidden_fields(rtxn, matching_field_names)?; return Err(UserError::InvalidFacetSearchFacetName { field: self.facet.clone(), diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index d5b05f515..15f3b1b4a 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use 
self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::{execute_vector_search, PartialSearchResult}; +use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::Embedder; use crate::{ @@ -187,10 +188,19 @@ impl<'a> Search<'a> { } if let Some(distinct) = &self.distinct { - let filterable_fields = ctx.index.filterable_fields(ctx.txn)?; - if !crate::is_faceted(distinct, &filterable_fields) { + let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?; + // check if the distinct field is in the filterable fields + if !is_field_filterable(distinct, &filterable_fields) { + // if not, remove the hidden fields from the filterable fields to generate the error message + let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; + let matching_field_names = filtered_matching_field_names( + &filterable_fields, + &fields_ids_map, + &|features| features.is_filterable(), + ); let (valid_fields, hidden_fields) = - ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?; + ctx.index.remove_hidden_fields(ctx.txn, matching_field_names)?; + // and return the error return Err(Error::UserError(UserError::InvalidDistinctAttribute { field: distinct.clone(), valid_fields, From 4f7ece24118bba005d9717067cbd289c1a45a565 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 10:29:33 +0100 Subject: [PATCH 03/35] Refactor the FieldIdMapWithMetadata **Changes:** The FieldIdMapWithMetadata structure now stores more information about fields. The metadata_for_field function computes all the needed information relying on the user provided data instead of the enriched data (searchable/sortable) which may solve an indexing bug on sortable attributes that was not matching the nested fields. 
The FieldIdMapWithMetadata structure was duplicated in the embeddings as FieldsIdsMapWithMetadata, so the FieldsIdsMapWithMetadata has been removed in favor of FieldIdMapWithMetadata. The Facet distribution is now relying on the FieldIdMapWithMetadata with metadata to check whether a field can be faceted. **Impact:** - searchable attributes matching - searchable attributes weight computation - sortable attributes matching - faceted fields matching - prompt computing - facet distribution --- crates/milli/src/fields_ids_map/global.rs | 5 + crates/milli/src/fields_ids_map/metadata.rs | 196 ++++++++++++++++-- crates/milli/src/index.rs | 34 +-- crates/milli/src/prompt/fields.rs | 21 +- crates/milli/src/prompt/mod.rs | 43 +--- .../src/search/facet/facet_distribution.rs | 125 ++++++----- 6 files changed, 281 insertions(+), 143 deletions(-) diff --git a/crates/milli/src/fields_ids_map/global.rs b/crates/milli/src/fields_ids_map/global.rs index 2ffc45eb7..e5f1212df 100644 --- a/crates/milli/src/fields_ids_map/global.rs +++ b/crates/milli/src/fields_ids_map/global.rs @@ -105,6 +105,11 @@ impl<'indexing> GlobalFieldsIdsMap<'indexing> { self.local.name(id) } + + /// Get the metadata of a field based on its id. 
+ pub fn metadata(&self, id: FieldId) -> Option { + self.local.metadata(id).or_else(|| self.global.read().unwrap().metadata(id)) + } } impl<'indexing> MutFieldIdMapper for GlobalFieldsIdsMap<'indexing> { diff --git a/crates/milli/src/fields_ids_map/metadata.rs b/crates/milli/src/fields_ids_map/metadata.rs index 65a1111fa..fd333c3c6 100644 --- a/crates/milli/src/fields_ids_map/metadata.rs +++ b/crates/milli/src/fields_ids_map/metadata.rs @@ -5,14 +5,29 @@ use charabia::Language; use heed::RoTxn; use super::FieldsIdsMap; -use crate::{FieldId, Index, LocalizedAttributesRule, Result}; +use crate::attribute_patterns::{match_field_legacy, PatternMatch}; +use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; +use crate::{ + is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index, + LocalizedAttributesRule, Result, Weight, +}; #[derive(Debug, Clone, Copy)] pub struct Metadata { - pub searchable: bool, - pub filterable: bool, + /// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable. + pub searchable: Option, + /// The field is part of the sortable attributes. pub sortable: bool, - localized_attributes_rule_id: Option, + /// The field is defined as the distinct attribute. + pub distinct: bool, + /// The field has been defined as asc/desc in the ranking rules. + pub asc_desc: bool, + /// The field is a geo field. + pub geo: bool, + /// The id of the localized attributes rule if the field is localized. + pub localized_attributes_rule_id: Option, + /// The id of the filterable attributes rule if the field is filterable. 
+ pub filterable_attributes_rule_id: Option, } #[derive(Debug, Clone)] @@ -106,76 +121,215 @@ impl Metadata { let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap(); Some(rule.locales()) } + + pub fn filterable_attributes<'rules>( + &self, + rules: &'rules [FilterableAttributesRule], + ) -> Option<&'rules FilterableAttributesRule> { + let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get(); + // - 1: `filterable_attributes_rule_id` is NonZero + let rule = rules.get((filterable_attributes_rule_id - 1) as usize).unwrap(); + Some(rule) + } + + pub fn filterable_attributes_features( + &self, + rules: &[FilterableAttributesRule], + ) -> FilterableAttributesFeatures { + self.filterable_attributes(rules) + .map(|rule| rule.features()) + // if there is no filterable attributes rule, return no features + .unwrap_or_else(FilterableAttributesFeatures::no_features) + } + + pub fn is_sortable(&self) -> bool { + self.sortable + } + + pub fn is_searchable(&self) -> bool { + self.searchable.is_some() + } + + pub fn searchable_weight(&self) -> Option { + self.searchable + } + + pub fn is_distinct(&self) -> bool { + self.distinct + } + + pub fn is_asc_desc(&self) -> bool { + self.asc_desc + } + + pub fn is_geo(&self) -> bool { + self.geo + } + + /// Returns `true` if the field is part of the facet databases. 
(sortable, distinct, asc_desc, filterable or facet searchable) + pub fn is_faceted(&self, rules: &[FilterableAttributesRule]) -> bool { + if self.is_distinct() || self.is_sortable() || self.is_asc_desc() { + return true; + } + + let features = self.filterable_attributes_features(rules); + if features.is_filterable() || features.is_facet_searchable() { + return true; + } + + false + } + + pub fn require_facet_level_database(&self, rules: &[FilterableAttributesRule]) -> bool { + let features = self.filterable_attributes_features(rules); + + self.is_sortable() || self.is_asc_desc() || features.is_filterable_comparison() + } } #[derive(Debug, Clone)] pub struct MetadataBuilder { - searchable_attributes: Vec, - filterable_attributes: HashSet, + searchable_attributes: Option>, + filterable_attributes: Vec, sortable_attributes: HashSet, localized_attributes: Option>, + distinct_attribute: Option, + asc_desc_attributes: HashSet, } impl MetadataBuilder { pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result { - let searchable_attributes = - index.searchable_fields(rtxn)?.into_iter().map(|s| s.to_string()).collect(); - let filterable_attributes = index.filterable_fields(rtxn)?; + let searchable_attributes = match index.user_defined_searchable_fields(rtxn)? { + Some(fields) if fields.contains(&"*") => None, + None => None, + Some(fields) => Some(fields.into_iter().map(|s| s.to_string()).collect()), + }; + let filterable_attributes = index.filterable_attributes_rules(rtxn)?; let sortable_attributes = index.sortable_fields(rtxn)?; let localized_attributes = index.localized_attributes_rules(rtxn)?; + let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string()); + let asc_desc_attributes = index.asc_desc_fields(rtxn)?; Ok(Self { searchable_attributes, filterable_attributes, sortable_attributes, localized_attributes, + distinct_attribute, + asc_desc_attributes, }) } + #[cfg(test)] + /// Build a new `MetadataBuilder` from the given parameters. 
+ /// + /// This is used for testing, prefer using `MetadataBuilder::from_index` instead. pub fn new( - searchable_attributes: Vec, - filterable_attributes: HashSet, + searchable_attributes: Option>, + filterable_attributes: Vec, sortable_attributes: HashSet, localized_attributes: Option>, + distinct_attribute: Option, + asc_desc_attributes: HashSet, ) -> Self { + let searchable_attributes = match searchable_attributes { + Some(fields) if fields.iter().any(|f| f == "*") => None, + None => None, + Some(fields) => Some(fields), + }; + Self { searchable_attributes, filterable_attributes, sortable_attributes, localized_attributes, + distinct_attribute, + asc_desc_attributes, } } pub fn metadata_for_field(&self, field: &str) -> Metadata { - let searchable = self - .searchable_attributes + if is_faceted_by(field, RESERVED_VECTORS_FIELD_NAME) { + // Vectors fields are not searchable, filterable, distinct or asc_desc + return Metadata { + searchable: None, + sortable: false, + distinct: false, + asc_desc: false, + geo: false, + localized_attributes_rule_id: None, + filterable_attributes_rule_id: None, + }; + } + + // A field is sortable if it is faceted by a sortable attribute + let sortable = self + .sortable_attributes .iter() - .any(|attribute| attribute == "*" || attribute == field); + .any(|pattern| match_field_legacy(pattern, field) == PatternMatch::Match); - let filterable = self.filterable_attributes.contains(field); + let filterable_attributes_rule_id = self + .filterable_attributes + .iter() + .position(|attribute| attribute.match_str(field) == PatternMatch::Match) + // saturating_add(1): make `id` `NonZero` + .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap()); - let sortable = self.sortable_attributes.contains(field); + if match_field_legacy(RESERVED_GEO_FIELD_NAME, field) == PatternMatch::Match { + // Geo fields are not searchable, distinct or asc_desc + return Metadata { + searchable: None, + sortable, + distinct: false, + asc_desc: 
false, + geo: true, + localized_attributes_rule_id: None, + filterable_attributes_rule_id, + }; + } + + let searchable = match &self.searchable_attributes { + // A field is searchable if it is faceted by a searchable attribute + Some(attributes) => attributes + .iter() + .enumerate() + .find(|(_i, pattern)| is_faceted_by(field, pattern)) + .map(|(i, _)| i as u16), + None => Some(0), + }; + + let distinct = + self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field); + let asc_desc = self.asc_desc_attributes.contains(field); let localized_attributes_rule_id = self .localized_attributes .iter() .flat_map(|v| v.iter()) - .position(|rule| rule.match_str(field)) + .position(|rule| rule.match_str(field) == PatternMatch::Match) // saturating_add(1): make `id` `NonZero` .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap()); - Metadata { searchable, filterable, sortable, localized_attributes_rule_id } + Metadata { + searchable, + sortable, + distinct, + asc_desc, + geo: false, + localized_attributes_rule_id, + filterable_attributes_rule_id, + } } - pub fn searchable_attributes(&self) -> &[String] { - self.searchable_attributes.as_slice() + pub fn searchable_attributes(&self) -> Option<&[String]> { + self.searchable_attributes.as_deref() } pub fn sortable_attributes(&self) -> &HashSet { &self.sortable_attributes } - pub fn filterable_attributes(&self) -> &HashSet { + pub fn filterable_attributes(&self) -> &[FilterableAttributesRule] { &self.filterable_attributes } diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 186d55809..75f4a8c17 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1,6 +1,5 @@ use std::borrow::Cow; use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; -use std::convert::TryInto; use std::fs::File; use std::path::Path; @@ -10,10 +9,11 @@ use roaring::RoaringBitmap; use rstar::RTree; use serde::{Deserialize, Serialize}; -use crate::constants::{self, 
RESERVED_VECTORS_FIELD_NAME}; +use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; use crate::database_stats::DatabaseStats; use crate::documents::PrimaryKey; use crate::error::{InternalError, UserError}; +use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; use crate::fields_ids_map::FieldsIdsMap; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, @@ -27,8 +27,9 @@ use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig}; use crate::{ default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec, - FieldidsWeightsMap, GeoPoint, LocalizedAttributesRule, ObkvCodec, Result, RoaringBitmapCodec, - RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64, + FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec, + Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, + BEU64, }; pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5; @@ -738,8 +739,7 @@ impl Index { &self, wtxn: &mut RwTxn<'_>, user_fields: &[&str], - non_searchable_fields_ids: &[FieldId], - fields_ids_map: &FieldsIdsMap, + fields_ids_map: &FieldIdMapWithMetadata, ) -> Result<()> { // We can write the user defined searchable fields as-is. self.put_user_defined_searchable_fields(wtxn, user_fields)?; @@ -747,29 +747,17 @@ impl Index { let mut weights = FieldidsWeightsMap::default(); // Now we generate the real searchable fields: - // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion. - // 2. Iterate over the user defined searchable fields. - // 3. If a user defined field is a subset of a field defined in the fields_ids_map - // (ie doggo.name is a subset of doggo) right after doggo and with the same weight. 
let mut real_fields = Vec::new(); - - for (id, field_from_map) in fields_ids_map.iter() { - for (weight, user_field) in user_fields.iter().enumerate() { - if crate::is_faceted_by(field_from_map, user_field) - && !real_fields.contains(&field_from_map) - && !non_searchable_fields_ids.contains(&id) - { - real_fields.push(field_from_map); - - let weight: u16 = - weight.try_into().map_err(|_| UserError::AttributeLimitReached)?; - weights.insert(id, weight); - } + for (id, field_from_map, metadata) in fields_ids_map.iter() { + if let Some(weight) = metadata.searchable_weight() { + real_fields.push(field_from_map); + weights.insert(id, weight); } } self.put_searchable_fields(wtxn, &real_fields)?; self.put_fieldids_weights_map(wtxn, &weights)?; + Ok(()) } diff --git a/crates/milli/src/prompt/fields.rs b/crates/milli/src/prompt/fields.rs index ab15c31b0..ffafffd63 100644 --- a/crates/milli/src/prompt/fields.rs +++ b/crates/milli/src/prompt/fields.rs @@ -7,14 +7,14 @@ use liquid::model::{ }; use liquid::{ObjectView, ValueView}; -use super::{FieldMetadata, FieldsIdsMapWithMetadata}; +use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata}; use crate::GlobalFieldsIdsMap; #[derive(Debug, Clone, Copy)] pub struct FieldValue<'a, D: ObjectView> { name: &'a str, document: &'a D, - metadata: FieldMetadata, + metadata: Metadata, } impl<'a, D: ObjectView> ValueView for FieldValue<'a, D> { @@ -67,7 +67,10 @@ impl<'a, D: ObjectView> FieldValue<'a, D> { } pub fn is_searchable(&self) -> &bool { - &self.metadata.searchable + match self.metadata.is_searchable() { + true => &true, + false => &false, + } } pub fn is_empty(&self) -> bool { @@ -125,15 +128,11 @@ pub struct BorrowedFields<'a, 'map, D: ObjectView> { } impl<'a, D: ObjectView> OwnedFields<'a, D> { - pub fn new(document: &'a D, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self { + pub fn new(document: &'a D, field_id_map: &'a FieldIdMapWithMetadata) -> Self { Self( std::iter::repeat(document) 
.zip(field_id_map.iter()) - .map(|(document, (fid, name))| FieldValue { - document, - name, - metadata: field_id_map.metadata(fid).unwrap_or_default(), - }) + .map(|(document, (_fid, name, metadata))| FieldValue { document, name, metadata }) .collect(), ) } @@ -187,7 +186,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> { let fv = self.doc_alloc.alloc(FieldValue { name: self.doc_alloc.alloc_str(&k), document: self.document, - metadata: FieldMetadata { searchable: metadata.searchable }, + metadata, }); fv as _ })) @@ -207,7 +206,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> { let fv = self.doc_alloc.alloc(FieldValue { name: self.doc_alloc.alloc_str(&key), document: self.document, - metadata: FieldMetadata { searchable: metadata.searchable }, + metadata, }); Some(fv as _) } diff --git a/crates/milli/src/prompt/mod.rs b/crates/milli/src/prompt/mod.rs index 3eb91611e..a5cb8de48 100644 --- a/crates/milli/src/prompt/mod.rs +++ b/crates/milli/src/prompt/mod.rs @@ -5,11 +5,9 @@ mod fields; mod template_checker; use std::cell::RefCell; -use std::collections::BTreeMap; use std::convert::TryFrom; use std::fmt::Debug; use std::num::NonZeroUsize; -use std::ops::Deref; use bumpalo::Bump; use document::ParseableDocument; @@ -18,8 +16,9 @@ use fields::{BorrowedFields, OwnedFields}; use self::context::Context; use self::document::Document; +use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; use crate::update::del_add::DelAdd; -use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap}; +use crate::GlobalFieldsIdsMap; pub struct Prompt { template: liquid::Template, @@ -145,9 +144,9 @@ impl Prompt { &self, document: &obkv::KvReaderU16, side: DelAdd, - field_id_map: &FieldsIdsMapWithMetadata, + field_id_map: &FieldIdMapWithMetadata, ) -> Result { - let document = Document::new(document, side, field_id_map); + let document = Document::new(document, side, field_id_map.as_fields_ids_map()); let fields = 
OwnedFields::new(&document, field_id_map); let context = Context::new(&document, &fields); @@ -172,40 +171,6 @@ fn truncate(s: &mut String, max_bytes: usize) { } } -pub struct FieldsIdsMapWithMetadata<'a> { - fields_ids_map: &'a FieldsIdsMap, - metadata: BTreeMap, -} - -impl<'a> FieldsIdsMapWithMetadata<'a> { - pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self { - let mut metadata: BTreeMap = - fields_ids_map.ids().map(|id| (id, Default::default())).collect(); - for searchable_field_id in searchable_fields_ids { - let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue }; - metadata.searchable = true; - } - Self { fields_ids_map, metadata } - } - - pub fn metadata(&self, field_id: FieldId) -> Option { - self.metadata.get(&field_id).copied() - } -} - -impl<'a> Deref for FieldsIdsMapWithMetadata<'a> { - type Target = FieldsIdsMap; - - fn deref(&self) -> &Self::Target { - self.fields_ids_map - } -} - -#[derive(Debug, Default, Clone, Copy)] -pub struct FieldMetadata { - pub searchable: bool, -} - #[cfg(test)] mod test { use super::Prompt; diff --git a/crates/milli/src/search/facet/facet_distribution.rs b/crates/milli/src/search/facet/facet_distribution.rs index ee0fad535..b165d4e80 100644 --- a/crates/milli/src/search/facet/facet_distribution.rs +++ b/crates/milli/src/search/facet/facet_distribution.rs @@ -1,4 +1,4 @@ -use std::collections::{BTreeMap, HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashMap}; use std::fmt::Display; use std::ops::ControlFlow; use std::{fmt, mem}; @@ -9,8 +9,9 @@ use indexmap::IndexMap; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; -use crate::error::UserError; +use crate::attribute_patterns::match_field_legacy; use crate::facet::FacetType; +use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata, MetadataBuilder}; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, 
OrderedF64Codec, }; @@ -18,7 +19,7 @@ use crate::heed_codec::{BytesRefCodec, StrRefCodec}; use crate::search::facet::facet_distribution_iter::{ count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution, }; -use crate::{FieldId, Index, Result}; +use crate::{Error, FieldId, FilterableAttributesRule, Index, PatternMatch, Result, UserError}; /// The default number of values by facets that will /// be fetched from the key-value store. @@ -287,37 +288,23 @@ impl<'a> FacetDistribution<'a> { } pub fn compute_stats(&self) -> Result> { - let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; - let filterable_fields = self.index.filterable_fields(self.rtxn)?; let candidates = if let Some(candidates) = self.candidates.clone() { candidates } else { return Ok(Default::default()); }; - let fields = match &self.facets { - Some(facets) => { - let invalid_fields: HashSet<_> = facets - .iter() - .map(|(name, _)| name) - .filter(|facet| !crate::is_faceted(facet, &filterable_fields)) - .collect(); - if !invalid_fields.is_empty() { - return Err(UserError::InvalidFacetsDistribution { - invalid_facets_name: invalid_fields.into_iter().cloned().collect(), - valid_facets_name: filterable_fields.into_iter().collect(), - } - .into()); - } else { - facets.iter().map(|(name, _)| name).cloned().collect() - } - } - None => filterable_fields, - }; + let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; + let fields_ids_map = FieldIdMapWithMetadata::new( + fields_ids_map, + MetadataBuilder::from_index(self.index, self.rtxn)?, + ); + let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?; + self.check_faceted_fields(&fields_ids_map, &filterable_attributes_rules)?; let mut distribution = BTreeMap::new(); - for (fid, name) in fields_ids_map.iter() { - if crate::is_faceted(name, &fields) { + for (fid, name, metadata) in fields_ids_map.iter() { + if self.select_field(name, &metadata, &filterable_attributes_rules) { let min_value = if 
let Some(min_value) = crate::search::facet::facet_min_value( self.index, self.rtxn, @@ -348,31 +335,16 @@ impl<'a> FacetDistribution<'a> { pub fn execute(&self) -> Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; - let filterable_fields = self.index.filterable_fields(self.rtxn)?; - - let fields = match self.facets { - Some(ref facets) => { - let invalid_fields: HashSet<_> = facets - .iter() - .map(|(name, _)| name) - .filter(|facet| !crate::is_faceted(facet, &filterable_fields)) - .collect(); - if !invalid_fields.is_empty() { - return Err(UserError::InvalidFacetsDistribution { - invalid_facets_name: invalid_fields.into_iter().cloned().collect(), - valid_facets_name: filterable_fields.into_iter().collect(), - } - .into()); - } else { - facets.iter().map(|(name, _)| name).cloned().collect() - } - } - None => filterable_fields, - }; + let fields_ids_map = FieldIdMapWithMetadata::new( + fields_ids_map, + MetadataBuilder::from_index(self.index, self.rtxn)?, + ); + let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?; + self.check_faceted_fields(&fields_ids_map, &filterable_attributes_rules)?; let mut distribution = BTreeMap::new(); - for (fid, name) in fields_ids_map.iter() { - if crate::is_faceted(name, &fields) { + for (fid, name, metadata) in fields_ids_map.iter() { + if self.select_field(name, &metadata, &filterable_attributes_rules) { let order_by = self .facets .as_ref() @@ -385,6 +357,61 @@ impl<'a> FacetDistribution<'a> { Ok(distribution) } + + /// Select a field if it is faceted and in the facets. + fn select_field( + &self, + name: &str, + metadata: &Metadata, + filterable_attributes_rules: &[FilterableAttributesRule], + ) -> bool { + match &self.facets { + Some(facets) => { + // The list of facets provided by the user is a legacy pattern ("dog.age" must be selected with "dog"). 
+ facets.keys().any(|key| match_field_legacy(key, name) == PatternMatch::Match) + } + None => metadata.is_faceted(filterable_attributes_rules), + } + } + + /// Check if the fields in the facets are valid faceted fields. + fn check_faceted_fields( + &self, + fields_ids_map: &FieldIdMapWithMetadata, + filterable_attributes_rules: &[FilterableAttributesRule], + ) -> Result<()> { + let mut invalid_facets = BTreeSet::new(); + if let Some(facets) = &self.facets { + for (field, _) in facets { + let is_valid_faceted_field = + fields_ids_map.id_with_metadata(field).map_or(false, |(_, metadata)| { + metadata.is_faceted(filterable_attributes_rules) + }); + if !is_valid_faceted_field { + invalid_facets.insert(field.to_string()); + } + } + } + + if !invalid_facets.is_empty() { + let valid_facets_name = fields_ids_map + .iter() + .filter_map(|(_, name, metadata)| { + if metadata.is_faceted(filterable_attributes_rules) { + Some(name.to_string()) + } else { + None + } + }) + .collect(); + return Err(Error::UserError(UserError::InvalidFacetsDistribution { + invalid_facets_name: invalid_facets, + valid_facets_name, + })); + } + + Ok(()) + } } impl fmt::Debug for FacetDistribution<'_> { From 286d310287837a188098124a26da9cdafc5c6f3a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 27 Feb 2025 14:58:22 +0100 Subject: [PATCH 04/35] Fix inconsistency in attribute ranking rule computation **Changes:** The building of the Attributes ranking rule graph was comparing fieldids with weights which doesn't make sense and may be bug prone, we are now comparing fieldids with fieldids. 
**Impact:** - search: Attribute ranking rule --- crates/milli/src/fieldids_weights_map.rs | 5 +++++ crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs | 6 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/crates/milli/src/fieldids_weights_map.rs b/crates/milli/src/fieldids_weights_map.rs index 57c99f77f..f23bc1512 100644 --- a/crates/milli/src/fieldids_weights_map.rs +++ b/crates/milli/src/fieldids_weights_map.rs @@ -48,6 +48,11 @@ impl FieldidsWeightsMap { self.map.values().copied().max() } + /// Returns the field id with the highest weight. + pub fn max_weight_fid(&self) -> Option<(FieldId, Weight)> { + self.map.iter().max_by_key(|(_, weight)| *weight).map(|(fid, weight)| (*fid, *weight)) + } + /// Return an iterator visiting all field ids in arbitrary order. pub fn ids(&self) -> impl Iterator + '_ { self.map.keys().copied() diff --git a/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs index 67775ddea..62d75d2ac 100644 --- a/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -88,10 +88,10 @@ impl RankingRuleGraphTrait for FidGraph { } // always lookup the max_fid if we don't already and add an artificial condition for max scoring - let max_weight: Option = weights_map.max_weight(); + let max_weight_fid = weights_map.max_weight_fid(); - if let Some(max_weight) = max_weight { - if !all_fields.contains(&max_weight) { + if let Some((max_fid, max_weight)) = max_weight_fid { + if !all_fields.contains(&max_fid) { edges.push(( max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. 
conditions_interner.insert(FidCondition { From 659855c88e33149e51b4daecc9531181d69fe845 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 10:32:02 +0100 Subject: [PATCH 05/35] Refactor Settings Indexing process **Changes:** The transform structure is now relying on FieldIdMapWithMetadata and AttributePatterns to prepare the obkv documents during a settings reindexing. The InnerIndexSettingsDiff and InnerIndexSettings structs are now relying on FieldIdMapWithMetadata, FilterableAttributesRule and AttributePatterns to define the fields and the databases that should be reindexed. The faceted_fields_ids, localized_searchable_fields_ids and localized_faceted_fields_ids have been removed in favor of the FieldIdMapWithMetadata. We are now relying on the FieldIdMapWithMetadata to retain vectors_fids from the facets and the searchables. The searchable database computing is now relying on the FieldIdMapWithMetadata to know if a field is searchable and retrieve the locales. The facet database computing is now relying on the FieldIdMapWithMetadata to compute the facet databases, the facet-search and retrieve the locales. The facet level database computing is now relying on the FieldIdMapWithMetadata and the facet level databases are cleared depending on the settings differences (clear_facet_levels_based_on_settings_diff). The vector point extraction uses the FieldIdMapWithMetadata instead of FieldsIdsMapWithMetadata.
**Impact:** - Dump import - Settings update --- crates/milli/src/update/del_add.rs | 11 + crates/milli/src/update/facet/bulk.rs | 21 +- crates/milli/src/update/facet/mod.rs | 57 +++- .../src/update/index_documents/enrich.rs | 7 +- .../extract/extract_docid_word_positions.rs | 19 +- .../extract/extract_facet_string_docids.rs | 49 +++- .../extract/extract_fid_docid_facet_values.rs | 4 +- .../extract/extract_vector_points.rs | 25 +- .../milli/src/update/index_documents/mod.rs | 109 +++++-- .../src/update/index_documents/transform.rs | 73 +++-- .../src/update/index_documents/typed_chunk.rs | 4 +- crates/milli/src/update/settings.rs | 268 ++++++++---------- 12 files changed, 375 insertions(+), 272 deletions(-) diff --git a/crates/milli/src/update/del_add.rs b/crates/milli/src/update/del_add.rs index 97ff86f2a..6825e2bd3 100644 --- a/crates/milli/src/update/del_add.rs +++ b/crates/milli/src/update/del_add.rs @@ -81,6 +81,17 @@ pub enum DelAddOperation { DeletionAndAddition, } +impl DelAddOperation { + /// Merge two DelAddOperation enum variants. 
+ pub fn merge(self, other: Self) -> Self { + match (self, other) { + (Self::Deletion, Self::Deletion) => Self::Deletion, + (Self::Addition, Self::Addition) => Self::Addition, + _ => Self::DeletionAndAddition, + } + } +} + /// Creates a Kv> from two Kv /// /// putting each deletion obkv's keys under an DelAdd::Deletion diff --git a/crates/milli/src/update/facet/bulk.rs b/crates/milli/src/update/facet/bulk.rs index 1ab8740ed..5de0ff4ed 100644 --- a/crates/milli/src/update/facet/bulk.rs +++ b/crates/milli/src/update/facet/bulk.rs @@ -6,7 +6,7 @@ use heed::types::Bytes; use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn}; use roaring::RoaringBitmap; -use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; +use super::{clear_facet_levels, FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE}; use crate::facet::FacetType; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, @@ -97,9 +97,7 @@ pub(crate) struct FacetsUpdateBulkInner { impl FacetsUpdateBulkInner { pub fn update(mut self, wtxn: &mut RwTxn<'_>, field_ids: &[u16]) -> Result<()> { self.update_level0(wtxn)?; - for &field_id in field_ids.iter() { - self.clear_levels(wtxn, field_id)?; - } + clear_facet_levels(wtxn, &self.db.remap_data_type(), field_ids)?; for &field_id in field_ids.iter() { let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?; @@ -114,14 +112,6 @@ impl FacetsUpdateBulkInner { Ok(()) } - fn clear_levels(&self, wtxn: &mut heed::RwTxn<'_>, field_id: FieldId) -> Result<()> { - let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] }; - let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] }; - let range = left..=right; - self.db.delete_range(wtxn, &range).map(drop)?; - Ok(()) - } - fn update_level0(&mut self, wtxn: &mut RwTxn<'_>) -> Result<()> { let delta_data = match self.delta_data.take() { Some(x) => x, @@ -365,8 +355,6 @@ impl FacetsUpdateBulkInner { mod tests { use std::iter::once; - 
use big_s::S; - use maplit::hashset; use roaring::RoaringBitmap; use crate::documents::mmap_from_objects; @@ -374,7 +362,7 @@ mod tests { use crate::heed_codec::StrRefCodec; use crate::index::tests::TempIndex; use crate::update::facet::test_helpers::{ordered_string, FacetIndex}; - use crate::{db_snap, milli_snap}; + use crate::{db_snap, milli_snap, FilterableAttributesRule}; #[test] fn insert() { @@ -474,7 +462,8 @@ mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_owned()); - settings.set_filterable_fields(hashset! { S("id") }); + settings + .set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_string())]); }) .unwrap(); diff --git a/crates/milli/src/update/facet/mod.rs b/crates/milli/src/update/facet/mod.rs index dbacf6248..027bb355e 100644 --- a/crates/milli/src/update/facet/mod.rs +++ b/crates/milli/src/update/facet/mod.rs @@ -89,6 +89,7 @@ use time::OffsetDateTime; use tracing::debug; use self::incremental::FacetsUpdateIncremental; +use super::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps}; use crate::facet::FacetType; use crate::heed_codec::facet::{ @@ -147,7 +148,11 @@ impl<'i> FacetsUpdate<'i> { } } - pub fn execute(self, wtxn: &mut heed::RwTxn<'_>) -> Result<()> { + pub fn execute( + self, + wtxn: &mut heed::RwTxn<'_>, + new_settings: &InnerIndexSettings, + ) -> Result<()> { if self.data_size == 0 { return Ok(()); } @@ -156,8 +161,7 @@ impl<'i> FacetsUpdate<'i> { // See self::comparison_bench::benchmark_facet_indexing if self.data_size >= (self.database.len(wtxn)? / 500) { - let field_ids = - self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::>(); + let field_ids = facet_levels_field_ids(new_settings); let bulk_update = FacetsUpdateBulk::new( self.index, field_ids, @@ -291,6 +295,53 @@ fn index_facet_search( Ok(()) } +/// Clear all the levels greater than 0 for given field ids. 
+pub fn clear_facet_levels<'a, I>( + wtxn: &mut heed::RwTxn<'_>, + db: &heed::Database, DecodeIgnore>, + field_ids: I, +) -> Result<()> +where + I: IntoIterator, +{ + for field_id in field_ids { + let field_id = *field_id; + let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] }; + let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] }; + let range = left..=right; + db.delete_range(wtxn, &range).map(drop)?; + } + Ok(()) +} + +pub fn clear_facet_levels_based_on_settings_diff( + wtxn: &mut heed::RwTxn<'_>, + index: &Index, + settings_diff: &InnerIndexSettingsDiff, +) -> Result<()> { + let new_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.new); + let old_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.old); + + let field_ids_to_clear: Vec<_> = old_field_ids.difference(&new_field_ids).copied().collect(); + clear_facet_levels(wtxn, &index.facet_id_string_docids.remap_types(), &field_ids_to_clear)?; + clear_facet_levels(wtxn, &index.facet_id_f64_docids.remap_types(), &field_ids_to_clear)?; + Ok(()) +} + +fn facet_levels_field_ids(settings: &InnerIndexSettings) -> B +where + B: FromIterator, +{ + settings + .fields_ids_map + .iter_id_metadata() + .filter(|(_, metadata)| { + metadata.require_facet_level_database(&settings.filterable_attributes_rules) + }) + .map(|(id, _)| id) + .collect() +} + #[cfg(test)] pub(crate) mod test_helpers { use std::cell::Cell; diff --git a/crates/milli/src/update/index_documents/enrich.rs b/crates/milli/src/update/index_documents/enrich.rs index c35701961..1f15dd570 100644 --- a/crates/milli/src/update/index_documents/enrich.rs +++ b/crates/milli/src/update/index_documents/enrich.rs @@ -95,12 +95,7 @@ pub fn enrich_documents_batch( // If the settings specifies that a _geo field must be used therefore we must check the // validity of it in all the documents of this batch and this is when we return `Some`. 
let geo_field_id = match documents_batch_index.id(RESERVED_GEO_FIELD_NAME) { - Some(geo_field_id) - if index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) - || index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) => - { - Some(geo_field_id) - } + Some(geo_field_id) if index.is_geo_enabled(rtxn)? => Some(geo_field_id), _otherwise => None, }; diff --git a/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs index 606ae6b54..d502e69cc 100644 --- a/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs +++ b/crates/milli/src/update/index_documents/extract/extract_docid_word_positions.rs @@ -150,9 +150,14 @@ fn searchable_fields_changed( obkv: &KvReader, settings_diff: &InnerIndexSettingsDiff, ) -> bool { - let searchable_fields = &settings_diff.new.searchable_fields_ids; for (field_id, field_bytes) in obkv.iter() { - if searchable_fields.contains(&field_id) { + let Some(metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else { + // If the field id is not in the fields ids map, skip it. + // This happens for the vectors sub-fields. for example: + // "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered. + continue; + }; + if metadata.is_searchable() { let del_add = KvReaderDelAdd::from_slice(field_bytes); match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) { // if both fields are None, check the next field. @@ -200,8 +205,14 @@ fn tokens_from_document<'a>( buffers.obkv_buffer.clear(); let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer); for (field_id, field_bytes) in obkv.iter() { + let Some(metadata) = settings.fields_ids_map.metadata(field_id) else { + // If the field id is not in the fields ids map, skip it. + // This happens for the vectors sub-fields. for example: + // "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered. 
+ continue; + }; // if field is searchable. - if settings.searchable_fields_ids.contains(&field_id) { + if metadata.is_searchable() { // extract deletion or addition only. if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) { // parse json. @@ -216,7 +227,7 @@ fn tokens_from_document<'a>( buffers.field_buffer.clear(); if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) { // create an iterator of token with their positions. - let locales = settings.localized_searchable_fields_ids.locales(field_id); + let locales = metadata.locales(&settings.localized_attributes_rules); let tokens = process_tokens(tokenizer.tokenize_with_allow_list(field, locales)) .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes); diff --git a/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index d330ea5a0..994125c50 100644 --- a/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -12,12 +12,11 @@ use heed::BytesEncode; use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; use crate::heed_codec::{BEU16StrCodec, StrRefCodec}; -use crate::localized_attributes_rules::LocalizedFieldIds; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::index_documents::helpers::{ MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps, }; -use crate::update::settings::InnerIndexSettingsDiff; +use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff}; use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH}; /// Extracts the facet string and the documents ids where this facet string appear. 
@@ -33,13 +32,10 @@ pub fn extract_facet_string_docids( if settings_diff.settings_update_only() { extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff) } else { - let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids; - let facet_search = settings_diff.new.facet_search; extract_facet_string_docids_document_update( docid_fid_facet_string, indexer, - localized_field_ids, - facet_search, + &settings_diff.new, ) } } @@ -52,8 +48,7 @@ pub fn extract_facet_string_docids( fn extract_facet_string_docids_document_update( docid_fid_facet_string: grenad::Reader, indexer: GrenadParameters, - localized_field_ids: &LocalizedFieldIds, - facet_search: bool, + settings: &InnerIndexSettings, ) -> Result<(grenad::Reader>, grenad::Reader>)> { let max_memory = indexer.max_memory_by_thread(); @@ -92,6 +87,14 @@ fn extract_facet_string_docids_document_update( let (field_id_bytes, bytes) = try_split_array_at(key).unwrap(); let field_id = FieldId::from_be_bytes(field_id_bytes); + let Some(metadata) = settings.fields_ids_map.metadata(field_id) else { + unreachable!("metadata not found for field_id: {}", field_id) + }; + + if !metadata.is_faceted(&settings.filterable_attributes_rules) { + continue; + } + let (document_id_bytes, normalized_value_bytes) = try_split_array_at::<_, 4>(bytes).unwrap(); let document_id = u32::from_be_bytes(document_id_bytes); @@ -99,8 +102,10 @@ fn extract_facet_string_docids_document_update( let normalized_value = str::from_utf8(normalized_value_bytes)?; // Facet search normalization - if facet_search { - let locales = localized_field_ids.locales(field_id); + let features = + metadata.filterable_attributes_features(&settings.filterable_attributes_rules); + if features.is_facet_searchable() { + let locales = metadata.locales(&settings.localized_attributes_rules); let hyper_normalized_value = normalize_facet_string(normalized_value, locales); let set = BTreeSet::from_iter(std::iter::once(normalized_value)); @@ 
-178,8 +183,15 @@ fn extract_facet_string_docids_settings( let (field_id_bytes, bytes) = try_split_array_at(key).unwrap(); let field_id = FieldId::from_be_bytes(field_id_bytes); - let old_locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id); - let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id); + let Some(old_metadata) = settings_diff.old.fields_ids_map.metadata(field_id) else { + unreachable!("old metadata not found for field_id: {}", field_id) + }; + let Some(new_metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else { + unreachable!("new metadata not found for field_id: {}", field_id) + }; + + let old_locales = old_metadata.locales(&settings_diff.old.localized_attributes_rules); + let new_locales = new_metadata.locales(&settings_diff.new.localized_attributes_rules); let are_same_locales = old_locales == new_locales; let reindex_facet_search = @@ -197,10 +209,15 @@ fn extract_facet_string_docids_settings( // Facet search normalization if settings_diff.new.facet_search { + let new_filterable_features = new_metadata + .filterable_attributes_features(&settings_diff.new.filterable_attributes_rules); let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales); let old_hyper_normalized_value; + let old_filterable_features = old_metadata + .filterable_attributes_features(&settings_diff.old.filterable_attributes_rules); let old_hyper_normalized_value = if !settings_diff.old.facet_search || deladd_reader.get(DelAdd::Deletion).is_none() + || !old_filterable_features.is_facet_searchable() { // if the facet search is disabled in the old settings or if no facet string is deleted, // we don't need to normalize the facet string. @@ -215,7 +232,9 @@ fn extract_facet_string_docids_settings( let set = BTreeSet::from_iter(std::iter::once(normalized_value)); // if the facet string is the same, we can put the deletion and addition in the same obkv. 
- if old_hyper_normalized_value == Some(&new_hyper_normalized_value) { + if old_hyper_normalized_value == Some(&new_hyper_normalized_value) + && new_filterable_features.is_facet_searchable() + { // nothing to do if we delete and re-add the value. if is_same_value { continue; @@ -249,7 +268,9 @@ fn extract_facet_string_docids_settings( } // addition - if deladd_reader.get(DelAdd::Addition).is_some() { + if new_filterable_features.is_facet_searchable() + && deladd_reader.get(DelAdd::Addition).is_some() + { // insert new value let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?; buffer.clear(); diff --git a/crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs b/crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs index 88c02fe70..de87c5a7c 100644 --- a/crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs +++ b/crates/milli/src/update/index_documents/extract/extract_fid_docid_facet_values.rs @@ -76,9 +76,9 @@ pub fn extract_fid_docid_facet_values( let mut strings_key_buffer = Vec::new(); let old_faceted_fids: BTreeSet<_> = - settings_diff.old.faceted_fields_ids.iter().copied().collect(); + settings_diff.list_faceted_fields_from_fid_map(DelAdd::Deletion); let new_faceted_fids: BTreeSet<_> = - settings_diff.new.faceted_fields_ids.iter().copied().collect(); + settings_diff.list_faceted_fields_from_fid_map(DelAdd::Addition); if !settings_diff.settings_update_only || settings_diff.reindex_facets() { let mut cursor = obkv_documents.into_cursor()?; diff --git a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs index 9103e8324..560b73834 100644 --- a/crates/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/crates/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -15,8 +15,9 @@ use serde_json::Value; use super::helpers::{create_writer, 
writer_into_reader, GrenadParameters}; use crate::constants::RESERVED_VECTORS_FIELD_NAME; use crate::error::FaultSource; +use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; use crate::index::IndexEmbeddingConfig; -use crate::prompt::{FieldsIdsMapWithMetadata, Prompt}; +use crate::prompt::Prompt; use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd}; use crate::update::settings::InnerIndexSettingsDiff; use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution}; @@ -190,12 +191,8 @@ pub fn extract_vector_points( let reindex_vectors = settings_diff.reindex_vectors(); let old_fields_ids_map = &settings_diff.old.fields_ids_map; - let old_fields_ids_map = - FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids); let new_fields_ids_map = &settings_diff.new.fields_ids_map; - let new_fields_ids_map = - FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids); // the vector field id may have changed let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME); @@ -383,7 +380,7 @@ pub fn extract_vector_points( ); continue; } - regenerate_prompt(obkv, prompt, &new_fields_ids_map)? + regenerate_prompt(obkv, prompt, new_fields_ids_map)? } }, // prompt regeneration is only triggered for existing embedders @@ -400,7 +397,7 @@ pub fn extract_vector_points( regenerate_if_prompt_changed( obkv, (old_prompt, prompt), - (&old_fields_ids_map, &new_fields_ids_map), + (old_fields_ids_map, new_fields_ids_map), )? 
} else { // we can simply ignore user provided vectors as they are not regenerated and are @@ -416,7 +413,7 @@ pub fn extract_vector_points( prompt, (add_to_user_provided, remove_from_user_provided), (old, new), - (&old_fields_ids_map, &new_fields_ids_map), + (old_fields_ids_map, new_fields_ids_map), document_id, embedder_name, embedder_is_manual, @@ -486,10 +483,7 @@ fn extract_vector_document_diff( prompt: &Prompt, (add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap), (old, new): (VectorState, VectorState), - (old_fields_ids_map, new_fields_ids_map): ( - &FieldsIdsMapWithMetadata, - &FieldsIdsMapWithMetadata, - ), + (old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata), document_id: impl Fn() -> Value, embedder_name: &str, embedder_is_manual: bool, @@ -611,10 +605,7 @@ fn extract_vector_document_diff( fn regenerate_if_prompt_changed( obkv: &obkv::KvReader, (old_prompt, new_prompt): (&Prompt, &Prompt), - (old_fields_ids_map, new_fields_ids_map): ( - &FieldsIdsMapWithMetadata, - &FieldsIdsMapWithMetadata, - ), + (old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata), ) -> Result { let old_prompt = old_prompt .render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map) @@ -630,7 +621,7 @@ fn regenerate_if_prompt_changed( fn regenerate_prompt( obkv: &obkv::KvReader, prompt: &Prompt, - new_fields_ids_map: &FieldsIdsMapWithMetadata, + new_fields_ids_map: &FieldIdMapWithMetadata, ) -> Result { let prompt = prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?; diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 86f2ed4af..19ab1ff34 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -26,6 +26,7 @@ use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk}; pub use 
self::enrich::{extract_finite_float_from_value, DocumentId}; pub use self::helpers::*; pub use self::transform::{Transform, TransformOutput}; +use super::facet::clear_facet_levels_based_on_settings_diff; use super::new::StdResult; use crate::documents::{obkv_to_object, DocumentsBatchReader}; use crate::error::{Error, InternalError}; @@ -215,9 +216,8 @@ where flattened_documents, } = output; - // update the internal facet and searchable list, + // update the searchable list, // because they might have changed due to the nested documents flattening. - settings_diff.new.recompute_facets(self.wtxn, self.index)?; settings_diff.new.recompute_searchables(self.wtxn, self.index)?; let settings_diff = Arc::new(settings_diff); @@ -465,6 +465,11 @@ where } } + // If the settings are only being updated, we may have to clear some of the facet levels. + if settings_diff.settings_update_only() { + clear_facet_levels_based_on_settings_diff(self.wtxn, self.index, &settings_diff)?; + } + Ok(()) }).map_err(InternalError::from)??; @@ -765,18 +770,19 @@ mod tests { use bumpalo::Bump; use fst::IntoStreamer; use heed::RwTxn; - use maplit::hashset; + use maplit::{btreeset, hashset}; use super::*; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::documents::mmap_from_objects; + use crate::filterable_attributes_rules::filtered_matching_field_names; use crate::index::tests::TempIndex; use crate::index::IndexEmbeddingConfig; use crate::progress::Progress; use crate::search::TermsMatchingStrategy; use crate::update::new::indexer; use crate::update::Setting; - use crate::{all_obkv_to_json, db_snap, Filter, Search, UserError}; + use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError}; #[test] fn simple_document_replacement() { @@ -1006,7 +1012,9 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + 
RESERVED_GEO_FIELD_NAME.to_string(), + )]); }) .unwrap(); } @@ -1018,7 +1026,9 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + RESERVED_GEO_FIELD_NAME.to_string(), + )]); }) .unwrap(); @@ -1234,15 +1244,24 @@ mod tests { let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")]; settings.set_searchable_fields(searchable_fields); - let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin")); + let faceted_fields = vec![ + FilterableAttributesRule::Field("title".to_string()), + FilterableAttributesRule::Field("nested.object".to_string()), + FilterableAttributesRule::Field("nested.machin".to_string()), + ]; settings.set_filterable_fields(faceted_fields); }) .unwrap(); let rtxn = index.read_txn().unwrap(); - let facets = index.faceted_fields(&rtxn).unwrap(); - assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin"))); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let facets = + filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { + features.is_filterable() + }); + assert_eq!(facets, btreeset!("title", "nested.object", "nested.machin")); // testing the simple query search let mut search = crate::Search::new(&rtxn, &index); @@ -1438,7 +1457,9 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset!(String::from("dog"))); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "dog".to_string(), + )]); }) .unwrap(); @@ -1457,9 +1478,14 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let hidden = index.faceted_fields(&rtxn).unwrap(); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let facets 
= + filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { + features.is_filterable() + }); - assert_eq!(hidden, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain"))); + assert_eq!(facets, btreeset!("dog", "dog.race", "dog.race.bernese mountain")); for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] { let mut search = crate::Search::new(&rtxn, &index); @@ -1480,9 +1506,14 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let facets = index.faceted_fields(&rtxn).unwrap(); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let facets = + filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { + features.is_filterable() + }); - assert_eq!(facets, hashset!()); + assert_eq!(facets, btreeset!()); // update the settings to test the sortable index @@ -1506,10 +1537,6 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let facets = index.faceted_fields(&rtxn).unwrap(); - - assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain"))); - let mut search = crate::Search::new(&rtxn, &index); search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S( "dog.race.bernese mountain", @@ -1717,8 +1744,13 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); - let facets = index.faceted_fields(&rtxn).unwrap(); - assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue"))); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let facets = + filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { + features.is_filterable() + }); + assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue")); let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); let colour_green_id = 
index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); @@ -1738,7 +1770,7 @@ mod tests { assert_eq!(bitmap_colour_blue.into_iter().collect::>(), vec![7]); }; - let faceted_fields = hashset!(S("colour")); + let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())]; let index = TempIndex::new(); index.add_documents(content()).unwrap(); @@ -1823,8 +1855,13 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); - let facets = index.faceted_fields(&rtxn).unwrap(); - assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue"))); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let facets = + filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { + features.is_filterable() + }); + assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue")); let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); @@ -1844,7 +1881,7 @@ mod tests { assert_eq!(bitmap_colour_blue.into_iter().collect::>(), vec![3]); }; - let faceted_fields = hashset!(S("colour")); + let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())]; let index = TempIndex::new(); index.add_documents(content()).unwrap(); @@ -1887,8 +1924,13 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); - let facets = index.faceted_fields(&rtxn).unwrap(); - assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue"))); + let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let facets = + filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { + features.is_filterable() + }); + assert_eq!(facets, btreeset!("tags", "tags.green", 
"tags.green.blue")); let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap(); let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap(); @@ -1907,7 +1949,7 @@ mod tests { assert_eq!(bitmap_tags_blue.into_iter().collect::>(), vec![12]); }; - let faceted_fields = hashset!(S("tags")); + let faceted_fields = vec![FilterableAttributesRule::Field("tags".to_string())]; let index = TempIndex::new(); index.add_documents(content()).unwrap(); @@ -2259,7 +2301,9 @@ mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("title") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "title".to_string(), + )]); }) .unwrap(); @@ -3115,7 +3159,10 @@ mod tests { index .update_settings_using_wtxn(&mut wtxn, |settings| { settings.set_primary_key(S("docid")); - settings.set_filterable_fields(hashset! { S("label"), S("label2") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field("label".to_string()), + FilterableAttributesRule::Field("label2".to_string()), + ]); }) .unwrap(); wtxn.commit().unwrap(); @@ -3294,7 +3341,9 @@ mod tests { index .update_settings_using_wtxn(&mut wtxn, |settings| { settings.set_primary_key(S("id")); - settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + RESERVED_GEO_FIELD_NAME.to_string(), + )]); settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME))); }) .unwrap(); diff --git a/crates/milli/src/update/index_documents/transform.rs b/crates/milli/src/update/index_documents/transform.rs index d87524a34..b2ee21cbf 100644 --- a/crates/milli/src/update/index_documents/transform.rs +++ b/crates/milli/src/update/index_documents/transform.rs @@ -1,7 +1,7 @@ use std::borrow::Cow; use std::collections::btree_map::Entry as BEntry; use std::collections::hash_map::Entry as HEntry; -use std::collections::{BTreeMap, HashMap, HashSet}; +use 
std::collections::{BTreeMap, HashMap}; use std::fs::File; use std::io::{Read, Seek}; @@ -18,8 +18,10 @@ use super::helpers::{ ObkvsMergeAdditionsAndDeletions, }; use super::{create_writer, IndexDocumentsMethod, IndexerConfig, KeepFirst}; +use crate::attribute_patterns::PatternMatch; use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader}; use crate::error::{Error, InternalError, UserError}; +use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::index::{db_name, main_key}; use crate::update::del_add::{ into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation, @@ -31,9 +33,7 @@ use crate::update::{AvailableIds, UpdateIndexingStep}; use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors}; use crate::vector::settings::WriteBackToDocuments; use crate::vector::ArroyWrapper; -use crate::{ - is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result, -}; +use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result}; pub struct TransformOutput { pub primary_key: String, @@ -52,7 +52,7 @@ pub struct TransformOutput { /// containing all those documents. pub struct Transform<'a, 'i> { pub index: &'i Index, - fields_ids_map: FieldsIdsMap, + fields_ids_map: FieldIdMapWithMetadata, indexer_settings: &'a IndexerConfig, pub index_documents_method: IndexDocumentsMethod, @@ -84,7 +84,7 @@ pub enum Operation { /// /// If new fields are present in the addition, they are added to the index field ids map. 
fn create_fields_mapping( - index_field_map: &mut FieldsIdsMap, + index_field_map: &mut FieldIdMapWithMetadata, batch_field_map: &DocumentsBatchIndex, ) -> Result> { batch_field_map @@ -141,10 +141,13 @@ impl<'a, 'i> Transform<'a, 'i> { true, ); let documents_ids = index.documents_ids(wtxn)?; + let fields_ids_map = index.fields_ids_map(wtxn)?; + let builder = MetadataBuilder::from_index(index, wtxn)?; + let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder); Ok(Transform { index, - fields_ids_map: index.fields_ids_map(wtxn)?, + fields_ids_map, indexer_settings, available_documents_ids: AvailableIds::new(&documents_ids), original_sorter, @@ -354,7 +357,7 @@ impl<'a, 'i> Transform<'a, 'i> { documents_seen: documents_count, }); - self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?; + self.index.put_fields_ids_map(wtxn, self.fields_ids_map.as_fields_ids_map())?; self.index.put_primary_key(wtxn, &primary_key)?; self.documents_count += documents_count; // Now that we have a valid sorter that contains the user id and the obkv we @@ -371,7 +374,7 @@ impl<'a, 'i> Transform<'a, 'i> { )] fn flatten_from_fields_ids_map( obkv: &KvReader, - fields_ids_map: &mut FieldsIdsMap, + fields_ids_map: &mut FieldIdMapWithMetadata, ) -> Result>> { if obkv .iter() @@ -657,7 +660,6 @@ impl<'a, 'i> Transform<'a, 'i> { fn rebind_existing_document( old_obkv: &KvReader, settings_diff: &InnerIndexSettingsDiff, - modified_faceted_fields: &HashSet, mut injected_vectors: serde_json::Map, old_vectors_fid: Option, original_obkv_buffer: Option<&mut Vec>, @@ -667,23 +669,26 @@ impl<'a, 'i> Transform<'a, 'i> { let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) }; // If only a faceted field has been added, keep only this field. 
- let global_facet_settings_changed = settings_diff.global_facet_settings_changed(); let facet_fids_changed = settings_diff.facet_fids_changed(); - let necessary_faceted_field = - |id: FieldId| -> bool { + + let necessary_faceted_field = |id: FieldId| -> Option { + if facet_fids_changed { let field_name = settings_diff.new.fields_ids_map.name(id).unwrap(); - if global_facet_settings_changed { - settings_diff.new.user_defined_faceted_fields.iter().any(|long| { - is_faceted_by(long, field_name) || is_faceted_by(field_name, long) - }) - } else if facet_fids_changed { - modified_faceted_fields.iter().any(|long| { - is_faceted_by(long, field_name) || is_faceted_by(field_name, long) - }) - } else { - false + // if the faceted fields changed, we need to keep all the field that are + // faceted in the old or new settings. + match ( + settings_diff.old.match_faceted_field(field_name), + settings_diff.new.match_faceted_field(field_name), + ) { + (PatternMatch::NoMatch, PatternMatch::NoMatch) => None, + (PatternMatch::NoMatch, _) => Some(DelAddOperation::Addition), + (_, PatternMatch::NoMatch) => Some(DelAddOperation::Deletion), + (_, _) => Some(DelAddOperation::DeletionAndAddition), } - }; + } else { + None + } + }; // Alway provide all fields when vectors are involved because // we need the fields for the prompt/templating. 
@@ -734,12 +739,22 @@ impl<'a, 'i> Transform<'a, 'i> { } } - if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors { + if is_primary_key(id) || reindex_vectors { operations.insert(id, DelAddOperation::DeletionAndAddition); obkv_writer.insert(id, val)?; - } else if let Some(operation) = settings_diff.reindex_searchable_id(id) { - operations.insert(id, operation); - obkv_writer.insert(id, val)?; + } else { + let facet_operation = necessary_faceted_field(id); + let searchable_operation = settings_diff.reindex_searchable_id(id); + let operation = facet_operation + // TODO: replace `zip.map` with `zip_with` once stable + .zip(searchable_operation) + .map(|(op1, op2)| op1.merge(op2)) + .or(facet_operation) + .or(searchable_operation); + if let Some(operation) = operation { + operations.insert(id, operation); + obkv_writer.insert(id, val)?; + } } } if !injected_vectors.is_empty() { @@ -856,7 +871,6 @@ impl<'a, 'i> Transform<'a, 'i> { }; if original_sorter.is_some() || flattened_sorter.is_some() { - let modified_faceted_fields = settings_diff.modified_faceted_fields(); let mut original_obkv_buffer = Vec::new(); let mut flattened_obkv_buffer = Vec::new(); let mut document_sorter_key_buffer = Vec::new(); @@ -897,7 +911,6 @@ impl<'a, 'i> Transform<'a, 'i> { Self::rebind_existing_document( old_obkv, &settings_diff, - &modified_faceted_fields, injected_vectors, old_vectors_fid, Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()), diff --git a/crates/milli/src/update/index_documents/typed_chunk.rs b/crates/milli/src/update/index_documents/typed_chunk.rs index 0809d9601..10dbdc834 100644 --- a/crates/milli/src/update/index_documents/typed_chunk.rs +++ b/crates/milli/src/update/index_documents/typed_chunk.rs @@ -365,7 +365,7 @@ pub(crate) fn write_typed_chunk_into_index( let merger = builder.build(); let indexer = FacetsUpdate::new(index, FacetType::Number, merger, None, data_size); - indexer.execute(wtxn)?; + indexer.execute(wtxn, 
&settings_diff.new)?; is_merged_database = true; } TypedChunk::FieldIdFacetStringDocids(_) => { @@ -401,7 +401,7 @@ pub(crate) fn write_typed_chunk_into_index( Some(normalized_facet_id_string_merger), data_size, ); - indexer.execute(wtxn)?; + indexer.execute(wtxn, &settings_diff.new)?; is_merged_database = true; } TypedChunk::FieldIdFacetExistsDocids(_) => { diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index 0d0648fc8..d38fdf138 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -6,17 +6,20 @@ use std::sync::Arc; use charabia::{Normalize, Tokenizer, TokenizerBuilder}; use deserr::{DeserializeError, Deserr}; -use itertools::{EitherOrBoth, Itertools}; +use itertools::{merge_join_by, EitherOrBoth, Itertools}; use roaring::RoaringBitmap; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use time::OffsetDateTime; -use super::del_add::DelAddOperation; +use super::del_add::{DelAdd, DelAddOperation}; use super::index_documents::{IndexDocumentsConfig, Transform}; use super::IndexerConfig; -use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME}; +use crate::attribute_patterns::PatternMatch; +use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::criterion::Criterion; use crate::error::UserError; +use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; +use crate::filterable_attributes_rules::match_faceted_field; use crate::index::{ IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS, @@ -31,7 +34,7 @@ use crate::vector::settings::{ WriteBackToDocuments, }; use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs}; -use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result}; +use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result}; #[derive(Debug, Clone, PartialEq, Eq, Copy)] pub enum Setting { @@ -155,7 +158,7 @@ 
pub struct Settings<'a, 't, 'i> { searchable_fields: Setting>, displayed_fields: Setting>, - filterable_fields: Setting>, + filterable_fields: Setting>, sortable_fields: Setting>, criteria: Setting>, stop_words: Setting>, @@ -241,8 +244,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.filterable_fields = Setting::Reset; } - pub fn set_filterable_fields(&mut self, names: HashSet) { - self.filterable_fields = Setting::Set(names); + pub fn set_filterable_fields(&mut self, rules: Vec) { + self.filterable_fields = Setting::Set(rules); } pub fn set_sortable_fields(&mut self, names: HashSet) { @@ -516,7 +519,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { } /// Updates the index's searchable attributes. - fn update_searchable(&mut self) -> Result { + fn update_user_defined_searchable_attributes(&mut self) -> Result { match self.searchable_fields { Setting::Set(ref fields) => { // Check to see if the searchable fields changed before doing anything else @@ -529,26 +532,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { return Ok(false); } - // Since we're updating the settings we can only add new fields at the end of the field id map - let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; // fields are deduplicated, only the first occurrence is taken into account let names = fields.iter().unique().map(String::as_str).collect::>(); - // Add all the searchable attributes to the field map, and then add the - // remaining fields from the old field map to the new one - for name in names.iter() { - // The fields ids map won't change the field id of already present elements thus only the - // new fields will be inserted. 
- fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; - } - - self.index.put_all_searchable_fields_from_fields_ids_map( - self.wtxn, - &names, - &fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME), - &fields_ids_map, - )?; - self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; + self.index.put_user_defined_searchable_fields(self.wtxn, &names)?; Ok(true) } Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?), @@ -760,14 +747,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { fn update_filterable(&mut self) -> Result<()> { match self.filterable_fields { Setting::Set(ref fields) => { - let mut new_facets = HashSet::new(); - for name in fields { - new_facets.insert(name.clone()); - } - self.index.put_filterable_fields(self.wtxn, &new_facets)?; + self.index.put_filterable_attributes_rules(self.wtxn, fields)?; } Setting::Reset => { - self.index.delete_filterable_fields(self.wtxn)?; + self.index.delete_filterable_attributes_rules(self.wtxn)?; } Setting::NotSet => (), } @@ -1257,7 +1240,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.update_separator_tokens()?; self.update_dictionary()?; self.update_synonyms()?; - self.update_searchable()?; + self.update_user_defined_searchable_attributes()?; self.update_exact_attributes()?; self.update_proximity_precision()?; self.update_prefix_search()?; @@ -1267,7 +1250,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { let embedding_config_updates = self.update_embedding_configs()?; let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?; - new_inner_settings.recompute_facets(self.wtxn, self.index)?; + new_inner_settings.recompute_searchables(self.wtxn, self.index)?; let primary_key_id = self .index @@ -1319,8 +1302,8 @@ impl InnerIndexSettingsDiff { settings_update_only: bool, ) -> Self { let only_additional_fields = match ( - &old_settings.user_defined_searchable_fields, - &new_settings.user_defined_searchable_fields, + 
&old_settings.user_defined_searchable_attributes, + &new_settings.user_defined_searchable_attributes, ) { (None, None) | (Some(_), None) | (None, Some(_)) => None, // None means * (Some(old), Some(new)) => { @@ -1342,14 +1325,14 @@ impl InnerIndexSettingsDiff { || old_settings.dictionary != new_settings.dictionary || old_settings.proximity_precision != new_settings.proximity_precision || old_settings.prefix_search != new_settings.prefix_search - || old_settings.localized_searchable_fields_ids - != new_settings.localized_searchable_fields_ids + || old_settings.localized_attributes_rules + != new_settings.localized_attributes_rules }; let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes; - let cache_user_defined_searchables = old_settings.user_defined_searchable_fields - != new_settings.user_defined_searchable_fields; + let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes + != new_settings.user_defined_searchable_attributes; // if the user-defined searchables changed, then we need to reindex prompts. if cache_user_defined_searchables { @@ -1432,30 +1415,70 @@ impl InnerIndexSettingsDiff { } } + /// List the faceted fields from the inner fid map. + /// This is used to list the faceted fields when we are reindexing, + /// but it can't be used in document addition because the field id map must be exhaustive. 
+ pub fn list_faceted_fields_from_fid_map(&self, del_add: DelAdd) -> BTreeSet { + let settings = match del_add { + DelAdd::Deletion => &self.old, + DelAdd::Addition => &self.new, + }; + + settings + .fields_ids_map + .iter_id_metadata() + .filter(|(_, metadata)| metadata.is_faceted(&settings.filterable_attributes_rules)) + .map(|(id, _)| id) + .collect() + } + pub fn facet_fids_changed(&self) -> bool { - let existing_fields = &self.new.existing_fields; - if existing_fields.iter().any(|field| field.contains('.')) { - return true; + for eob in merge_join_by( + self.old.fields_ids_map.iter().filter(|(_, _, metadata)| { + metadata.is_faceted(&self.old.filterable_attributes_rules) + }), + self.new.fields_ids_map.iter().filter(|(_, _, metadata)| { + metadata.is_faceted(&self.new.filterable_attributes_rules) + }), + |(old_fid, _, _), (new_fid, _, _)| old_fid.cmp(new_fid), + ) { + match eob { + // If there is a difference, we need to reindex facet databases. + EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => return true, + // If the field is faceted in both old and new settings, we check the facet-searchable and facet level database. + EitherOrBoth::Both((_, _, old_metadata), (_, _, new_metadata)) => { + // Check if the field is facet-searchable in the old and new settings. + // If there is a difference, we need to reindex facet-search database. + let old_filterable_features = old_metadata + .filterable_attributes_features(&self.old.filterable_attributes_rules); + let new_filterable_features = new_metadata + .filterable_attributes_features(&self.new.filterable_attributes_rules); + let is_old_facet_searchable = + old_filterable_features.is_facet_searchable() && self.old.facet_search; + let is_new_facet_searchable = + new_filterable_features.is_facet_searchable() && self.new.facet_search; + if is_old_facet_searchable != is_new_facet_searchable { + return true; + } + + // Check if the field needs a facet level database in the old and new settings. 
+ // If there is a difference, we need to reindex facet level databases. + let old_facet_level_database = old_metadata + .require_facet_level_database(&self.old.filterable_attributes_rules); + let new_facet_level_database = new_metadata + .require_facet_level_database(&self.new.filterable_attributes_rules); + if old_facet_level_database != new_facet_level_database { + return true; + } + } + } } - let old_faceted_fields = &self.old.user_defined_faceted_fields; - if old_faceted_fields.iter().any(|field| field.contains('.')) { - return true; - } - - // If there is new faceted fields we indicate that we must reindex as we must - // index new fields as facets. It means that the distinct attribute, - // an Asc/Desc criterion or a filtered attribute as be added or removed. - let new_faceted_fields = &self.new.user_defined_faceted_fields; - if new_faceted_fields.iter().any(|field| field.contains('.')) { - return true; - } - - (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields) + false } pub fn global_facet_settings_changed(&self) -> bool { - self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids + self.old.localized_attributes_rules != self.new.localized_attributes_rules || self.old.facet_search != self.new.facet_search } @@ -1475,10 +1498,6 @@ impl InnerIndexSettingsDiff { self.old.geo_fields_ids != self.new.geo_fields_ids || (!self.settings_update_only && self.new.geo_fields_ids.is_some()) } - - pub fn modified_faceted_fields(&self) -> HashSet { - &self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields - } } #[derive(Clone)] @@ -1486,20 +1505,17 @@ pub(crate) struct InnerIndexSettings { pub stop_words: Option>>, pub allowed_separators: Option>, pub dictionary: Option>, - pub fields_ids_map: FieldsIdsMap, - pub user_defined_faceted_fields: HashSet, - pub user_defined_searchable_fields: Option>, - pub faceted_fields_ids: HashSet, - pub searchable_fields_ids: Vec, + pub fields_ids_map: 
FieldIdMapWithMetadata, + pub localized_attributes_rules: Vec, + pub filterable_attributes_rules: Vec, + pub asc_desc_fields: HashSet, + pub distinct_field: Option, + pub user_defined_searchable_attributes: Option>, + pub sortable_fields: HashSet, pub exact_attributes: HashSet, pub proximity_precision: ProximityPrecision, pub embedding_configs: EmbeddingConfigs, - pub existing_fields: HashSet, pub geo_fields_ids: Option<(FieldId, FieldId)>, - pub non_searchable_fields_ids: Vec, - pub non_faceted_fields_ids: Vec, - pub localized_searchable_fields_ids: LocalizedFieldIds, - pub localized_faceted_fields_ids: LocalizedFieldIds, pub prefix_search: PrefixSearch, pub facet_search: bool, } @@ -1515,12 +1531,6 @@ impl InnerIndexSettings { let allowed_separators = index.allowed_separators(rtxn)?; let dictionary = index.dictionary(rtxn)?; let mut fields_ids_map = index.fields_ids_map(rtxn)?; - let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?; - let user_defined_searchable_fields = - user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect()); - let user_defined_faceted_fields = index.user_defined_faceted_fields(rtxn)?; - let mut searchable_fields_ids = index.searchable_fields_ids(rtxn)?; - let mut faceted_fields_ids = index.faceted_fields_ids(rtxn)?; let exact_attributes = index.exact_attributes_ids(rtxn)?; let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default(); let embedding_configs = match embedding_configs { @@ -1529,87 +1539,57 @@ impl InnerIndexSettings { }; let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default(); let facet_search = index.facet_search(rtxn)?; - let existing_fields: HashSet<_> = index - .field_distribution(rtxn)? - .into_iter() - .filter_map(|(field, count)| (count != 0).then_some(field)) - .collect(); - // index.fields_ids_map($a)? 
==>> fields_ids_map let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) { - Some(gfid) => { - let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid); - let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid); + Some(_) if index.is_geo_enabled(rtxn)? => { // if `_geo` is faceted then we get the `lat` and `lng` - if is_sortable || is_filterable { - let field_ids = fields_ids_map - .insert("_geo.lat") - .zip(fields_ids_map.insert("_geo.lng")) - .ok_or(UserError::AttributeLimitReached)?; - Some(field_ids) - } else { - None - } + let field_ids = fields_ids_map + .insert("_geo.lat") + .zip(fields_ids_map.insert("_geo.lng")) + .ok_or(UserError::AttributeLimitReached)?; + Some(field_ids) } - None => None, + _ => None, }; - let localized_attributes_rules = index.localized_attributes_rules(rtxn)?; - let localized_searchable_fields_ids = LocalizedFieldIds::new( - &localized_attributes_rules, - &fields_ids_map, - searchable_fields_ids.iter().cloned(), - ); - let localized_faceted_fields_ids = LocalizedFieldIds::new( - &localized_attributes_rules, - &fields_ids_map, - faceted_fields_ids.iter().cloned(), - ); - - let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME); - searchable_fields_ids.retain(|id| !vectors_fids.contains(id)); - faceted_fields_ids.retain(|id| !vectors_fids.contains(id)); + let localized_attributes_rules = + index.localized_attributes_rules(rtxn)?.unwrap_or_default(); + let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; + let sortable_fields = index.sortable_fields(rtxn)?; + let asc_desc_fields = index.asc_desc_fields(rtxn)?; + let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string()); + let user_defined_searchable_attributes = index + .user_defined_searchable_fields(rtxn)? 
+ .map(|fields| fields.into_iter().map(|f| f.to_string()).collect()); + let builder = MetadataBuilder::from_index(index, rtxn)?; + let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder); Ok(Self { stop_words, allowed_separators, dictionary, fields_ids_map, - user_defined_faceted_fields, - user_defined_searchable_fields, - faceted_fields_ids, - searchable_fields_ids, + localized_attributes_rules, + filterable_attributes_rules, + asc_desc_fields, + distinct_field, + user_defined_searchable_attributes, + sortable_fields, exact_attributes, proximity_precision, embedding_configs, - existing_fields, geo_fields_ids, - non_searchable_fields_ids: vectors_fids.clone(), - non_faceted_fields_ids: vectors_fids.clone(), - localized_searchable_fields_ids, - localized_faceted_fields_ids, prefix_search, facet_search, }) } - // find and insert the new field ids - pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn<'_>, index: &Index) -> Result<()> { - let new_facets = self - .fields_ids_map - .iter() - .filter(|(fid, _field)| !self.non_faceted_fields_ids.contains(fid)) - .filter(|(_fid, field)| crate::is_faceted(field, &self.user_defined_faceted_fields)) - .map(|(_fid, field)| field.to_string()) - .collect(); - index.put_faceted_fields(wtxn, &new_facets)?; - - self.faceted_fields_ids = index.faceted_fields_ids(wtxn)?; - let localized_attributes_rules = index.localized_attributes_rules(wtxn)?; - self.localized_faceted_fields_ids = LocalizedFieldIds::new( - &localized_attributes_rules, - &self.fields_ids_map, - self.faceted_fields_ids.iter().cloned(), - ); - Ok(()) + pub fn match_faceted_field(&self, field: &str) -> PatternMatch { + match_faceted_field( + field, + &self.filterable_attributes_rules, + &self.sortable_fields, + &self.asc_desc_fields, + &self.distinct_field, + ) } // find and insert the new field ids @@ -1619,7 +1599,7 @@ impl InnerIndexSettings { index: &Index, ) -> Result<()> { let searchable_fields = self - .user_defined_searchable_fields + 
.user_defined_searchable_attributes .as_ref() .map(|searchable| searchable.iter().map(|s| s.as_str()).collect::>()); @@ -1628,17 +1608,9 @@ impl InnerIndexSettings { index.put_all_searchable_fields_from_fields_ids_map( wtxn, &searchable_fields, - &self.non_searchable_fields_ids, &self.fields_ids_map, )?; } - self.searchable_fields_ids = index.searchable_fields_ids(wtxn)?; - let localized_attributes_rules = index.localized_attributes_rules(wtxn)?; - self.localized_searchable_fields_ids = LocalizedFieldIds::new( - &localized_attributes_rules, - &self.fields_ids_map, - self.searchable_fields_ids.iter().cloned(), - ); Ok(()) } From 95bccaf5f565678a5b7ae3fca302d19268107d90 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 10:30:42 +0100 Subject: [PATCH 06/35] Refactor Document indexing process (Facets) **Changes:** The Documents changes now take a selector closure instead of a list of field to match the field to extract. The seek_leaf_values_in_object function now uses a selector closure of a list of field to match the field to extract The facet database extraction is now relying on the FilterableAttributesRule to match the field to extract. The facet-search database extraction is now relying on the FieldIdMapWithMetadata to select the field to index. The facet level database extraction is now relying on the FieldIdMapWithMetadata to select the field to index. **Important:** Because the filterable attributes are patterns now, the fieldIdMap will only register the fields that exists in at least one document. if a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields. 
**Impact:** - Document Addition/modification facet indexing - Document deletion facet indexing --- .../milli/src/update/new/document_change.rs | 8 +- .../new/extract/faceted/extract_facets.rs | 123 ++++++++++++++---- .../new/extract/faceted/facet_document.rs | 94 ++++++++----- .../milli/src/update/new/extract/geo/mod.rs | 5 +- crates/milli/src/update/new/extract/mod.rs | 120 +++-------------- .../src/update/new/facet_search_builder.rs | 36 ++++- .../src/update/new/indexer/post_processing.rs | 25 +++- crates/milli/src/update/new/indexer/write.rs | 1 - 8 files changed, 233 insertions(+), 179 deletions(-) diff --git a/crates/milli/src/update/new/document_change.rs b/crates/milli/src/update/new/document_change.rs index 2de9f384b..c790b4d32 100644 --- a/crates/milli/src/update/new/document_change.rs +++ b/crates/milli/src/update/new/document_change.rs @@ -4,10 +4,10 @@ use heed::RoTxn; use super::document::{ Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions, }; -use super::extract::perm_json_p; use super::vector_document::{ MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions, }; +use crate::attribute_patterns::PatternMatch; use crate::documents::FieldIdMapper; use crate::vector::EmbeddingConfigs; use crate::{DocumentId, Index, Result}; @@ -173,7 +173,7 @@ impl<'doc> Update<'doc> { /// Otherwise `false`. 
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>( &self, - fields: Option<&[&str]>, + selector: &mut impl FnMut(&str) -> PatternMatch, rtxn: &'t RoTxn, index: &'t Index, mapper: &'t Mapper, @@ -185,7 +185,7 @@ impl<'doc> Update<'doc> { for entry in self.only_changed_fields().iter_top_level_fields() { let (key, updated_value) = entry?; - if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { + if selector(key) == PatternMatch::NoMatch { continue; } @@ -229,7 +229,7 @@ impl<'doc> Update<'doc> { for entry in current.iter_top_level_fields() { let (key, _) = entry?; - if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip { + if selector(key) == PatternMatch::NoMatch { continue; } current_selected_field_count += 1; diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 41b6a12a2..3201e23f9 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -5,12 +5,13 @@ use std::ops::DerefMut as _; use bumpalo::collections::Vec as BVec; use bumpalo::Bump; use hashbrown::HashMap; -use heed::RoTxn; use serde_json::Value; use super::super::cache::BalancedCaches; use super::facet_document::extract_document_facets; use super::FacetKind; +use crate::fields_ids_map::metadata::Metadata; +use crate::filterable_attributes_rules::match_faceted_field; use crate::heed_codec::facet::OrderedF64Codec; use crate::update::del_add::DelAdd; use crate::update::new::channel::FieldIdDocidFacetSender; @@ -23,13 +24,17 @@ use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{FullySend, ThreadLocal}; use crate::update::new::DocumentChange; use crate::update::GrenadParameters; -use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH}; +use crate::{DocumentId, FieldId, FilterableAttributesRule, Result, MAX_FACET_VALUE_LENGTH}; pub 
struct FacetedExtractorData<'a, 'b> { - attributes_to_extract: &'a [&'a str], sender: &'a FieldIdDocidFacetSender<'a, 'b>, grenad_parameters: &'a GrenadParameters, buckets: usize, + filterable_attributes: Vec, + sortable_fields: HashSet, + asc_desc_fields: HashSet, + distinct_field: Option, + is_geo_enabled: bool, } impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> { @@ -52,7 +57,11 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> let change = change?; FacetedDocidsExtractor::extract_document_change( context, - self.attributes_to_extract, + &self.filterable_attributes, + &self.sortable_fields, + &self.asc_desc_fields, + &self.distinct_field, + self.is_geo_enabled, change, self.sender, )? @@ -64,13 +73,18 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> pub struct FacetedDocidsExtractor; impl FacetedDocidsExtractor { + #[allow(clippy::too_many_arguments)] fn extract_document_change( context: &DocumentChangeContext>, - attributes_to_extract: &[&str], + filterable_attributes: &[FilterableAttributesRule], + sortable_fields: &HashSet, + asc_desc_fields: &HashSet, + distinct_field: &Option, + is_geo_enabled: bool, document_change: DocumentChange, sender: &FieldIdDocidFacetSender, ) -> Result<()> { - let index = &context.index; + let index = context.index; let rtxn = &context.rtxn; let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield(); let mut cached_sorter = context.data.borrow_mut_or_yield(); @@ -78,11 +92,15 @@ impl FacetedDocidsExtractor { let docid = document_change.docid(); let res = match document_change { DocumentChange::Deletion(inner) => extract_document_facets( - attributes_to_extract, inner.current(rtxn, index, context.db_fields_ids_map)?, inner.external_document_id(), new_fields_ids_map.deref_mut(), - &mut |fid, depth, value| { + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + is_geo_enabled, + &mut |fid, 
meta, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), @@ -91,6 +109,8 @@ impl FacetedDocidsExtractor { DelAddFacetValue::insert_del, docid, fid, + meta, + filterable_attributes, depth, value, ) @@ -98,7 +118,15 @@ impl FacetedDocidsExtractor { ), DocumentChange::Update(inner) => { if !inner.has_changed_for_fields( - Some(attributes_to_extract), + &mut |field_name| { + match_faceted_field( + field_name, + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + ) + }, rtxn, index, context.db_fields_ids_map, @@ -107,11 +135,15 @@ impl FacetedDocidsExtractor { } extract_document_facets( - attributes_to_extract, inner.current(rtxn, index, context.db_fields_ids_map)?, inner.external_document_id(), new_fields_ids_map.deref_mut(), - &mut |fid, depth, value| { + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + is_geo_enabled, + &mut |fid, meta, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), @@ -120,6 +152,8 @@ impl FacetedDocidsExtractor { DelAddFacetValue::insert_del, docid, fid, + meta, + filterable_attributes, depth, value, ) @@ -127,11 +161,15 @@ impl FacetedDocidsExtractor { )?; extract_document_facets( - attributes_to_extract, inner.merged(rtxn, index, context.db_fields_ids_map)?, inner.external_document_id(), new_fields_ids_map.deref_mut(), - &mut |fid, depth, value| { + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + is_geo_enabled, + &mut |fid, meta, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), @@ -140,6 +178,8 @@ impl FacetedDocidsExtractor { DelAddFacetValue::insert_add, docid, fid, + meta, + filterable_attributes, depth, value, ) @@ -147,11 +187,15 @@ impl FacetedDocidsExtractor { ) } DocumentChange::Insertion(inner) => extract_document_facets( - attributes_to_extract, inner.inserted(), inner.external_document_id(), 
new_fields_ids_map.deref_mut(), - &mut |fid, depth, value| { + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + is_geo_enabled, + &mut |fid, meta, depth, value| { Self::facet_fn_with_options( &context.doc_alloc, cached_sorter.deref_mut(), @@ -160,6 +204,8 @@ impl FacetedDocidsExtractor { DelAddFacetValue::insert_add, docid, fid, + meta, + filterable_attributes, depth, value, ) @@ -180,9 +226,18 @@ impl FacetedDocidsExtractor { facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind), docid: DocumentId, fid: FieldId, + meta: Metadata, + filterable_attributes: &[FilterableAttributesRule], depth: perm_json_p::Depth, value: &Value, ) -> Result<()> { + // if the field is not faceted, do nothing + if !meta.is_faceted(filterable_attributes) { + return Ok(()); + } + + let features = meta.filterable_attributes_features(filterable_attributes); + let mut buffer = BVec::new_in(doc_alloc); // Exists // key: fid @@ -246,7 +301,9 @@ impl FacetedDocidsExtractor { } // Null // key: fid - Value::Null if depth == perm_json_p::Depth::OnBaseKey => { + Value::Null + if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_null() => + { buffer.clear(); buffer.push(FacetKind::Null as u8); buffer.extend_from_slice(&fid.to_be_bytes()); @@ -254,19 +311,29 @@ impl FacetedDocidsExtractor { } // Empty // key: fid - Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => { + Value::Array(a) + if a.is_empty() + && depth == perm_json_p::Depth::OnBaseKey + && features.is_filterable_empty() => + { buffer.clear(); buffer.push(FacetKind::Empty as u8); buffer.extend_from_slice(&fid.to_be_bytes()); cache_fn(cached_sorter, &buffer, docid) } - Value::String(_) if depth == perm_json_p::Depth::OnBaseKey => { + Value::String(_) + if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_empty() => + { buffer.clear(); buffer.push(FacetKind::Empty as u8); buffer.extend_from_slice(&fid.to_be_bytes()); 
cache_fn(cached_sorter, &buffer, docid) } - Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => { + Value::Object(o) + if o.is_empty() + && depth == perm_json_p::Depth::OnBaseKey + && features.is_filterable_empty() => + { buffer.clear(); buffer.push(FacetKind::Empty as u8); buffer.extend_from_slice(&fid.to_be_bytes()); @@ -276,10 +343,6 @@ impl FacetedDocidsExtractor { _ => Ok(()), } } - - fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result> { - index.user_defined_faceted_fields(rtxn) - } } struct DelAddFacetValue<'doc> { @@ -399,9 +462,11 @@ impl FacetedDocidsExtractor { { let index = indexing_context.index; let rtxn = index.read_txn()?; - let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?; - let attributes_to_extract: Vec<_> = - attributes_to_extract.iter().map(|s| s.as_ref()).collect(); + let filterable_attributes = index.filterable_attributes_rules(&rtxn)?; + let sortable_fields = index.sortable_fields(&rtxn)?; + let asc_desc_fields = index.asc_desc_fields(&rtxn)?; + let distinct_field = index.distinct_field(&rtxn)?.map(|s| s.to_string()); + let is_geo_enabled = index.is_geo_enabled(&rtxn)?; let datastore = ThreadLocal::new(); { @@ -410,10 +475,14 @@ impl FacetedDocidsExtractor { let _entered = span.enter(); let extractor = FacetedExtractorData { - attributes_to_extract: &attributes_to_extract, grenad_parameters: indexing_context.grenad_parameters, buckets: rayon::current_num_threads(), sender, + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + is_geo_enabled, }; extract( document_changes, diff --git a/crates/milli/src/update/new/extract/faceted/facet_document.rs b/crates/milli/src/update/new/extract/faceted/facet_document.rs index 8d582d103..e74131402 100644 --- a/crates/milli/src/update/new/extract/faceted/facet_document.rs +++ b/crates/milli/src/update/new/extract/faceted/facet_document.rs @@ -1,46 +1,80 @@ +use std::collections::HashSet; + use 
serde_json::Value; -use crate::constants::RESERVED_GEO_FIELD_NAME; +use crate::attribute_patterns::PatternMatch; +use crate::fields_ids_map::metadata::Metadata; use crate::update::new::document::Document; use crate::update::new::extract::geo::extract_geo_coordinates; use crate::update::new::extract::perm_json_p; -use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError}; +use crate::{ + FieldId, FilterableAttributesRule, GlobalFieldsIdsMap, InternalError, Result, UserError, +}; +use crate::filterable_attributes_rules::match_faceted_field; + +#[allow(clippy::too_many_arguments)] pub fn extract_document_facets<'doc>( - attributes_to_extract: &[&str], document: impl Document<'doc>, external_document_id: &str, field_id_map: &mut GlobalFieldsIdsMap, - facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>, + filterable_attributes: &[FilterableAttributesRule], + sortable_fields: &HashSet, + asc_desc_fields: &HashSet, + distinct_field: &Option, + is_geo_enabled: bool, + facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>, ) -> Result<()> { + // return the match result for the given field name. 
+ let match_field = |field_name: &str| -> PatternMatch { + match_faceted_field( + field_name, + filterable_attributes, + sortable_fields, + asc_desc_fields, + distinct_field, + ) + }; + + // extract the field if it is faceted (facet searchable, filterable, sortable) + let mut extract_field = |name: &str, depth: perm_json_p::Depth, value: &Value| -> Result<()> { + match field_id_map.id_with_metadata_or_insert(name) { + Some((field_id, meta)) => { + facet_fn(field_id, meta, depth, value)?; + + Ok(()) + } + None => Err(UserError::AttributeLimitReached.into()), + } + }; + for res in document.iter_top_level_fields() { let (field_name, value) = res?; + let selection = match_field(field_name); - let mut tokenize_field = - |name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map - .id_or_insert(name) - { - Some(field_id) => facet_fn(field_id, depth, value), - None => Err(UserError::AttributeLimitReached.into()), - }; + // extract the field if it matches a pattern and if it is faceted (facet searchable, filterable, sortable) + let mut match_and_extract = |name: &str, depth: perm_json_p::Depth, value: &Value| { + let selection = match_field(name); + if selection == PatternMatch::Match { + extract_field(name, depth, value)?; + } - // if the current field is searchable or contains a searchable attribute - let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]); - if selection != perm_json_p::Selection::Skip { + Ok(selection) + }; + + if selection != PatternMatch::NoMatch { // parse json. match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? 
{ Value::Object(object) => { perm_json_p::seek_leaf_values_in_object( &object, - Some(attributes_to_extract), - &[], // skip no attributes field_name, perm_json_p::Depth::OnBaseKey, - &mut tokenize_field, + &mut match_and_extract, )?; - if selection == perm_json_p::Selection::Select { - tokenize_field( + if selection == PatternMatch::Match { + extract_field( field_name, perm_json_p::Depth::OnBaseKey, &Value::Object(object), @@ -50,36 +84,34 @@ pub fn extract_document_facets<'doc>( Value::Array(array) => { perm_json_p::seek_leaf_values_in_array( &array, - Some(attributes_to_extract), - &[], // skip no attributes field_name, perm_json_p::Depth::OnBaseKey, - &mut tokenize_field, + &mut match_and_extract, )?; - if selection == perm_json_p::Selection::Select { - tokenize_field( + if selection == PatternMatch::Match { + extract_field( field_name, perm_json_p::Depth::OnBaseKey, &Value::Array(array), )?; } } - value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?, + value => extract_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?, } } } - if attributes_to_extract.contains(&RESERVED_GEO_FIELD_NAME) { + if is_geo_enabled { if let Some(geo_value) = document.geo_field()? { if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? 
{ - let (lat_fid, lng_fid) = field_id_map - .id_or_insert("_geo.lat") - .zip(field_id_map.id_or_insert("_geo.lng")) + let ((lat_fid, lat_meta), (lng_fid, lng_meta)) = field_id_map + .id_with_metadata_or_insert("_geo.lat") + .zip(field_id_map.id_with_metadata_or_insert("_geo.lng")) .ok_or(UserError::AttributeLimitReached)?; - facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?; - facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?; + facet_fn(lat_fid, lat_meta, perm_json_p::Depth::OnBaseKey, &lat.into())?; + facet_fn(lng_fid, lng_meta, perm_json_p::Depth::OnBaseKey, &lng.into())?; } } } diff --git a/crates/milli/src/update/new/extract/geo/mod.rs b/crates/milli/src/update/new/extract/geo/mod.rs index f2af0b229..d51fd9d36 100644 --- a/crates/milli/src/update/new/extract/geo/mod.rs +++ b/crates/milli/src/update/new/extract/geo/mod.rs @@ -9,7 +9,6 @@ use heed::RoTxn; use serde_json::value::RawValue; use serde_json::Value; -use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::GeoError; use crate::update::new::document::Document; use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor}; @@ -29,9 +28,7 @@ impl GeoExtractor { index: &Index, grenad_parameters: GrenadParameters, ) -> Result> { - let is_sortable = index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME); - let is_filterable = index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME); - if is_sortable || is_filterable { + if index.is_geo_enabled(rtxn)? 
{ Ok(Some(GeoExtractor { grenad_parameters })) } else { Ok(None) diff --git a/crates/milli/src/update/new/extract/mod.rs b/crates/milli/src/update/new/extract/mod.rs index aa0a3d333..a8264ba4a 100644 --- a/crates/milli/src/update/new/extract/mod.rs +++ b/crates/milli/src/update/new/extract/mod.rs @@ -5,7 +5,6 @@ mod geo; mod searchable; mod vectors; -use bumpalo::Bump; pub use cache::{ merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, }; @@ -15,27 +14,11 @@ pub use geo::*; pub use searchable::*; pub use vectors::EmbeddingExtractor; -use super::indexer::document_changes::{DocumentChanges, IndexingContext}; -use super::steps::IndexingStep; -use super::thread_local::{FullySend, ThreadLocal}; -use crate::Result; - -pub trait DocidsExtractor { - fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( - document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, - extractor_allocs: &'extractor mut ThreadLocal>, - step: IndexingStep, - ) -> Result>> - where - MSP: Fn() -> bool + Sync; -} - /// TODO move in permissive json pointer pub mod perm_json_p { use serde_json::{Map, Value}; - use crate::Result; + use crate::{attribute_patterns::PatternMatch, Result}; const SPLIT_SYMBOL: char = '.'; /// Returns `true` if the `selector` match the `key`. 
@@ -68,11 +51,9 @@ pub mod perm_json_p { pub fn seek_leaf_values_in_object( value: &Map, - selectors: Option<&[&str]>, - skip_selectors: &[&str], base_key: &str, base_depth: Depth, - seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>, + seeker: &mut impl FnMut(&str, Depth, &Value) -> Result, ) -> Result<()> { if value.is_empty() { seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?; @@ -85,40 +66,16 @@ pub mod perm_json_p { format!("{}{}{}", base_key, SPLIT_SYMBOL, key) }; - // here if the user only specified `doggo` we need to iterate in all the fields of `doggo` - // so we check the contained_in on both side - let selection = select_field(&base_key, selectors, skip_selectors); - if selection != Selection::Skip { + let selection = seeker(&base_key, Depth::OnBaseKey, value)?; + if selection != PatternMatch::NoMatch { match value { Value::Object(object) => { - if selection == Selection::Select { - seeker(&base_key, Depth::OnBaseKey, value)?; - } - - seek_leaf_values_in_object( - object, - selectors, - skip_selectors, - &base_key, - Depth::OnBaseKey, - seeker, - ) + seek_leaf_values_in_object(object, &base_key, Depth::OnBaseKey, seeker) } Value::Array(array) => { - if selection == Selection::Select { - seeker(&base_key, Depth::OnBaseKey, value)?; - } - - seek_leaf_values_in_array( - array, - selectors, - skip_selectors, - &base_key, - Depth::OnBaseKey, - seeker, - ) + seek_leaf_values_in_array(array, &base_key, Depth::OnBaseKey, seeker) } - value => seeker(&base_key, Depth::OnBaseKey, value), + _ => Ok(()), }?; } } @@ -128,11 +85,9 @@ pub mod perm_json_p { pub fn seek_leaf_values_in_array( values: &[Value], - selectors: Option<&[&str]>, - skip_selectors: &[&str], base_key: &str, base_depth: Depth, - seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>, + seeker: &mut impl FnMut(&str, Depth, &Value) -> Result, ) -> Result<()> { if values.is_empty() { seeker(base_key, base_depth, &Value::Array(vec![]))?; @@ -140,61 +95,16 @@ pub mod 
perm_json_p { for value in values { match value { - Value::Object(object) => seek_leaf_values_in_object( - object, - selectors, - skip_selectors, - base_key, - Depth::InsideArray, - seeker, - ), - Value::Array(array) => seek_leaf_values_in_array( - array, - selectors, - skip_selectors, - base_key, - Depth::InsideArray, - seeker, - ), - value => seeker(base_key, Depth::InsideArray, value), + Value::Object(object) => { + seek_leaf_values_in_object(object, base_key, Depth::InsideArray, seeker) + } + Value::Array(array) => { + seek_leaf_values_in_array(array, base_key, Depth::InsideArray, seeker) + } + value => seeker(base_key, Depth::InsideArray, value).map(|_| ()), }?; } Ok(()) } - - pub fn select_field( - field_name: &str, - selectors: Option<&[&str]>, - skip_selectors: &[&str], - ) -> Selection { - if skip_selectors.iter().any(|skip_selector| { - contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector) - }) { - Selection::Skip - } else if let Some(selectors) = selectors { - let mut selection = Selection::Skip; - for selector in selectors { - if contained_in(field_name, selector) { - selection = Selection::Select; - break; - } else if contained_in(selector, field_name) { - selection = Selection::Parent; - } - } - selection - } else { - Selection::Select - } - } - - #[derive(Debug, Clone, Copy, PartialEq, Eq)] - pub enum Selection { - /// The field is a parent of the of a nested field that must be selected - Parent, - /// The field must be selected - Select, - /// The field must be skipped - Skip, - } } diff --git a/crates/milli/src/update/new/facet_search_builder.rs b/crates/milli/src/update/new/facet_search_builder.rs index d1ff6096d..6e9ffa1ed 100644 --- a/crates/milli/src/update/new/facet_search_builder.rs +++ b/crates/milli/src/update/new/facet_search_builder.rs @@ -9,12 +9,14 @@ use heed::{BytesDecode, BytesEncode, RoTxn, RwTxn}; use super::fst_merger_builder::FstMergerBuilder; use super::KvReaderDelAdd; +use 
crate::attribute_patterns::PatternMatch; use crate::heed_codec::facet::FacetGroupKey; use crate::update::del_add::{DelAdd, KvWriterDelAdd}; use crate::update::{create_sorter, MergeDeladdBtreesetString}; use crate::{ - BEU16StrCodec, FieldId, GlobalFieldsIdsMap, Index, LocalizedAttributesRule, Result, - MAX_FACET_VALUE_LENGTH, + BEU16StrCodec, FieldId, FieldIdMapMissingEntry, FilterableAttributesFeatures, + FilterableAttributesRule, GlobalFieldsIdsMap, Index, InternalError, LocalizedAttributesRule, + Result, MAX_FACET_VALUE_LENGTH, }; pub struct FacetSearchBuilder<'indexer> { @@ -22,6 +24,7 @@ pub struct FacetSearchBuilder<'indexer> { normalized_facet_string_docids_sorter: Sorter, global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, localized_attributes_rules: Vec, + filterable_attributes_rules: Vec, // Buffered data below buffer: Vec, localized_field_ids: HashMap>>, @@ -31,6 +34,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { pub fn new( global_fields_ids_map: GlobalFieldsIdsMap<'indexer>, localized_attributes_rules: Vec, + filterable_attributes_rules: Vec, ) -> Self { let registered_facets = HashMap::new(); let normalized_facet_string_docids_sorter = create_sorter( @@ -49,6 +53,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { buffer: Vec::new(), global_fields_ids_map, localized_attributes_rules, + filterable_attributes_rules, localized_field_ids: HashMap::new(), } } @@ -60,6 +65,13 @@ impl<'indexer> FacetSearchBuilder<'indexer> { ) -> Result<()> { let FacetGroupKey { field_id, level: _level, left_bound } = facet_key; + let filterable_attributes_features = self.filterable_attributes_features(field_id)?; + + // if facet search is disabled, we don't need to register the facet + if !filterable_attributes_features.is_facet_searchable() { + return Ok(()); + }; + if deladd == DelAdd::Addition { self.registered_facets.entry(field_id).and_modify(|count| *count += 1).or_insert(1); } @@ -83,6 +95,24 @@ impl<'indexer> FacetSearchBuilder<'indexer> { Ok(()) } + fn 
filterable_attributes_features( + &mut self, + field_id: u16, + ) -> Result { + let Some(filterable_attributes_features) = + self.global_fields_ids_map.metadata(field_id).map(|metadata| { + metadata.filterable_attributes_features(&self.filterable_attributes_rules) + }) + else { + return Err(InternalError::FieldIdMapMissingEntry(FieldIdMapMissingEntry::FieldId { + field_id, + process: "facet_search_builder::register_from_key", + }) + .into()); + }; + Ok(filterable_attributes_features) + } + fn locales(&mut self, field_id: FieldId) -> Option<&[Language]> { if let Entry::Vacant(e) = self.localized_field_ids.entry(field_id) { let Some(field_name) = self.global_fields_ids_map.name(field_id) else { @@ -92,7 +122,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> { let locales = self .localized_attributes_rules .iter() - .find(|rule| rule.match_str(field_name)) + .find(|rule| rule.match_str(field_name) == PatternMatch::Match) .map(|rule| rule.locales.clone()); e.insert(locales); diff --git a/crates/milli/src/update/new/indexer/post_processing.rs b/crates/milli/src/update/new/indexer/post_processing.rs index 201ab9ec9..4ea749a85 100644 --- a/crates/milli/src/update/new/indexer/post_processing.rs +++ b/crates/milli/src/update/new/indexer/post_processing.rs @@ -33,10 +33,8 @@ where { let index = indexing_context.index; indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets); - if index.facet_search(wtxn)? { - compute_facet_search_database(index, wtxn, global_fields_ids_map)?; - } - compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; + compute_facet_level_database(index, wtxn, facet_field_ids_delta, &global_fields_ids_map)?; + compute_facet_search_database(index, wtxn, global_fields_ids_map)?; indexing_context.progress.update_progress(IndexingStep::PostProcessingWords); if let Some(prefix_delta) = compute_word_fst(index, wtxn)? 
{ compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?; @@ -116,10 +114,18 @@ fn compute_facet_search_database( global_fields_ids_map: GlobalFieldsIdsMap, ) -> Result<()> { let rtxn = index.read_txn()?; + + // if the facet search is not enabled, we can skip the rest of the function + if !index.facet_search(wtxn)? { + return Ok(()); + } + let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?; + let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?; let mut facet_search_builder = FacetSearchBuilder::new( global_fields_ids_map, localized_attributes_rules.unwrap_or_default(), + filterable_attributes_rules, ); let previous_facet_id_string_docids = index @@ -164,8 +170,19 @@ fn compute_facet_level_database( index: &Index, wtxn: &mut RwTxn, mut facet_field_ids_delta: FacetFieldIdsDelta, + global_fields_ids_map: &GlobalFieldsIdsMap, ) -> Result<()> { + let rtxn = index.read_txn()?; + let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?; for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() { + // skip field ids that should not be facet leveled + let Some(metadata) = global_fields_ids_map.metadata(fid) else { + continue; + }; + if !metadata.require_facet_level_database(&filterable_attributes_rules) { + continue; + } + let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string"); let _entered = span.enter(); match delta { diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs index 723e018a1..a8bd3217f 100644 --- a/crates/milli/src/update/new/indexer/write.rs +++ b/crates/milli/src/update/new/indexer/write.rs @@ -137,7 +137,6 @@ pub(super) fn update_index( index.put_primary_key(wtxn, new_primary_key.name())?; } let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?; - inner_index_settings.recompute_facets(wtxn, index)?; 
inner_index_settings.recompute_searchables(wtxn, index)?; index.put_field_distribution(wtxn, &field_distribution)?; index.put_documents_ids(wtxn, &document_ids)?; From ae8d453868bb927591f732723674ef26f7fa9f58 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 10:32:42 +0100 Subject: [PATCH 07/35] Refactor Document indexing process (searchables) **Changes:** The searchable database extraction is now relying on the AttributePatterns and FieldIdMapWithMetadata to match the field to extract. Remove the SearchableExtractor trait to make the code less complex. **Impact:** - Document Addition/modification searchable indexing - Document deletion searchable indexing --- .../extract/searchable/extract_word_docids.rs | 75 ++++----- .../extract_word_pair_proximity_docids.rs | 117 +++++++++++-- .../src/update/new/extract/searchable/mod.rs | 149 ++-------------- .../extract/searchable/tokenize_document.rs | 159 ++++++++++-------- .../milli/src/update/new/indexer/extract.rs | 2 +- 5 files changed, 239 insertions(+), 263 deletions(-) diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs index 49259cd64..444c3f7d5 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_docids.rs @@ -5,8 +5,8 @@ use std::ops::DerefMut as _; use bumpalo::collections::vec::Vec as BumpVec; use bumpalo::Bump; -use heed::RoTxn; +use super::match_searchable_field; use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; use crate::update::new::extract::cache::BalancedCaches; use crate::update::new::extract::perm_json_p::contained_in; @@ -17,8 +17,7 @@ use crate::update::new::ref_cell_ext::RefCellExt as _; use crate::update::new::steps::IndexingStep; use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal}; use crate::update::new::DocumentChange; -use 
crate::update::GrenadParameters; -use crate::{bucketed_position, DocumentId, FieldId, Index, Result, MAX_POSITION_PER_ATTRIBUTE}; +use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE}; const MAX_COUNTED_WORDS: usize = 30; @@ -207,9 +206,10 @@ impl<'extractor> WordDocidsCaches<'extractor> { } pub struct WordDocidsExtractorData<'a> { - tokenizer: &'a DocumentTokenizer<'a>, - grenad_parameters: &'a GrenadParameters, + tokenizer: DocumentTokenizer<'a>, + max_memory_by_thread: Option, buckets: usize, + searchable_attributes: Option>, } impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> { @@ -218,7 +218,7 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> { fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in( self.buckets, - self.grenad_parameters.max_memory_by_thread(), + self.max_memory_by_thread, extractor_alloc, )))) } @@ -230,7 +230,12 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> { ) -> Result<()> { for change in changes { let change = change?; - WordDocidsExtractors::extract_document_change(context, self.tokenizer, change)?; + WordDocidsExtractors::extract_document_change( + context, + &self.tokenizer, + self.searchable_attributes.as_deref(), + change, + )?; } Ok(()) } @@ -248,52 +253,42 @@ impl WordDocidsExtractors { where MSP: Fn() -> bool + Sync, { - let index = indexing_context.index; - let rtxn = index.read_txn()?; - - let stop_words = index.stop_words(&rtxn)?; - let allowed_separators = index.allowed_separators(&rtxn)?; + // Warning: this is duplicated code from extract_word_pair_proximity_docids.rs + let rtxn = indexing_context.index.read_txn()?; + let stop_words = indexing_context.index.stop_words(&rtxn)?; + let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?; let allowed_separators: Option> = allowed_separators.as_ref().map(|s| 
s.iter().map(String::as_str).collect()); - let dictionary = index.dictionary(&rtxn)?; + let dictionary = indexing_context.index.dictionary(&rtxn)?; let dictionary: Option> = dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); - let builder = tokenizer_builder( + let mut builder = tokenizer_builder( stop_words.as_ref(), allowed_separators.as_deref(), dictionary.as_deref(), ); - let tokenizer = builder.into_tokenizer(); - - let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?; - let attributes_to_skip = Self::attributes_to_skip(&rtxn, index)?; + let tokenizer = builder.build(); let localized_attributes_rules = - index.localized_attributes_rules(&rtxn)?.unwrap_or_default(); - + indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default(); let document_tokenizer = DocumentTokenizer { tokenizer: &tokenizer, - attribute_to_extract: attributes_to_extract.as_deref(), - attribute_to_skip: attributes_to_skip.as_slice(), localized_attributes_rules: &localized_attributes_rules, max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE, }; - + let extractor_data = WordDocidsExtractorData { + tokenizer: document_tokenizer, + max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(), + buckets: rayon::current_num_threads(), + searchable_attributes: indexing_context.index.user_defined_searchable_fields(&rtxn)?, + }; let datastore = ThreadLocal::new(); - { let span = tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction"); let _entered = span.enter(); - - let extractor = WordDocidsExtractorData { - tokenizer: &document_tokenizer, - grenad_parameters: indexing_context.grenad_parameters, - buckets: rayon::current_num_threads(), - }; - extract( document_changes, - &extractor, + &extractor_data, indexing_context, extractor_allocs, &datastore, @@ -312,6 +307,7 @@ impl WordDocidsExtractors { fn extract_document_change( context: &DocumentChangeContext>>, document_tokenizer: 
&DocumentTokenizer, + searchable_attributes: Option<&[&str]>, document_change: DocumentChange, ) -> Result<()> { let index = &context.index; @@ -345,7 +341,9 @@ impl WordDocidsExtractors { } DocumentChange::Update(inner) => { if !inner.has_changed_for_fields( - document_tokenizer.attribute_to_extract, + &mut |field_name: &str| { + match_searchable_field(field_name, searchable_attributes) + }, &context.rtxn, context.index, context.db_fields_ids_map, @@ -408,15 +406,4 @@ impl WordDocidsExtractors { let mut buffer = BumpVec::with_capacity_in(buffer_size, &context.doc_alloc); cached_sorter.flush_fid_word_count(&mut buffer) } - - fn attributes_to_extract<'a>( - rtxn: &'a RoTxn, - index: &'a Index, - ) -> Result>> { - index.user_defined_searchable_fields(rtxn).map_err(Into::into) - } - - fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result> { - Ok(Vec::new()) - } } diff --git a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs index e58c0efd2..0724b0513 100644 --- a/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs +++ b/crates/milli/src/update/new/extract/searchable/extract_word_pair_proximity_docids.rs @@ -2,30 +2,114 @@ use std::cell::RefCell; use std::collections::VecDeque; use std::rc::Rc; -use heed::RoTxn; +use bumpalo::Bump; -use super::tokenize_document::DocumentTokenizer; -use super::SearchableExtractor; +use super::match_searchable_field; +use super::tokenize_document::{tokenizer_builder, DocumentTokenizer}; use crate::proximity::{index_proximity, MAX_DISTANCE}; use crate::update::new::document::Document; use crate::update::new::extract::cache::BalancedCaches; -use crate::update::new::indexer::document_changes::DocumentChangeContext; +use crate::update::new::indexer::document_changes::{ + extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, +}; use 
crate::update::new::ref_cell_ext::RefCellExt as _; +use crate::update::new::steps::IndexingStep; +use crate::update::new::thread_local::{FullySend, ThreadLocal}; use crate::update::new::DocumentChange; -use crate::{FieldId, GlobalFieldsIdsMap, Index, Result}; +use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE}; + +pub struct WordPairProximityDocidsExtractorData<'a> { + tokenizer: DocumentTokenizer<'a>, + searchable_attributes: Option>, + max_memory_by_thread: Option, + buckets: usize, +} + +impl<'a, 'extractor> Extractor<'extractor> for WordPairProximityDocidsExtractorData<'a> { + type Data = RefCell>; + + fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { + Ok(RefCell::new(BalancedCaches::new_in( + self.buckets, + self.max_memory_by_thread, + extractor_alloc, + ))) + } + + fn process<'doc>( + &self, + changes: impl Iterator>>, + context: &DocumentChangeContext, + ) -> Result<()> { + for change in changes { + let change = change?; + WordPairProximityDocidsExtractor::extract_document_change( + context, + &self.tokenizer, + self.searchable_attributes.as_deref(), + change, + )?; + } + Ok(()) + } +} pub struct WordPairProximityDocidsExtractor; -impl SearchableExtractor for WordPairProximityDocidsExtractor { - fn attributes_to_extract<'a>( - rtxn: &'a RoTxn, - index: &'a Index, - ) -> Result>> { - index.user_defined_searchable_fields(rtxn).map_err(Into::into) - } +impl WordPairProximityDocidsExtractor { + pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( + document_changes: &DC, + indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, + extractor_allocs: &'extractor mut ThreadLocal>, + step: IndexingStep, + ) -> Result>> + where + MSP: Fn() -> bool + Sync, + { + // Warning: this is duplicated code from extract_word_docids.rs + let rtxn = indexing_context.index.read_txn()?; + let stop_words = indexing_context.index.stop_words(&rtxn)?; + let allowed_separators = 
indexing_context.index.allowed_separators(&rtxn)?; + let allowed_separators: Option> = + allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect()); + let dictionary = indexing_context.index.dictionary(&rtxn)?; + let dictionary: Option> = + dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); + let mut builder = tokenizer_builder( + stop_words.as_ref(), + allowed_separators.as_deref(), + dictionary.as_deref(), + ); + let tokenizer = builder.build(); + let localized_attributes_rules = + indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default(); + let document_tokenizer = DocumentTokenizer { + tokenizer: &tokenizer, + localized_attributes_rules: &localized_attributes_rules, + max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE, + }; + let extractor_data = WordPairProximityDocidsExtractorData { + tokenizer: document_tokenizer, + searchable_attributes: indexing_context.index.user_defined_searchable_fields(&rtxn)?, + max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(), + buckets: rayon::current_num_threads(), + }; + let datastore = ThreadLocal::new(); + { + let span = + tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction"); + let _entered = span.enter(); + extract( + document_changes, + &extractor_data, + indexing_context, + extractor_allocs, + &datastore, + step, + )?; + } - fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result> { - Ok(Vec::new()) + Ok(datastore.into_iter().map(RefCell::into_inner).collect()) } // This method is reimplemented to count the number of words in the document in each field @@ -34,6 +118,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor { fn extract_document_change( context: &DocumentChangeContext>, document_tokenizer: &DocumentTokenizer, + searchable_attributes: Option<&[&str]>, document_change: DocumentChange, ) -> Result<()> { let doc_alloc = &context.doc_alloc; @@ -71,7 +156,9 @@ impl 
SearchableExtractor for WordPairProximityDocidsExtractor { } DocumentChange::Update(inner) => { if !inner.has_changed_for_fields( - document_tokenizer.attribute_to_extract, + &mut |field_name: &str| { + match_searchable_field(field_name, searchable_attributes) + }, rtxn, index, context.db_fields_ids_map, diff --git a/crates/milli/src/update/new/extract/searchable/mod.rs b/crates/milli/src/update/new/extract/searchable/mod.rs index 7c949a3ce..79a6fae87 100644 --- a/crates/milli/src/update/new/extract/searchable/mod.rs +++ b/crates/milli/src/update/new/extract/searchable/mod.rs @@ -2,145 +2,28 @@ mod extract_word_docids; mod extract_word_pair_proximity_docids; mod tokenize_document; -use std::cell::RefCell; -use std::marker::PhantomData; - -use bumpalo::Bump; pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors}; pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor; -use heed::RoTxn; -use tokenize_document::{tokenizer_builder, DocumentTokenizer}; -use super::cache::BalancedCaches; -use super::DocidsExtractor; -use crate::update::new::indexer::document_changes::{ - extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, -}; -use crate::update::new::steps::IndexingStep; -use crate::update::new::thread_local::{FullySend, ThreadLocal}; -use crate::update::new::DocumentChange; -use crate::update::GrenadParameters; -use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE}; +use crate::attribute_patterns::{match_field_legacy, PatternMatch}; -pub struct SearchableExtractorData<'a, EX: SearchableExtractor> { - tokenizer: &'a DocumentTokenizer<'a>, - grenad_parameters: &'a GrenadParameters, - buckets: usize, - _ex: PhantomData, -} +pub fn match_searchable_field( + field_name: &str, + searchable_fields: Option<&[&str]>, +) -> PatternMatch { + let Some(searchable_fields) = searchable_fields else { + // If no searchable fields are provided, consider all fields as searchable + return PatternMatch::Match; + }; -impl<'a, 
'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor> - for SearchableExtractorData<'a, EX> -{ - type Data = RefCell>; - - fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result { - Ok(RefCell::new(BalancedCaches::new_in( - self.buckets, - self.grenad_parameters.max_memory_by_thread(), - extractor_alloc, - ))) - } - - fn process<'doc>( - &self, - changes: impl Iterator>>, - context: &DocumentChangeContext, - ) -> Result<()> { - for change in changes { - let change = change?; - EX::extract_document_change(context, self.tokenizer, change)?; + let mut selection = PatternMatch::NoMatch; + for pattern in searchable_fields { + match match_field_legacy(pattern, field_name) { + PatternMatch::Match => return PatternMatch::Match, + PatternMatch::Parent => selection = PatternMatch::Parent, + PatternMatch::NoMatch => (), } - Ok(()) - } -} - -pub trait SearchableExtractor: Sized + Sync { - fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( - document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, - extractor_allocs: &'extractor mut ThreadLocal>, - step: IndexingStep, - ) -> Result>> - where - MSP: Fn() -> bool + Sync, - { - let rtxn = indexing_context.index.read_txn()?; - let stop_words = indexing_context.index.stop_words(&rtxn)?; - let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?; - let allowed_separators: Option> = - allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect()); - let dictionary = indexing_context.index.dictionary(&rtxn)?; - let dictionary: Option> = - dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect()); - let mut builder = tokenizer_builder( - stop_words.as_ref(), - allowed_separators.as_deref(), - dictionary.as_deref(), - ); - let tokenizer = builder.build(); - - let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?; - let attributes_to_skip = Self::attributes_to_skip(&rtxn, 
indexing_context.index)?; - let localized_attributes_rules = - indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default(); - - let document_tokenizer = DocumentTokenizer { - tokenizer: &tokenizer, - attribute_to_extract: attributes_to_extract.as_deref(), - attribute_to_skip: attributes_to_skip.as_slice(), - localized_attributes_rules: &localized_attributes_rules, - max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE, - }; - - let extractor_data: SearchableExtractorData = SearchableExtractorData { - tokenizer: &document_tokenizer, - grenad_parameters: indexing_context.grenad_parameters, - buckets: rayon::current_num_threads(), - _ex: PhantomData, - }; - - let datastore = ThreadLocal::new(); - - { - let span = - tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction"); - let _entered = span.enter(); - extract( - document_changes, - &extractor_data, - indexing_context, - extractor_allocs, - &datastore, - step, - )?; - } - - Ok(datastore.into_iter().map(RefCell::into_inner).collect()) } - fn extract_document_change( - context: &DocumentChangeContext>, - document_tokenizer: &DocumentTokenizer, - document_change: DocumentChange, - ) -> Result<()>; - - fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) - -> Result>>; - - fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result>; -} - -impl DocidsExtractor for T { - fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>( - document_changes: &DC, - indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>, - extractor_allocs: &'extractor mut ThreadLocal>, - step: IndexingStep, - ) -> Result>> - where - MSP: Fn() -> bool + Sync, - { - Self::run_extraction(document_changes, indexing_context, extractor_allocs, step) - } + selection } diff --git a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs index 1c1605b66..dda46f24c 
100644 --- a/crates/milli/src/update/new/extract/searchable/tokenize_document.rs +++ b/crates/milli/src/update/new/extract/searchable/tokenize_document.rs @@ -3,9 +3,10 @@ use std::collections::HashMap; use charabia::{SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder}; use serde_json::Value; +use crate::attribute_patterns::PatternMatch; use crate::update::new::document::Document; use crate::update::new::extract::perm_json_p::{ - seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth, Selection, + seek_leaf_values_in_array, seek_leaf_values_in_object, Depth, }; use crate::{ FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError, @@ -17,8 +18,6 @@ const MAX_DISTANCE: u32 = 8; pub struct DocumentTokenizer<'a> { pub tokenizer: &'a Tokenizer<'a>, - pub attribute_to_extract: Option<&'a [&'a str]>, - pub attribute_to_skip: &'a [&'a str], pub localized_attributes_rules: &'a [LocalizedAttributesRule], pub max_positions_per_attributes: u32, } @@ -31,87 +30,94 @@ impl<'a> DocumentTokenizer<'a> { token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>, ) -> Result<()> { let mut field_position = HashMap::new(); + let mut tokenize_field = |field_name: &str, _depth, value: &Value| { + let Some((field_id, meta)) = field_id_map.id_with_metadata_or_insert(field_name) else { + return Err(UserError::AttributeLimitReached.into()); + }; + + if meta.is_searchable() { + self.tokenize_field(field_id, field_name, value, token_fn, &mut field_position)?; + } + + // todo: should be a match on the field_name using `match_field_legacy` function, + // but for legacy reasons we iterate over all the fields to fill the field_id_map. 
+ Ok(PatternMatch::Match) + }; for entry in document.iter_top_level_fields() { let (field_name, value) = entry?; - - let mut tokenize_field = |field_name: &str, _depth, value: &Value| { - let Some(field_id) = field_id_map.id_or_insert(field_name) else { - return Err(UserError::AttributeLimitReached.into()); - }; - - if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip) - != Selection::Select - { - return Ok(()); - } - - let position = field_position - .entry(field_id) - .and_modify(|counter| *counter += MAX_DISTANCE) - .or_insert(0); - if *position >= self.max_positions_per_attributes { - return Ok(()); - } - - let text; - let tokens = match value { - Value::Number(n) => { - text = n.to_string(); - self.tokenizer.tokenize(text.as_str()) - } - Value::Bool(b) => { - text = b.to_string(); - self.tokenizer.tokenize(text.as_str()) - } - Value::String(text) => { - let locales = self - .localized_attributes_rules - .iter() - .find(|rule| rule.match_str(field_name)) - .map(|rule| rule.locales()); - self.tokenizer.tokenize_with_allow_list(text.as_str(), locales) - } - _ => return Ok(()), - }; - - // create an iterator of token with their positions. - let tokens = process_tokens(*position, tokens) - .take_while(|(p, _)| *p < self.max_positions_per_attributes); - - for (index, token) in tokens { - // keep a word only if it is not empty and fit in a LMDB key. - let token = token.lemma().trim(); - if !token.is_empty() && token.len() <= MAX_WORD_LENGTH { - *position = index; - if let Ok(position) = (*position).try_into() { - token_fn(field_name, field_id, position, token)?; - } - } - } - - Ok(()) - }; - // parse json. match serde_json::to_value(value).map_err(InternalError::SerdeJson)? 
{ Value::Object(object) => seek_leaf_values_in_object( &object, - None, - &[], field_name, Depth::OnBaseKey, &mut tokenize_field, )?, Value::Array(array) => seek_leaf_values_in_array( &array, - None, - &[], field_name, Depth::OnBaseKey, &mut tokenize_field, )?, - value => tokenize_field(field_name, Depth::OnBaseKey, &value)?, + value => { + tokenize_field(field_name, Depth::OnBaseKey, &value)?; + } + } + } + + Ok(()) + } + + fn tokenize_field( + &self, + field_id: FieldId, + field_name: &str, + value: &Value, + token_fn: &mut impl FnMut(&str, u16, u16, &str) -> std::result::Result<(), crate::Error>, + field_position: &mut HashMap, + ) -> Result<()> { + let position = field_position + .entry(field_id) + .and_modify(|counter| *counter += MAX_DISTANCE) + .or_insert(0); + if *position >= self.max_positions_per_attributes { + return Ok(()); + } + + let text; + let tokens = match value { + Value::Number(n) => { + text = n.to_string(); + self.tokenizer.tokenize(text.as_str()) + } + Value::Bool(b) => { + text = b.to_string(); + self.tokenizer.tokenize(text.as_str()) + } + Value::String(text) => { + let locales = self + .localized_attributes_rules + .iter() + .find(|rule| rule.match_str(field_name) == PatternMatch::Match) + .map(|rule| rule.locales()); + self.tokenizer.tokenize_with_allow_list(text.as_str(), locales) + } + _ => return Ok(()), + }; + + // create an iterator of token with their positions. + let tokens = process_tokens(*position, tokens) + .take_while(|(p, _)| *p < self.max_positions_per_attributes); + + for (index, token) in tokens { + // keep a word only if it is not empty and fit in a LMDB key. 
+ let token = token.lemma().trim(); + if !token.is_empty() && token.len() <= MAX_WORD_LENGTH { + *position = index; + if let Ok(position) = (*position).try_into() { + token_fn(field_name, field_id, position, token)?; + } } } @@ -215,15 +221,20 @@ mod test { let mut tb = TokenizerBuilder::default(); let document_tokenizer = DocumentTokenizer { tokenizer: &tb.build(), - attribute_to_extract: None, - attribute_to_skip: &["not-me", "me-nether.nope"], localized_attributes_rules: &[], max_positions_per_attributes: 1000, }; let fields_ids_map = FieldIdMapWithMetadata::new( fields_ids_map, - MetadataBuilder::new(Default::default(), Default::default(), Default::default(), None), + MetadataBuilder::new( + Default::default(), + Default::default(), + Default::default(), + None, + None, + Default::default(), + ), ); let fields_ids_map_lock = std::sync::RwLock::new(fields_ids_map); @@ -265,6 +276,10 @@ mod test { 2, 16, ]: "catto", + [ + 3, + 0, + ]: "unsearchable", [ 5, 0, @@ -277,6 +292,10 @@ mod test { 8, 0, ]: "23", + [ + 9, + 0, + ]: "unsearchable", } "###); } diff --git a/crates/milli/src/update/new/indexer/extract.rs b/crates/milli/src/update/new/indexer/extract.rs index f49cd834d..907a4d1df 100644 --- a/crates/milli/src/update/new/indexer/extract.rs +++ b/crates/milli/src/update/new/indexer/extract.rs @@ -199,7 +199,7 @@ where let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids"); let _entered = span.enter(); - ::run_extraction( + WordPairProximityDocidsExtractor::run_extraction( document_changes, indexing_context, extractor_allocs, From 9a75dc6ab3f84254b64b9522f392da296a9a6033 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 10:33:34 +0100 Subject: [PATCH 08/35] Update tests using filterable attributes rules **Changes:** Replace the BTreeSet by Vec without changing the test results **Impact:** - None --- crates/benchmarks/benches/indexing.rs | 5 ++- crates/benchmarks/benches/search_geo.rs | 8 ++-- 
crates/benchmarks/benches/search_songs.rs | 4 +- crates/dump/src/lib.rs | 7 +++- .../src/scheduler/test_failure.rs | 5 ++- crates/milli/src/index.rs | 32 ++++++++------ .../src/search/facet/facet_distribution.rs | 33 ++++++++++----- crates/milli/src/search/new/tests/cutoff.rs | 6 +-- crates/milli/src/search/new/tests/distinct.rs | 7 +++- .../milli/src/search/new/tests/integration.rs | 18 ++++---- crates/milli/src/snapshot_tests.rs | 2 +- crates/milli/src/update/settings.rs | 42 +++++++++++++------ .../milli/tests/search/facet_distribution.rs | 11 +++-- crates/milli/tests/search/mod.rs | 20 +++++---- 14 files changed, 123 insertions(+), 77 deletions(-) diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index 4bd5315ff..9938fca26 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -12,7 +12,7 @@ use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; -use milli::Index; +use milli::{FilterableAttributesRule, Index}; use rand::seq::SliceRandom; use rand_chacha::rand_core::SeedableRng; use roaring::RoaringBitmap; @@ -57,7 +57,8 @@ fn setup_settings<'t>( let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); - let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect(); + let filterable_fields = + filterable_fields.iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect(); builder.set_filterable_fields(filterable_fields); let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect(); diff --git a/crates/benchmarks/benches/search_geo.rs b/crates/benchmarks/benches/search_geo.rs index 72503ce57..d76929f99 100644 --- a/crates/benchmarks/benches/search_geo.rs +++ b/crates/benchmarks/benches/search_geo.rs @@ -2,7 +2,7 @@ mod datasets_paths; mod utils; use 
criterion::{criterion_group, criterion_main}; -use milli::update::Settings; +use milli::{update::Settings, FilterableAttributesRule}; use utils::Conf; #[cfg(not(windows))] @@ -21,8 +21,10 @@ fn base_conf(builder: &mut Settings) { ["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect(); builder.set_searchable_fields(searchable_fields); - let filterable_fields = - ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect(); + let filterable_fields = ["_geo", "population", "elevation"] + .iter() + .map(|s| FilterableAttributesRule::Field(s.to_string())) + .collect(); builder.set_filterable_fields(filterable_fields); let sortable_fields = diff --git a/crates/benchmarks/benches/search_songs.rs b/crates/benchmarks/benches/search_songs.rs index bef014a0e..680a675ef 100644 --- a/crates/benchmarks/benches/search_songs.rs +++ b/crates/benchmarks/benches/search_songs.rs @@ -2,7 +2,7 @@ mod datasets_paths; mod utils; use criterion::{criterion_group, criterion_main}; -use milli::update::Settings; +use milli::{update::Settings, FilterableAttributesRule}; use utils::Conf; #[cfg(not(windows))] @@ -22,7 +22,7 @@ fn base_conf(builder: &mut Settings) { let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"] .iter() - .map(|s| s.to_string()) + .map(|s| FilterableAttributesRule::Field(s.to_string())) .collect(); builder.set_filterable_fields(faceted_fields); } diff --git a/crates/dump/src/lib.rs b/crates/dump/src/lib.rs index e7fd22333..4e2d6ac2f 100644 --- a/crates/dump/src/lib.rs +++ b/crates/dump/src/lib.rs @@ -233,8 +233,8 @@ pub(crate) mod test { use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures}; use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::keys::{Action, Key}; - use meilisearch_types::milli; use meilisearch_types::milli::update::Setting; + use meilisearch_types::milli::{self, FilterableAttributesRule}; use 
meilisearch_types::settings::{Checked, FacetingSettings, Settings}; use meilisearch_types::task_view::DetailsView; use meilisearch_types::tasks::{Details, Kind, Status}; @@ -279,7 +279,10 @@ pub(crate) mod test { let settings = Settings { displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(), searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(), - filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }), + filterable_attributes: Setting::Set(vec![ + FilterableAttributesRule::Field(S("race")), + FilterableAttributesRule::Field(S("age")), + ]), sortable_attributes: Setting::Set(btreeset! { S("age") }), ranking_rules: Setting::NotSet, stop_words: Setting::NotSet, diff --git a/crates/index-scheduler/src/scheduler/test_failure.rs b/crates/index-scheduler/src/scheduler/test_failure.rs index 5cdcb248b..191910d38 100644 --- a/crates/index-scheduler/src/scheduler/test_failure.rs +++ b/crates/index-scheduler/src/scheduler/test_failure.rs @@ -1,11 +1,11 @@ use std::time::Instant; use big_s::S; -use maplit::btreeset; use meili_snap::snapshot; use meilisearch_types::milli::obkv_to_json; use meilisearch_types::milli::update::IndexDocumentsMethod::*; use meilisearch_types::milli::update::Setting; +use meilisearch_types::milli::FilterableAttributesRule; use meilisearch_types::tasks::{Kind, KindWithContent}; use crate::insta_snapshot::snapshot_index_scheduler; @@ -127,7 +127,8 @@ fn fail_in_process_batch_for_document_deletion() { use meilisearch_types::settings::{Settings, Unchecked}; let mut new_settings: Box> = Box::default(); - new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto"))); + new_settings.filterable_attributes = + Setting::Set(vec![FilterableAttributesRule::Field(S("catto"))]); index_scheduler .register( diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 75f4a8c17..12b98b729 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1766,7 +1766,7 @@ pub(crate) mod 
tests { use big_s::S; use bumpalo::Bump; use heed::{EnvOpenOptions, RwTxn}; - use maplit::{btreemap, hashset}; + use maplit::btreemap; use memmap2::Mmap; use tempfile::TempDir; @@ -1782,7 +1782,8 @@ pub(crate) mod tests { use crate::vector::settings::{EmbedderSource, EmbeddingSettings}; use crate::vector::EmbeddingConfigs; use crate::{ - db_snap, obkv_to_json, Filter, Index, Search, SearchResult, ThreadPoolNoAbortBuilder, + db_snap, obkv_to_json, Filter, FilterableAttributesRule, Index, Search, SearchResult, + ThreadPoolNoAbortBuilder, }; pub(crate) struct TempIndex { @@ -2189,7 +2190,7 @@ pub(crate) mod tests { let rtxn = index.read_txn().unwrap(); let real = index.searchable_fields(&rtxn).unwrap(); - assert_eq!(real, &["doggo", "name"]); + assert!(real.is_empty()); let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap(); assert_eq!(user_defined, &["doggo", "name"]); @@ -2217,7 +2218,9 @@ pub(crate) mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + RESERVED_GEO_FIELD_NAME.to_string(), + )]); }) .unwrap(); index @@ -2325,7 +2328,9 @@ pub(crate) mod tests { index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("doggo") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "doggo".to_string(), + )]); }) .unwrap(); index @@ -2362,15 +2367,14 @@ pub(crate) mod tests { #[test] fn replace_documents_external_ids_and_soft_deletion_check() { - use big_s::S; - use maplit::hashset; - let index = TempIndex::new(); index .update_settings(|settings| { settings.set_primary_key("id".to_owned()); - settings.set_filterable_fields(hashset! 
{ S("doggo") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "doggo".to_string(), + )]); }) .unwrap(); @@ -2903,8 +2907,9 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_string()); - settings - .set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()])); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + RESERVED_GEO_FIELD_NAME.to_string(), + )]); }) .unwrap(); @@ -2938,8 +2943,9 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_primary_key("id".to_string()); - settings - .set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()])); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + RESERVED_GEO_FIELD_NAME.to_string(), + )]); }) .unwrap(); diff --git a/crates/milli/src/search/facet/facet_distribution.rs b/crates/milli/src/search/facet/facet_distribution.rs index b165d4e80..beb5d2568 100644 --- a/crates/milli/src/search/facet/facet_distribution.rs +++ b/crates/milli/src/search/facet/facet_distribution.rs @@ -382,7 +382,7 @@ impl<'a> FacetDistribution<'a> { ) -> Result<()> { let mut invalid_facets = BTreeSet::new(); if let Some(facets) = &self.facets { - for (field, _) in facets { + for field in facets.keys() { let is_valid_faceted_field = fields_ids_map.id_with_metadata(field).map_or(false, |(_, metadata)| { metadata.is_faceted(filterable_attributes_rules) @@ -439,11 +439,10 @@ mod tests { use std::iter; use big_s::S; - use maplit::hashset; use crate::documents::mmap_from_objects; use crate::index::tests::TempIndex; - use crate::{milli_snap, FacetDistribution, OrderBy}; + use crate::{milli_snap, FacetDistribution, FilterableAttributesRule, OrderBy}; #[test] fn few_candidates_few_facet_values() { @@ -453,7 +452,9 @@ mod tests { let index = TempIndex::new(); index - .update_settings(|settings| settings.set_filterable_fields(hashset! 
{ S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let documents = documents!([ @@ -524,7 +525,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"]; @@ -609,7 +612,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::>(); @@ -668,7 +673,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).collect::>(); @@ -719,7 +726,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).collect::>(); @@ -770,7 +779,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! 
{ S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).collect::>(); @@ -821,7 +832,9 @@ mod tests { let index = TempIndex::new_with_map_size(4096 * 10_000); index - .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") })) + .update_settings(|settings| { + settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))]) + }) .unwrap(); let facet_values = (0..1000).collect::>(); diff --git a/crates/milli/src/search/new/tests/cutoff.rs b/crates/milli/src/search/new/tests/cutoff.rs index 63b67f2e7..f2dfb45d6 100644 --- a/crates/milli/src/search/new/tests/cutoff.rs +++ b/crates/milli/src/search/new/tests/cutoff.rs @@ -5,13 +5,11 @@ use std::time::Duration; -use big_s::S; -use maplit::hashset; use meili_snap::snapshot; use crate::index::tests::TempIndex; use crate::score_details::{ScoreDetails, ScoringStrategy}; -use crate::{Criterion, Filter, Search, TimeBudget}; +use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget}; fn create_index() -> TempIndex { let index = TempIndex::new(); @@ -20,7 +18,7 @@ fn create_index() -> TempIndex { .update_settings(|s| { s.set_primary_key("id".to_owned()); s.set_searchable_fields(vec!["text".to_owned()]); - s.set_filterable_fields(hashset! 
{ S("id") }); + s.set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_owned())]); s.set_criteria(vec![Criterion::Words, Criterion::Typo]); }) .unwrap(); diff --git a/crates/milli/src/search/new/tests/distinct.rs b/crates/milli/src/search/new/tests/distinct.rs index dd27bfc8a..d3c453957 100644 --- a/crates/milli/src/search/new/tests/distinct.rs +++ b/crates/milli/src/search/new/tests/distinct.rs @@ -19,7 +19,10 @@ use maplit::hashset; use super::collect_field_values; use crate::index::tests::TempIndex; -use crate::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy}; +use crate::{ + AscDesc, Criterion, FilterableAttributesRule, Index, Member, Search, SearchResult, + TermsMatchingStrategy, +}; fn create_index() -> TempIndex { let index = TempIndex::new(); @@ -236,7 +239,7 @@ fn test_distinct_placeholder_no_ranking_rules() { // Set the letter as filterable and unset the distinct attribute. index .update_settings(|s| { - s.set_filterable_fields(hashset! 
{ S("letter") }); + s.set_filterable_fields(vec![FilterableAttributesRule::Field("letter".to_owned())]); s.reset_distinct_field(); }) .unwrap(); diff --git a/crates/milli/src/search/new/tests/integration.rs b/crates/milli/src/search/new/tests/integration.rs index e60a09ec5..e718eb39d 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -9,7 +9,7 @@ use crate::progress::Progress; use crate::update::new::indexer; use crate::update::{IndexerConfig, Settings}; use crate::vector::EmbeddingConfigs; -use crate::{db_snap, Criterion, Index}; +use crate::{db_snap, Criterion, FilterableAttributesRule, Index}; pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson"); use crate::constants::RESERVED_GEO_FIELD_NAME; @@ -25,14 +25,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(criteria.to_vec()); - builder.set_filterable_fields(hashset! { - S("tag"), - S("asc_desc_rank"), - S(RESERVED_GEO_FIELD_NAME), - S("opt1"), - S("opt1.opt2"), - S("tag_in") - }); + builder.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S("tag")), + FilterableAttributesRule::Field(S("asc_desc_rank")), + FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)), + FilterableAttributesRule::Field(S("opt1")), + FilterableAttributesRule::Field(S("opt1.opt2")), + FilterableAttributesRule::Field(S("tag_in")), + ]); builder.set_sortable_fields(hashset! 
{ S("tag"), S("asc_desc_rank"), diff --git a/crates/milli/src/snapshot_tests.rs b/crates/milli/src/snapshot_tests.rs index 6635ab2f4..3e58c44d9 100644 --- a/crates/milli/src/snapshot_tests.rs +++ b/crates/milli/src/snapshot_tests.rs @@ -386,7 +386,7 @@ pub fn snap_settings(index: &Index) -> String { write_setting_to_snap!(criteria); write_setting_to_snap!(displayed_fields); write_setting_to_snap!(distinct_field); - write_setting_to_snap!(filterable_fields); + write_setting_to_snap!(filterable_attributes_rules); write_setting_to_snap!(sortable_fields); write_setting_to_snap!(synonyms); write_setting_to_snap!(authorize_typos); diff --git a/crates/milli/src/update/settings.rs b/crates/milli/src/update/settings.rs index d38fdf138..42f38ea0a 100644 --- a/crates/milli/src/update/settings.rs +++ b/crates/milli/src/update/settings.rs @@ -1876,7 +1876,7 @@ pub fn validate_embedding_settings( mod tests { use big_s::S; use heed::types::Bytes; - use maplit::{btreemap, btreeset, hashset}; + use maplit::{btreemap, btreeset}; use meili_snap::snapshot; use super::*; @@ -2086,7 +2086,9 @@ mod tests { // Set the filterable fields to be the age. index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("age") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "age".to_string(), + )]); }) .unwrap(); @@ -2101,8 +2103,8 @@ mod tests { // Check that the displayed fields are correctly set. let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashset! { S("age") }); + let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap(); + assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("age".to_string(),)]); // Only count the field_id 0 and level 0 facet values. // TODO we must support typed CSVs for numbers to be understood. 
let fidmap = index.fields_ids_map(&rtxn).unwrap(); @@ -2144,14 +2146,23 @@ mod tests { // Set the filterable fields to be the age and the name. index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("age"), S("name") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field("age".to_string()), + FilterableAttributesRule::Field("name".to_string()), + ]); }) .unwrap(); // Check that the displayed fields are correctly set. let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashset! { S("age"), S("name") }); + let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap(); + assert_eq!( + fields_ids, + vec![ + FilterableAttributesRule::Field("age".to_string()), + FilterableAttributesRule::Field("name".to_string()), + ] + ); let rtxn = index.read_txn().unwrap(); // Only count the field_id 2 and level 0 facet values. @@ -2176,14 +2187,16 @@ mod tests { // Remove the age from the filterable fields. index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("name") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "name".to_string(), + )]); }) .unwrap(); // Check that the displayed fields are correctly set. let rtxn = index.read_txn().unwrap(); - let fields_ids = index.filterable_fields(&rtxn).unwrap(); - assert_eq!(fields_ids, hashset! { S("name") }); + let fields_ids = index.filterable_attributes_rules(&rtxn).unwrap(); + assert_eq!(fields_ids, vec![FilterableAttributesRule::Field("name".to_string())]); let rtxn = index.read_txn().unwrap(); // Only count the field_id 2 and level 0 facet values. @@ -2513,7 +2526,10 @@ mod tests { index .update_settings(|settings| { settings.set_displayed_fields(vec!["hello".to_string()]); - settings.set_filterable_fields(hashset! 
{ S("age"), S("toto") }); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field("age".to_string()), + FilterableAttributesRule::Field("toto".to_string()), + ]); settings.set_criteria(vec![Criterion::Asc(S("toto"))]); }) .unwrap(); @@ -2630,7 +2646,9 @@ mod tests { // Set the genres setting index .update_settings(|settings| { - settings.set_filterable_fields(hashset! { S("genres") }); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "genres".to_string(), + )]); }) .unwrap(); diff --git a/crates/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs index 4d8bf324c..c5a61da9f 100644 --- a/crates/milli/tests/search/facet_distribution.rs +++ b/crates/milli/tests/search/facet_distribution.rs @@ -1,13 +1,12 @@ use big_s::S; use bumpalo::Bump; use heed::EnvOpenOptions; -use maplit::hashset; use milli::documents::mmap_from_objects; use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; -use milli::{FacetDistribution, Index, Object, OrderBy}; +use milli::{FacetDistribution, FilterableAttributesRule, Index, Object, OrderBy}; use serde_json::{from_value, json}; #[test] @@ -21,10 +20,10 @@ fn test_facet_distribution_with_no_facet_values() { let config = IndexerConfig::default(); let mut builder = Settings::new(&mut wtxn, &index, &config); - builder.set_filterable_fields(hashset! 
{ - S("genres"), - S("tags"), - }); + builder.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S("genres")), + FilterableAttributesRule::Field(S("tags")), + ]); builder.execute(|_| (), || false).unwrap(); wtxn.commit().unwrap(); diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs index 337a4c88c..72b124219 100644 --- a/crates/milli/tests/search/mod.rs +++ b/crates/milli/tests/search/mod.rs @@ -11,7 +11,9 @@ use milli::progress::Progress; use milli::update::new::indexer; use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; -use milli::{AscDesc, Criterion, DocumentId, Index, Member, TermsMatchingStrategy}; +use milli::{ + AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member, TermsMatchingStrategy, +}; use serde::{Deserialize, Deserializer}; use slice_group_by::GroupBy; @@ -42,14 +44,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut builder = Settings::new(&mut wtxn, &index, &config); builder.set_criteria(criteria.to_vec()); - builder.set_filterable_fields(hashset! { - S("tag"), - S("asc_desc_rank"), - S("_geo"), - S("opt1"), - S("opt1.opt2"), - S("tag_in") - }); + builder.set_filterable_fields(vec![ + FilterableAttributesRule::Field(S("tag")), + FilterableAttributesRule::Field(S("asc_desc_rank")), + FilterableAttributesRule::Field(S("_geo")), + FilterableAttributesRule::Field(S("opt1")), + FilterableAttributesRule::Field(S("opt1.opt2")), + FilterableAttributesRule::Field(S("tag_in")), + ]); builder.set_sortable_fields(hashset! { S("tag"), S("asc_desc_rank"), From 6dbec91d2b34c0e4ea4d8d9fc74ab83a1abf0fad Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 27 Feb 2025 16:50:50 +0100 Subject: [PATCH 09/35] Index document in filterable attributes tests **Reason:** Because the filterable attributes are patterns now, the fieldIdMap will only register the fields that exists in at least one document. 
if a field doesn't exist in any document, it will not be registered even if it has been specified in the filterable fields. --- crates/meilisearch/tests/search/errors.rs | 12 ++++++++- crates/meilisearch/tests/search/multi/mod.rs | 10 ++++++-- crates/milli/src/index.rs | 27 ++++++++++++-------- 3 files changed, 35 insertions(+), 14 deletions(-) diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index 9dea42b12..c2014ca42 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -1,8 +1,10 @@ use meili_snap::*; -use crate::common::{shared_does_not_exists_index, Server}; +use crate::common::{shared_does_not_exists_index, Server, DOCUMENTS, NESTED_DOCUMENTS}; use crate::json; +use super::test_settings_documents_indexing_swapping_and_search; + #[actix_rt::test] async fn search_unexisting_index() { let index = shared_does_not_exists_index().await; @@ -422,6 +424,8 @@ async fn search_invalid_threshold() { async fn search_non_filterable_facets() { let server = Server::new_shared(); let index = server.unique_index(); + let (response, _code) = index.add_documents(json!([{"id": 1, "title": "Doggo"}]), None).await; + index.wait_task(response.uid()).await.succeeded(); let (response, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; // Wait for the settings update to complete index.wait_task(response.uid()).await.succeeded(); @@ -453,6 +457,9 @@ async fn search_non_filterable_facets() { async fn search_non_filterable_facets_multiple_filterable() { let server = Server::new_shared(); let index = server.unique_index(); + let (response, _code) = + index.add_documents(json!([{"id": 1, "title": "Doggo", "genres": "Action"}]), None).await; + index.wait_task(response.uid()).await.succeeded(); let (response, _code) = index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await; index.wait_task(response.uid()).await.succeeded(); @@ -514,6 
+521,9 @@ async fn search_non_filterable_facets_no_filterable() { async fn search_non_filterable_facets_multiple_facets() { let server = Server::new_shared(); let index = server.unique_index(); + let (response, _code) = + index.add_documents(json!([{"id": 1, "title": "Doggo", "genres": "Action"}]), None).await; + index.wait_task(response.uid()).await.succeeded(); let (response, _uid) = index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await; index.wait_task(response.uid()).await.succeeded(); diff --git a/crates/meilisearch/tests/search/multi/mod.rs b/crates/meilisearch/tests/search/multi/mod.rs index 2a95a5dd2..e5c58268d 100644 --- a/crates/meilisearch/tests/search/multi/mod.rs +++ b/crates/meilisearch/tests/search/multi/mod.rs @@ -3604,22 +3604,28 @@ async fn federation_non_faceted_for_an_index() { let index = server.index("fruits"); + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await.succeeded(); + let (value, _) = index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id", "name"]}), ) .await; - index.wait_task(value.uid()).await.succeeded(); let index = server.index("fruits-no-name"); + let documents = FRUITS_DOCUMENTS.clone(); + let (value, _) = index.add_documents(documents, None).await; + index.wait_task(value.uid()).await.succeeded(); + let (value, _) = index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id"]}), ) .await; - index.wait_task(value.uid()).await.succeeded(); let index = server.index("fruits-no-facets"); diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 12b98b729..ff87eba7c 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -2978,7 +2978,9 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S("name")]); - 
settings.set_filterable_fields(HashSet::from([S("age")])); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "age".to_string(), + )]); }) .unwrap(); @@ -2986,35 +2988,37 @@ pub(crate) mod tests { .add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" })) .unwrap(); db_snap!(index, fields_ids_map, @r###" - 0 name | - 1 id | + 0 id | + 1 name | 2 age | 3 realName | "###); db_snap!(index, searchable_fields, @r###"["name"]"###); db_snap!(index, fieldids_weights_map, @r###" fid weight - 0 0 | + 1 0 | "###); index .update_settings(|settings| { settings.set_searchable_fields(vec![S("name"), S("realName")]); - settings.set_filterable_fields(HashSet::from([S("age")])); + settings.set_filterable_fields(vec![FilterableAttributesRule::Field( + "age".to_string(), + )]); }) .unwrap(); // The order of the field id map shouldn't change db_snap!(index, fields_ids_map, @r###" - 0 name | - 1 id | + 0 id | + 1 name | 2 age | 3 realName | "###); db_snap!(index, searchable_fields, @r###"["name", "realName"]"###); db_snap!(index, fieldids_weights_map, @r###" fid weight - 0 0 | + 1 0 | 3 1 | "###); } @@ -3099,14 +3103,16 @@ pub(crate) mod tests { index .update_settings(|settings| { settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]); - settings.set_filterable_fields(hashset![S("_vectors"), S("_vectors.doggo")]); + settings.set_filterable_fields(vec![ + FilterableAttributesRule::Field("_vectors".to_string()), + FilterableAttributesRule::Field("_vectors.doggo".to_string()), + ]); }) .unwrap(); db_snap!(index, fields_ids_map, @r###" 0 id | 1 _vectors | - 2 _vectors.doggo | "###); db_snap!(index, searchable_fields, @"[]"); db_snap!(index, fieldids_weights_map, @r###" @@ -3139,7 +3145,6 @@ pub(crate) mod tests { db_snap!(index, fields_ids_map, @r###" 0 id | 1 _vectors | - 2 _vectors.doggo | "###); db_snap!(index, searchable_fields, @"[]"); db_snap!(index, fieldids_weights_map, @r###" From 
19944941553129e20257788815b425c82014e154 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 27 Feb 2025 16:39:07 +0100 Subject: [PATCH 10/35] Update snapshot using the new filterableAttributes type --- .../after_adding_the_documents.snap | 3 +-- .../after_adding_the_settings.snap | 3 +-- .../after_removing_the_documents.snap | 3 +-- .../registered_the_document_deletions.snap | 3 +-- .../registered_the_setting_and_document_addition.snap | 3 +-- 5 files changed, 5 insertions(+), 10 deletions(-) diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap index 1b9018726..ebacb5415 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_documents.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/scheduler/test_failure.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: 
PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: 
false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap index 5bbc89c44..0fc0d7fb5 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_adding_the_settings.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/scheduler/test_failure.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", 
new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, 
details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap index 7149d5f97..c28ea8b95 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/scheduler/test_failure.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), 
searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: 
Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} 3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap index b13a63738..8b010498f 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_document_deletions.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/scheduler/test_failure.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), 
sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, 
ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} 3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap index 9e10d3052..0ba3ef598 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/registered_the_setting_and_document_addition.snap @@ -1,13 +1,12 @@ --- source: crates/index-scheduler/src/scheduler/test_failure.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: [] 
---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, 
_kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: From f2a28a4dd75503853e0dde89b1dbe55b5ec1039d Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 09:41:21 +0100 Subject: [PATCH 11/35] Add and enhance tests **Changes:** Introduce a test_settings_documents_indexing_swapping_and_search function that runs the test twice: 1) by indexing the settings before the documents then running the test 2) by indexing the documents before the settings then running the test This helps to ensure that there is no bug coming from one or the other indexer. 
--- crates/meilisearch/tests/common/server.rs | 6 + crates/meilisearch/tests/search/errors.rs | 581 +++++++++------- .../meilisearch/tests/search/facet_search.rs | 181 ++++- crates/meilisearch/tests/search/filters.rs | 625 ++++++++++++++++++ crates/meilisearch/tests/search/geo.rs | 184 ++++++ crates/meilisearch/tests/search/mod.rs | 454 +++++++++---- 6 files changed, 1684 insertions(+), 347 deletions(-) create mode 100644 crates/meilisearch/tests/search/filters.rs diff --git a/crates/meilisearch/tests/common/server.rs b/crates/meilisearch/tests/common/server.rs index f78542db1..d1e81e0a7 100644 --- a/crates/meilisearch/tests/common/server.rs +++ b/crates/meilisearch/tests/common/server.rs @@ -125,6 +125,12 @@ impl Server { self.service.post("/indexes", body).await } + pub async fn delete_index(&self, uid: impl AsRef) -> (Value, StatusCode) { + let url = format!("/indexes/{}", urlencoding::encode(uid.as_ref())); + let (value, code) = self.service.delete(url).await; + (value, code) + } + pub fn index_with_encoder(&self, uid: impl AsRef, encoder: Encoder) -> Index<'_> { Index { uid: uid.as_ref().to_string(), diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index c2014ca42..05f084a0e 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -646,14 +646,11 @@ async fn search_bad_matching_strategy() { #[actix_rt::test] async fn filter_invalid_syntax_object() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - index - .search(json!({"filter": "title & Glass"}), |response, code| { + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": "title & Glass"}), + |response, code| { snapshot!(response, @r###" { "message": "Was 
expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", @@ -663,20 +660,18 @@ async fn filter_invalid_syntax_object() { } "###); snapshot!(code, @"400 Bad Request"); - }) - .await; + }, + ) + .await; } #[actix_rt::test] async fn filter_invalid_syntax_array() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - index - .search(json!({"filter": ["title & Glass"]}), |response, code| { + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": ["title & Glass"]}), + |response, code| { snapshot!(response, @r###" { "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", @@ -686,206 +681,327 @@ async fn filter_invalid_syntax_array() { } "###); snapshot!(code, @"400 Bad Request"); - }) - .await; + }, + ) + .await; } #[actix_rt::test] async fn filter_invalid_syntax_string() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - let expected_response = json!({ - "message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. 
You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); - index - .search(json!({"filter": "title = Glass XOR title = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": "title = Glass XOR title = Glass"}), + |response, code| { + snapshot!(response, @r###" + { + "message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); + }, + ) + .await; } #[actix_rt::test] async fn filter_invalid_attribute_array() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - let expected_response = json!({ - "message": format!("Index `{}`: Attribute `many` is not filterable. 
Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid), - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); - index - .search(json!({"filter": ["many = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": ["many = Glass"]}), + |response, code| { + snapshot!(response, @r###" + { + "message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); + }, + ) + .await; } #[actix_rt::test] async fn filter_invalid_attribute_string() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - let expected_response = json!({ - "message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid), - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); - index - .search(json!({"filter": "many = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": "many = Glass"}), + |response, code| { + snapshot!(response, @r###" + { + "message": "Index `test`: Attribute `many` is not filterable. 
Available filterable attributes are: `title`.\n1:5 many = Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); + }, + ) + .await; } #[actix_rt::test] async fn filter_reserved_geo_attribute_array() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - let expected_response = json!({ - "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); - index - .search(json!({"filter": ["_geo = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": ["_geo = Glass"]}), + |response, code| { + snapshot!(response, @r###" + { + "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); + }, + ) + .await; } #[actix_rt::test] async fn filter_reserved_geo_attribute_string() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - let expected_response = json!({ - "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); - index - .search(json!({"filter": "_geo = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": "_geo = Glass"}), + |response, code| { + snapshot!(response, @r###" + { + "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); + }, + ) + .await; } #[actix_rt::test] async fn filter_reserved_attribute_array() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - let expected_response = json!({ - "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); - index - .search(json!({"filter": ["_geoDistance = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": ["_geoDistance = Glass"]}), + |response, code| { + snapshot!(response, @r###" + { + "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); + }, + ) + .await; } #[actix_rt::test] async fn filter_reserved_attribute_string() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - let expected_response = json!({ - "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); - index - .search(json!({"filter": "_geoDistance = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": "_geoDistance = Glass"}), + |response, code| { + snapshot!(response, @r###" + { + "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); + }, + ) + .await; } #[actix_rt::test] async fn filter_reserved_geo_point_array() { - let server = Server::new_shared(); - let index = server.unique_index(); - - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); - - let expected_response = json!({ - "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); - index - .search(json!({"filter": ["_geoPoint = Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": ["_geoPoint = Glass"]}), + |response, code| { + snapshot!(response, @r###" + { + "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); + }, + ) + .await; } #[actix_rt::test] async fn filter_reserved_geo_point_string() { - let server = Server::new_shared(); - let index = server.unique_index(); + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["title"]}), + &json!({"filter": "_geoPoint = Glass"}), + |response, code| { + snapshot!(response, @r###" + { + "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); + }, + ) + .await; +} - let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - index.wait_task(task.uid()).await.succeeded(); +#[actix_rt::test] +async fn search_with_pattern_filter_settings_errors() { + // Check if the Equality filter works with patterns + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": false, "comparison": true} + } + }]}), + &json!({ + "filter": "cattos = pésti" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Filter operator `=` is not allowed for 
the attribute `cattos`, allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; - let expected_response = json!({ - "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); - index - .search(json!({"filter": "_geoPoint = Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }) - .await; + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": false, "comparison": true} + } + }]}), + &json!({ + "filter": "cattos IN [pésti, simba]" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`, allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, +) +.await; + + // Check if the Comparison filter works with patterns + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{"patterns": ["cattos","doggos.age"]}]}), + &json!({ + "filter": "doggos.age > 2" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + 
snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": true, "comparison": false} + } + }]}), + &json!({ + "filter": "doggos.age > 2" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": true, "comparison": false} + } + }]}), + &json!({ + "filter": "doggos.age 2 TO 4" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Filter operator `TO` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; } #[actix_rt::test] @@ -1028,62 +1144,62 @@ async fn sort_unset_ranking_rule() { #[actix_rt::test] async fn 
search_on_unknown_field() { - let server = Server::new_shared(); - let index = server.unique_index(); - let (response, _code) = - index.update_settings_searchable_attributes(json!(["id", "title"])).await; - index.wait_task(response.uid()).await.succeeded(); - - let expected_response = json!({ - "message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid), - "code": "invalid_search_attributes_to_search_on", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" - }); - index - .search( - json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}), - |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }, - ) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"searchableAttributes": ["id", "title"]}), + &json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Index `test`: Attribute `unknown` is not searchable. 
Available searchable attributes are: `id, title`.", + "code": "invalid_search_attributes_to_search_on", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" + } + "###); + }, + ) + .await; } #[actix_rt::test] async fn search_on_unknown_field_plus_joker() { - let server = Server::new_shared(); - let index = server.unique_index(); - let (response, _code) = - index.update_settings_searchable_attributes(json!(["id", "title"])).await; - index.wait_task(response.uid()).await.succeeded(); + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"searchableAttributes": ["id", "title"]}), + &json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Index `test`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", + "code": "invalid_search_attributes_to_search_on", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" + } + "###); + }, + ) + .await; - let expected_response = json!({ - "message": format!("Index `{}`: Attribute `unknown` is not searchable. 
Available searchable attributes are: `id, title`.", index.uid), - "code": "invalid_search_attributes_to_search_on", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" - }); - index - .search( - json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}), - |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }, - ) - .await; - - index - .search( - json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}), - |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); - }, - ) - .await; + test_settings_documents_indexing_swapping_and_search( + &DOCUMENTS, + &json!({"searchableAttributes": ["id", "title"]}), + &json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Index `test`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", + "code": "invalid_search_attributes_to_search_on", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on" + } + "###); + }, + ) + .await; } #[actix_rt::test] @@ -1092,6 +1208,9 @@ async fn distinct_at_search_time() { let index = server.unique_index(); let (task, _) = index.create(None).await; index.wait_task(task.uid()).await.succeeded(); + let (response, _code) = + index.add_documents(json!([{"id": 1, "color": "Doggo", "machin": "Action"}]), None).await; + index.wait_task(response.uid()).await.succeeded(); let expected_response = json!({ "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. 
This index does not have configured filterable attributes.", index.uid), diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 7e46c5d15..33b906d0f 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -1,7 +1,9 @@ use meili_snap::snapshot; +use meilisearch::Opt; use once_cell::sync::Lazy; +use tempfile::TempDir; -use crate::common::{Server, Value}; +use crate::common::{default_settings, Server, Value, NESTED_DOCUMENTS}; use crate::json; static DOCUMENTS: Lazy = Lazy::new(|| { @@ -34,6 +36,62 @@ static DOCUMENTS: Lazy = Lazy::new(|| { ]) }); +async fn test_settings_documents_indexing_swapping_and_facet_search( + documents: &Value, + settings: &Value, + query: &Value, + test: impl Fn(Value, actix_http::StatusCode) + std::panic::UnwindSafe + Clone, +) { + let temp = TempDir::new().unwrap(); + let server = Server::new_with_options(Opt { ..default_settings(temp.path()) }).await.unwrap(); + + eprintln!("Documents -> Settings -> test"); + let index = server.index("test"); + + let (task, code) = index.add_documents(documents.clone(), None).await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + let (task, code) = index.update_settings(settings.clone()).await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + let (response, code) = index.facet_search(query.clone()).await; + insta::allow_duplicates! 
{ + test(response, code); + } + + let (task, code) = server.delete_index("test").await; + assert_eq!(code, 202, "{}", task); + let response = server.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + eprintln!("Settings -> Documents -> test"); + let index = server.index("test"); + + let (task, code) = index.update_settings(settings.clone()).await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + let (task, code) = index.add_documents(documents.clone(), None).await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + let (response, code) = index.facet_search(query.clone()).await; + insta::allow_duplicates! { + test(response, code); + } + + let (task, code) = server.delete_index("test").await; + assert_eq!(code, 202, "{}", task); + let response = server.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); +} + #[actix_rt::test] async fn simple_facet_search() { let server = Server::new().await; @@ -436,3 +494,124 @@ async fn deactivate_facet_search_add_documents_and_reset_facet_search() { assert_eq!(code, 200, "{}", response); assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2); } + +#[actix_rt::test] +async fn facet_search_with_filterable_attributes_rules() { + test_settings_documents_indexing_swapping_and_facet_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["genres"]}), + &json!({"facetName": "genres", "facetQuery": "a"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(response["facetHits"], @r###"[{"value":"Action","count":3},{"value":"Adventure","count":2}]"###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_facet_search( + &DOCUMENTS, + &json!({"filterableAttributes": [{"patterns": ["genres"], "features": {"facetSearch": true, "filter": {"equality": 
false, "comparison": false}}}]}), + &json!({"facetName": "genres", "facetQuery": "a"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(response["facetHits"], @r###"[{"value":"Action","count":3},{"value":"Adventure","count":2}]"###); + }, + ).await; + + test_settings_documents_indexing_swapping_and_facet_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": ["doggos.name"]}), + &json!({"facetName": "doggos.name", "facetQuery": "b"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(response["facetHits"], @r###"[{"value":"bobby","count":1},{"value":"buddy","count":1}]"###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_facet_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{"patterns": ["doggos.name"], "features": {"facetSearch": true, "filter": {"equality": false, "comparison": false}}}]}), + &json!({"facetName": "doggos.name", "facetQuery": "b"}), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(response["facetHits"], @r###"[{"value":"bobby","count":1},{"value":"buddy","count":1}]"###); + }, + ).await; +} + +#[actix_rt::test] +async fn facet_search_with_filterable_attributes_rules_errors() { + test_settings_documents_indexing_swapping_and_facet_search( + &DOCUMENTS, + &json!({"filterableAttributes": ["genres"]}), + &json!({"facetName": "invalid", "facetQuery": "a"}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response["message"], @r###""Attribute `invalid` is not facet-searchable. Available facet-searchable attributes are: `genres`. 
To make it facet-searchable add it to the `filterableAttributes` index settings.""###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_facet_search( + &DOCUMENTS, + &json!({"filterableAttributes": [{"patterns": ["genres"]}]}), + &json!({"facetName": "genres", "facetQuery": "a"}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response["message"], @r###""Attribute `genres` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_facet_search( + &DOCUMENTS, + &json!({"filterableAttributes": [{"patterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}), + &json!({"facetName": "genres", "facetQuery": "a"}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response["message"], @r###""Attribute `genres` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###); + }, + ).await; + + test_settings_documents_indexing_swapping_and_facet_search( + &DOCUMENTS, + &json!({"filterableAttributes": [{"patterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}), + &json!({"facetName": "genres", "facetQuery": "a"}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response["message"], @r###""Attribute `genres` is not facet-searchable. This index does not have configured facet-searchable attributes. 
To make it facet-searchable add it to the `filterableAttributes` index settings.""###); + }, + ).await; + + test_settings_documents_indexing_swapping_and_facet_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{"patterns": ["doggos.name"]}]}), + &json!({"facetName": "invalid.name", "facetQuery": "b"}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response["message"], @r###""Attribute `invalid.name` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_facet_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{"patterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}), + &json!({"facetName": "doggos.name", "facetQuery": "b"}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response["message"], @r###""Attribute `doggos.name` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###); + }, + ).await; + + test_settings_documents_indexing_swapping_and_facet_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{"patterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}), + &json!({"facetName": "doggos.name", "facetQuery": "b"}), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(response["message"], @r###""Attribute `doggos.name` is not facet-searchable. This index does not have configured facet-searchable attributes. 
To make it facet-searchable add it to the `filterableAttributes` index settings.""###); + }, + ).await; +} diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs new file mode 100644 index 000000000..bb268ccf5 --- /dev/null +++ b/crates/meilisearch/tests/search/filters.rs @@ -0,0 +1,625 @@ +use meili_snap::{json_string, snapshot}; +use meilisearch::Opt; +use tempfile::TempDir; + +use super::test_settings_documents_indexing_swapping_and_search; +use crate::{ + common::{default_settings, shared_index_with_documents, Server, DOCUMENTS, NESTED_DOCUMENTS}, + json, +}; + +#[actix_rt::test] +async fn search_with_filter_string_notation() { + let server = Server::new().await; + let index = server.index("test"); + + let (_, code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; + meili_snap::snapshot!(code, @"202 Accepted"); + + let documents = DOCUMENTS.clone(); + let (task, code) = index.add_documents(documents, None).await; + meili_snap::snapshot!(code, @"202 Accepted"); + let res = index.wait_task(task.uid()).await; + meili_snap::snapshot!(res["status"], @r###""succeeded""###); + + index + .search( + json!({ + "filter": "title = Gläss" + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 1); + }, + ) + .await; + + let index = server.index("nested"); + + let (_, code) = + index.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})).await; + meili_snap::snapshot!(code, @"202 Accepted"); + + let documents = NESTED_DOCUMENTS.clone(); + let (task, code) = index.add_documents(documents, None).await; + meili_snap::snapshot!(code, @"202 Accepted"); + let res = index.wait_task(task.uid()).await; + meili_snap::snapshot!(res["status"], @r###""succeeded""###); + + index + .search( + json!({ + "filter": "cattos = pésti" + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + 
assert_eq!(response["hits"].as_array().unwrap().len(), 1); + assert_eq!(response["hits"][0]["id"], json!(852)); + }, + ) + .await; + + index + .search( + json!({ + "filter": "doggos.age > 5" + }), + |response, code| { + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 2); + assert_eq!(response["hits"][0]["id"], json!(654)); + assert_eq!(response["hits"][1]["id"], json!(951)); + }, + ) + .await; +} + +#[actix_rt::test] +async fn search_with_filter_array_notation() { + let index = shared_index_with_documents().await; + let (response, code) = index + .search_post(json!({ + "filter": ["title = Gläss"] + })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 1); + + let (response, code) = index + .search_post(json!({ + "filter": [["title = Gläss", "title = \"Shazam!\"", "title = \"Escape Room\""]] + })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 3); +} + +#[actix_rt::test] +async fn search_with_contains_filter() { + let temp = TempDir::new().unwrap(); + let server = Server::new_with_options(Opt { + experimental_contains_filter: true, + ..default_settings(temp.path()) + }) + .await + .unwrap(); + let index = server.index("movies"); + + index.update_settings(json!({"filterableAttributes": ["title"]})).await; + + let documents = DOCUMENTS.clone(); + let (request, _code) = index.add_documents(documents, None).await; + index.wait_task(request.uid()).await.succeeded(); + + let (response, code) = index + .search_post(json!({ + "filter": "title CONTAINS cap" + })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 2); +} + +#[actix_rt::test] +async fn search_with_pattern_filter_settings() { + // Check if the Equality filter works with patterns + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": 
[{"patterns": ["cattos","doggos.age"]}]}), + &json!({ + "filter": "cattos = pésti" + }), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + } + ] + "###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": true, "comparison": false} + } + }]}), + &json!({ + "filter": "cattos = pésti" + }), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + } + ] + "###); + }, + ) + .await; + + // Check if the Comparison filter works with patterns + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": false, "comparison": true} + } + }]}), + &json!({ + "filter": "doggos.age > 2" + }), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ] + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5 + }, + { + "name": "fast", + 
"age": 6 + } + ], + "cattos": [ + "moumoute", + "gomez" + ] + } + ] + "###); + }, + ) + .await; +} + +#[actix_rt::test] +async fn search_with_pattern_filter_settings_scenario_1() { + let temp = TempDir::new().unwrap(); + let server = Server::new_with_options(Opt { ..default_settings(temp.path()) }).await.unwrap(); + + eprintln!("Documents -> Settings -> test"); + let index = server.index("test"); + + let (task, code) = index.add_documents(NESTED_DOCUMENTS.clone(), None).await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + snapshot!(response["status"], @r###""succeeded""###); + + let (task, code) = index + .update_settings(json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": true, "comparison": false} + } + }]})) + .await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + snapshot!(response["status"], @r###""succeeded""###); + + // Check if the Equality filter works + index + .search( + json!({ + "filter": "cattos = pésti" + }), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + } + ] + "###); + }, + ) + .await; + + // Check if the Comparison filter returns an error + index + .search( + json!({ + "filter": "doggos.age > 2" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; + + // 
Update the settings activate comparison filter + let (task, code) = index + .update_settings(json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": true, "comparison": true} + } + }]})) + .await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + snapshot!(response["status"], @r###""succeeded""###); + + // Check if the Equality filter works + index + .search( + json!({ + "filter": "cattos = pésti" + }), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + } + ] + "###); + }, + ) + .await; + + // Check if the Comparison filter works + index + .search( + json!({ + "filter": "doggos.age > 2" + }), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ] + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5 + }, + { + "name": "fast", + "age": 6 + } + ], + "cattos": [ + "moumoute", + "gomez" + ] + } + ] + "###); + }, + ) + .await; + + // Update the settings deactivate equality filter + let (task, code) = index + .update_settings(json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": false, "comparison": true} + } + }]})) + .await; + assert_eq!(code, 202, "{}", task); + let response = 
index.wait_task(task.uid()).await; + snapshot!(response["status"], @r###""succeeded""###); + + // Check if the Equality filter returns an error + index + .search( + json!({ + "filter": "cattos = pésti" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`, allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; + + // Check if the Comparison filter works + index + .search( + json!({ + "filter": "doggos.age > 2" + }), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ] + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5 + }, + { + "name": "fast", + "age": 6 + } + ], + "cattos": [ + "moumoute", + "gomez" + ] + } + ] + "###); + }, + ) + .await; + + // rollback the settings + let (task, code) = index + .update_settings(json!({"filterableAttributes": [{ + "patterns": ["cattos","doggos.age"], + "features": { + "facetSearch": false, + "filter": {"equality": true, "comparison": false} + } + }]})) + .await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + snapshot!(response["status"], @r###""succeeded""###); + + // Check if the Equality filter works + index + .search( + json!({ + "filter": "cattos = pésti" + }), + |response, code| { + 
snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + } + ] + "###); + }, + ) + .await; + + // Check if the Comparison filter returns an error + index + .search( + json!({ + "filter": "doggos.age > 2" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; +} diff --git a/crates/meilisearch/tests/search/geo.rs b/crates/meilisearch/tests/search/geo.rs index b0cc8b6ca..a314ca241 100644 --- a/crates/meilisearch/tests/search/geo.rs +++ b/crates/meilisearch/tests/search/geo.rs @@ -1,9 +1,12 @@ use meili_snap::{json_string, snapshot}; +use meilisearch_types::milli::constants::RESERVED_GEO_FIELD_NAME; use once_cell::sync::Lazy; use crate::common::{Server, Value}; use crate::json; +use super::test_settings_documents_indexing_swapping_and_search; + static DOCUMENTS: Lazy = Lazy::new(|| { json!([ { @@ -184,3 +187,184 @@ async fn bug_4640() { ) .await; } + +#[actix_rt::test] +async fn geo_asc_with_words() { + let documents = json!([ + { "id": 0, "doggo": "jean", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 0 } }, + { "id": 1, "doggo": "intel", RESERVED_GEO_FIELD_NAME: { "lat": 88, "lng": 0 } }, + { "id": 2, "doggo": "jean bob", RESERVED_GEO_FIELD_NAME: { "lat": -89, "lng": 0 } }, + { "id": 3, "doggo": "jean michel", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 178 } }, + { "id": 4, "doggo": "bob marley", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": -179 } }, + ]); + + 
test_settings_documents_indexing_swapping_and_search( + &documents, + &json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "geo:asc"]}), + &json!({"q": "jean"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "id": 0, + "doggo": "jean", + "_geo": { + "lat": 0, + "lng": 0 + } + }, + { + "id": 2, + "doggo": "jean bob", + "_geo": { + "lat": -89, + "lng": 0 + } + }, + { + "id": 3, + "doggo": "jean michel", + "_geo": { + "lat": 0, + "lng": 178 + } + } + ], + "query": "jean", + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_search( + &documents, + &json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "geo:asc"]}), + &json!({"q": "bob"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "id": 2, + "doggo": "jean bob", + "_geo": { + "lat": -89, + "lng": 0 + } + }, + { + "id": 4, + "doggo": "bob marley", + "_geo": { + "lat": 0, + "lng": -179 + } + } + ], + "query": "bob", + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_search( + &documents, + &json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "geo:asc"]}), + &json!({"q": "intel"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "id": 1, + "doggo": "intel", + "_geo": { + "lat": 88, + "lng": 0 + } + } + ], + "query": "intel", + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + }, + ) + .await; +} + +#[actix_rt::test] +async fn 
geo_sort_with_words() { + let documents = json!([ + { "id": 0, "doggo": "jean", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 0 } }, + { "id": 1, "doggo": "intel", RESERVED_GEO_FIELD_NAME: { "lat": 88, "lng": 0 } }, + { "id": 2, "doggo": "jean bob", RESERVED_GEO_FIELD_NAME: { "lat": -89, "lng": 0 } }, + { "id": 3, "doggo": "jean michel", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 178 } }, + { "id": 4, "doggo": "bob marley", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": -179 } }, + ]); + + test_settings_documents_indexing_swapping_and_search( + &documents, + &json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "sort"], "sortableAttributes": [RESERVED_GEO_FIELD_NAME]}), + &json!({"q": "jean", "sort": ["_geoPoint(0.0, 0.0):asc"]}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###" + { + "hits": [ + { + "id": 0, + "doggo": "jean", + "_geo": { + "lat": 0, + "lng": 0 + }, + "_geoDistance": 0 + }, + { + "id": 2, + "doggo": "jean bob", + "_geo": { + "lat": -89, + "lng": 0 + }, + "_geoDistance": 9896348 + }, + { + "id": 3, + "doggo": "jean michel", + "_geo": { + "lat": 0, + "lng": 178 + }, + "_geoDistance": 19792697 + } + ], + "query": "jean", + "processingTimeMs": "[time]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); + }, + ) + .await; +} diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs index a5fa94eea..2f3e60f34 100644 --- a/crates/meilisearch/tests/search/mod.rs +++ b/crates/meilisearch/tests/search/mod.rs @@ -4,6 +4,7 @@ mod distinct; mod errors; mod facet_search; +mod filters; mod formatted; mod geo; mod hybrid; @@ -21,10 +22,58 @@ use tempfile::TempDir; use crate::common::{ default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server, - DOCUMENTS, FRUITS_DOCUMENTS, NESTED_DOCUMENTS, SCORE_DOCUMENTS, VECTOR_DOCUMENTS, + Value, DOCUMENTS, FRUITS_DOCUMENTS, 
NESTED_DOCUMENTS, SCORE_DOCUMENTS, VECTOR_DOCUMENTS, }; use crate::json; +async fn test_settings_documents_indexing_swapping_and_search( + documents: &Value, + settings: &Value, + query: &Value, + test: impl Fn(Value, actix_http::StatusCode) + std::panic::UnwindSafe + Clone, +) { + let temp = TempDir::new().unwrap(); + let server = Server::new_with_options(Opt { ..default_settings(temp.path()) }).await.unwrap(); + + eprintln!("Documents -> Settings -> test"); + let index = server.index("test"); + + let (task, code) = index.add_documents(documents.clone(), None).await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + let (task, code) = index.update_settings(settings.clone()).await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + index.search(query.clone(), test.clone()).await; + let (task, code) = server.delete_index("test").await; + assert_eq!(code, 202, "{}", task); + let response = server.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + eprintln!("Settings -> Documents -> test"); + let index = server.index("test"); + + let (task, code) = index.update_settings(settings.clone()).await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + let (task, code) = index.add_documents(documents.clone(), None).await; + assert_eq!(code, 202, "{}", task); + let response = index.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); + + index.search(query.clone(), test.clone()).await; + let (task, code) = server.delete_index("test").await; + assert_eq!(code, 202, "{}", task); + let response = server.wait_task(task.uid()).await; + assert!(response.is_success(), "{:?}", response); +} + #[actix_rt::test] async fn simple_placeholder_search() 
{ let index = shared_index_with_documents().await; @@ -355,118 +404,6 @@ async fn search_multiple_params() { .await; } -#[actix_rt::test] -async fn search_with_filter_string_notation() { - let server = Server::new().await; - let index = server.index("test"); - - let (_, code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; - meili_snap::snapshot!(code, @"202 Accepted"); - - let documents = DOCUMENTS.clone(); - let (task, code) = index.add_documents(documents, None).await; - meili_snap::snapshot!(code, @"202 Accepted"); - let res = index.wait_task(task.uid()).await; - meili_snap::snapshot!(res["status"], @r###""succeeded""###); - - index - .search( - json!({ - "filter": "title = Gläss" - }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 1); - }, - ) - .await; - - let index = server.index("nested"); - - let (_, code) = - index.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})).await; - meili_snap::snapshot!(code, @"202 Accepted"); - - let documents = NESTED_DOCUMENTS.clone(); - let (task, code) = index.add_documents(documents, None).await; - meili_snap::snapshot!(code, @"202 Accepted"); - let res = index.wait_task(task.uid()).await; - meili_snap::snapshot!(res["status"], @r###""succeeded""###); - - index - .search( - json!({ - "filter": "cattos = pésti" - }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 1); - assert_eq!(response["hits"][0]["id"], json!(852)); - }, - ) - .await; - - index - .search( - json!({ - "filter": "doggos.age > 5" - }), - |response, code| { - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 2); - assert_eq!(response["hits"][0]["id"], json!(654)); - assert_eq!(response["hits"][1]["id"], json!(951)); - }, - ) - .await; -} - -#[actix_rt::test] -async fn search_with_filter_array_notation() { - let index = 
shared_index_with_documents().await; - let (response, code) = index - .search_post(json!({ - "filter": ["title = Gläss"] - })) - .await; - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 1); - - let (response, code) = index - .search_post(json!({ - "filter": [["title = Gläss", "title = \"Shazam!\"", "title = \"Escape Room\""]] - })) - .await; - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 3); -} - -#[actix_rt::test] -async fn search_with_contains_filter() { - let temp = TempDir::new().unwrap(); - let server = Server::new_with_options(Opt { - experimental_contains_filter: true, - ..default_settings(temp.path()) - }) - .await - .unwrap(); - let index = server.index("movies"); - - index.update_settings(json!({"filterableAttributes": ["title"]})).await; - - let documents = DOCUMENTS.clone(); - let (request, _code) = index.add_documents(documents, None).await; - index.wait_task(request.uid()).await.succeeded(); - - let (response, code) = index - .search_post(json!({ - "filter": "title CONTAINS cap" - })) - .await; - assert_eq!(code, 200, "{}", response); - assert_eq!(response["hits"].as_array().unwrap().len(), 2); -} - #[actix_rt::test] async fn search_with_sort_on_numbers() { let index = shared_index_with_documents().await; @@ -589,7 +526,7 @@ async fn search_facet_distribution() { |response, code| { assert_eq!(code, 200, "{}", response); let dist = response["facetDistribution"].as_object().unwrap(); - assert_eq!(dist.len(), 1); + assert_eq!(dist.len(), 1, "{:?}", dist); assert_eq!( dist["doggos.name"], json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1}) @@ -606,7 +543,7 @@ async fn search_facet_distribution() { |response, code| { assert_eq!(code, 200, "{}", response); let dist = response["facetDistribution"].as_object().unwrap(); - assert_eq!(dist.len(), 3); + assert_eq!(dist.len(), 3, "{:?}", dist); assert_eq!( dist["doggos.name"], json!({ "bobby": 
1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1}) @@ -1559,6 +1496,293 @@ async fn change_attributes_settings() { .await; } +#[actix_rt::test] +async fn test_nested_fields() { + let documents = json!([ + { + "id": 0, + "title": "The zeroth document", + }, + { + "id": 1, + "title": "The first document", + "nested": { + "object": "field", + "machin": "bidule", + }, + }, + { + "id": 2, + "title": "The second document", + "nested": [ + "array", + { + "object": "field", + }, + { + "prout": "truc", + "machin": "lol", + }, + ], + }, + { + "id": 3, + "title": "The third document", + "nested": "I lied", + }, + ]); + + let settings = json!({ + "searchableAttributes": ["title", "nested.object", "nested.machin"], + "filterableAttributes": ["title", "nested.object", "nested.machin"] + }); + + // Test empty search returns all documents + test_settings_documents_indexing_swapping_and_search( + &documents, + &settings, + &json!({"q": "document"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "title": "The zeroth document" + }, + { + "id": 1, + "title": "The first document", + "nested": { + "object": "field", + "machin": "bidule" + } + }, + { + "id": 2, + "title": "The second document", + "nested": [ + "array", + { + "object": "field" + }, + { + "prout": "truc", + "machin": "lol" + } + ] + }, + { + "id": 3, + "title": "The third document", + "nested": "I lied" + } + ] + "###); + }, + ) + .await; + + // Test searching specific documents + test_settings_documents_indexing_swapping_and_search( + &documents, + &settings, + &json!({"q": "zeroth"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "title": "The zeroth document" + } + ] + "###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_search( + &documents, + &settings, + &json!({"q": "first"}), + |response, code| { + assert_eq!(code, 
200, "{}", response); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "title": "The first document", + "nested": { + "object": "field", + "machin": "bidule" + } + } + ] + "###); + }, + ) + .await; + + // Test searching nested fields + test_settings_documents_indexing_swapping_and_search( + &documents, + &settings, + &json!({"q": "field"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "title": "The first document", + "nested": { + "object": "field", + "machin": "bidule" + } + }, + { + "id": 2, + "title": "The second document", + "nested": [ + "array", + { + "object": "field" + }, + { + "prout": "truc", + "machin": "lol" + } + ] + } + ] + "###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_search( + &documents, + &settings, + &json!({"q": "array"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + // nested is not searchable + snapshot!(json_string!(response["hits"]), @"[]"); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_search( + &documents, + &settings, + &json!({"q": "lied"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + // nested is not searchable + snapshot!(json_string!(response["hits"]), @"[]"); + }, + ) + .await; + + // Test filtering on nested fields + test_settings_documents_indexing_swapping_and_search( + &documents, + &settings, + &json!({"filter": "nested.object = field"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "title": "The first document", + "nested": { + "object": "field", + "machin": "bidule" + } + }, + { + "id": 2, + "title": "The second document", + "nested": [ + "array", + { + "object": "field" + }, + { + "prout": "truc", + "machin": "lol" + } + ] + } + ] + "###); + }, + ) + .await; + + test_settings_documents_indexing_swapping_and_search( + &documents, + 
&settings, + &json!({"filter": "nested.machin = bidule"}), + |response, code| { + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "title": "The first document", + "nested": { + "object": "field", + "machin": "bidule" + } + } + ] + "###); + }, + ) + .await; + + // Test filtering on non-filterable nested field fails + test_settings_documents_indexing_swapping_and_search( + &documents, + &settings, + &json!({"filter": "nested = array"}), + |response, code| { + assert_eq!(code, 400, "{}", response); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Attribute `nested` is not filterable. Available filterable attributes are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = array", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; + + // Test filtering on non-filterable nested field fails + test_settings_documents_indexing_swapping_and_search( + &documents, + &settings, + &json!({"filter": r#"nested = "I lied""#}), + |response, code| { + assert_eq!(code, 400, "{}", response); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Attribute `nested` is not filterable. 
Available filterable attributes are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = \"I lied\"", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; +} + /// Modifying facets with different casing should work correctly #[actix_rt::test] async fn change_facet_casing() { From 23e07f1a9352ee160d47e986f6fe936136eb4486 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 27 Feb 2025 16:47:57 +0100 Subject: [PATCH 12/35] Attribute positions changed in snapshots **Reason:** Only the existing fields are registered in the fieldid_map --- ...__attribute_position_different_fields.snap | 58 +++++++++---------- ...e_position__attribute_position_ngrams.snap | 58 +++++++++---------- ...position__attribute_position_repeated.snap | 22 +++---- ...position__attribute_position_simple-2.snap | 58 +++++++++---------- 4 files changed, 98 insertions(+), 98 deletions(-) diff --git a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap index 2626ee7d4..bf5b14f47 100644 --- a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap +++ b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap @@ -1,5 +1,5 @@ --- -source: milli/src/search/new/tests/attribute_position.rs +source: crates/milli/src/search/new/tests/attribute_position.rs expression: "format!(\"{document_ids_scores:#?}\")" --- [ @@ -8,8 +8,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -25,8 +25,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid(
Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -42,8 +42,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -59,8 +59,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -76,8 +76,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -93,8 +93,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -110,8 +110,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -144,8 +144,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -161,8 +161,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -178,8 +178,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -195,8 +195,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -212,8 +212,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -229,8 +229,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( diff --git 
a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap index 2626ee7d4..bf5b14f47 100644 --- a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap +++ b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap @@ -1,5 +1,5 @@ --- -source: milli/src/search/new/tests/attribute_position.rs +source: crates/milli/src/search/new/tests/attribute_position.rs expression: "format!(\"{document_ids_scores:#?}\")" --- [ @@ -8,8 +8,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -25,8 +25,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -42,8 +42,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -59,8 +59,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -76,8 +76,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -93,8 +93,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -110,8 +110,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -144,8 +144,8 @@ expression: 
"format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -161,8 +161,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -178,8 +178,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -195,8 +195,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -212,8 +212,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -229,8 +229,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( diff --git a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap index 73dec5f8b..af35d0d8d 100644 --- a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap +++ b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap @@ -1,5 +1,5 @@ --- -source: milli/src/search/new/tests/attribute_position.rs +source: crates/milli/src/search/new/tests/attribute_position.rs expression: "format!(\"{document_ids_scores:#?}\")" --- [ @@ -8,8 +8,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 11, - max_rank: 11, + rank: 6, + max_rank: 6, }, ), Position( @@ -25,8 +25,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 11, - max_rank: 11, + rank: 6, + max_rank: 6, }, ), Position( @@ -42,8 +42,8 @@ expression: 
"format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 11, - max_rank: 11, + rank: 6, + max_rank: 6, }, ), Position( @@ -59,8 +59,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 11, - max_rank: 11, + rank: 6, + max_rank: 6, }, ), Position( @@ -76,8 +76,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 11, - max_rank: 11, + rank: 6, + max_rank: 6, }, ), Position( diff --git a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap index 2626ee7d4..bf5b14f47 100644 --- a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap +++ b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap @@ -1,5 +1,5 @@ --- -source: milli/src/search/new/tests/attribute_position.rs +source: crates/milli/src/search/new/tests/attribute_position.rs expression: "format!(\"{document_ids_scores:#?}\")" --- [ @@ -8,8 +8,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -25,8 +25,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -42,8 +42,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -59,8 +59,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -76,8 +76,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -93,8 +93,8 @@ expression: 
"format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -110,8 +110,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -144,8 +144,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -161,8 +161,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -178,8 +178,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -195,8 +195,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -212,8 +212,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( @@ -229,8 +229,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 5, - max_rank: 5, + rank: 3, + max_rank: 3, }, ), Position( From d35470e29b0cad83cce66f48c1bccb1c0137f722 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 27 Feb 2025 16:48:53 +0100 Subject: [PATCH 13/35] Update dumps **Impact:** - dump import --- crates/dump/src/reader/compat/v5_to_v6.rs | 11 ++++++++++- crates/dump/src/reader/v6/mod.rs | 2 ++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/crates/dump/src/reader/compat/v5_to_v6.rs b/crates/dump/src/reader/compat/v5_to_v6.rs index 2dd4ed761..6b63e7c6b 100644 --- a/crates/dump/src/reader/compat/v5_to_v6.rs +++ b/crates/dump/src/reader/compat/v5_to_v6.rs @@ -322,7 +322,16 @@ impl From> for v6::Settings { v6::Settings { 
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(), searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(), - filterable_attributes: settings.filterable_attributes.into(), + filterable_attributes: match settings.filterable_attributes { + v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set( + filterable_attributes + .into_iter() + .map(v6::FilterableAttributesRule::Field) + .collect(), + ), + v5::settings::Setting::Reset => v6::Setting::Reset, + v5::settings::Setting::NotSet => v6::Setting::NotSet, + }, sortable_attributes: settings.sortable_attributes.into(), ranking_rules: { match settings.ranking_rules { diff --git a/crates/dump/src/reader/v6/mod.rs b/crates/dump/src/reader/v6/mod.rs index d9ceec114..0b4ba5bdd 100644 --- a/crates/dump/src/reader/v6/mod.rs +++ b/crates/dump/src/reader/v6/mod.rs @@ -46,6 +46,8 @@ pub type ResponseError = meilisearch_types::error::ResponseError; pub type Code = meilisearch_types::error::Code; pub type RankingRuleView = meilisearch_types::settings::RankingRuleView; +pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule; + pub struct V6Reader { dump: TempDir, instance_uid: Option, From 0401c4e51175917e2a21df386cdcd3bb7b34fe9b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 3 Mar 2025 16:08:21 +0100 Subject: [PATCH 14/35] Add a settings API test --- .../tests/settings/get_settings.rs | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index 2a7d713f2..16ab9a7ae 100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -1,3 +1,5 @@ +use meili_snap::{json_string, snapshot}; + use crate::common::Server; use crate::json; @@ -510,3 +512,62 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() { assert_eq!(response, json!(null)); } + 
+#[actix_rt::test] +async fn granular_filterable_attributes() { + let server = Server::new().await; + let index = server.index("test"); + index.create(None).await; + + let (response, code) = + index.update_settings(json!({ "filterableAttributes": [ + { "patterns": ["name"], "features": { "facetSearch": true, "filter": {"equality": true, "comparison": false} } }, + { "patterns": ["age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": true} } }, + { "patterns": ["id"] } + ] })).await; + assert_eq!(code, 202); + index.wait_task(response.uid()).await.succeeded(); + + let (response, code) = index.settings().await; + assert_eq!(code, 200, "{}", response); + snapshot!(json_string!(response["filterableAttributes"]), @r###" + [ + { + "patterns": [ + "name" + ], + "features": { + "facetSearch": true, + "filter": { + "equality": true, + "comparison": false + } + } + }, + { + "patterns": [ + "age" + ], + "features": { + "facetSearch": false, + "filter": { + "equality": true, + "comparison": true + } + } + }, + { + "patterns": [ + "id" + ], + "features": { + "facetSearch": false, + "filter": { + "equality": true, + "comparison": false + } + } + } + ] + "###); +} From a7a62e5e4c2603907cbe1699b267dc6deac74994 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Mar 2025 08:49:18 +0100 Subject: [PATCH 15/35] Add some documentation in modules --- crates/milli/src/attribute_patterns.rs | 23 +++++++++ .../milli/src/filterable_attributes_rules.rs | 50 +++++++++++++++++++ 2 files changed, 73 insertions(+) diff --git a/crates/milli/src/attribute_patterns.rs b/crates/milli/src/attribute_patterns.rs index baf239c3f..b08341bd3 100644 --- a/crates/milli/src/attribute_patterns.rs +++ b/crates/milli/src/attribute_patterns.rs @@ -28,6 +28,7 @@ impl From> for AttributePatterns { } impl AttributePatterns { + /// Match a string against the attribute patterns using the match_pattern function. 
pub fn match_str(&self, str: &str) -> PatternMatch { let mut pattern_match = PatternMatch::NoMatch; for pattern in &self.patterns { @@ -41,22 +42,35 @@ impl AttributePatterns { } } +/// Match a string against a pattern. +/// +/// The pattern can be a wildcard, a prefix, a suffix or an exact match. +/// +/// # Arguments +/// +/// * `pattern` - The pattern to match against. +/// * `str` - The string to match against the pattern. fn match_pattern(pattern: &str, str: &str) -> PatternMatch { + // If the pattern is a wildcard, return Match if pattern == "*" { return PatternMatch::Match; } else if pattern.starts_with('*') && pattern.ends_with('*') { + // If the starts and ends with a wildcard, return Match if the string contains the pattern without the wildcards if str.contains(&pattern[1..pattern.len() - 1]) { return PatternMatch::Match; } } else if let Some(pattern) = pattern.strip_prefix('*') { + // If the pattern starts with a wildcard, return Match if the string ends with the pattern without the wildcard if str.ends_with(pattern) { return PatternMatch::Match; } } else if let Some(pattern) = pattern.strip_suffix('*') { + // If the pattern ends with a wildcard, return Match if the string starts with the pattern without the wildcard if str.starts_with(pattern) { return PatternMatch::Match; } } else if pattern == str { + // If the pattern is exactly the string, return Match return PatternMatch::Match; } @@ -68,6 +82,15 @@ fn match_pattern(pattern: &str, str: &str) -> PatternMatch { } } +/// Match a field against a pattern using the legacy behavior. +/// +/// A field matches a pattern if it is a parent of the pattern or if it is the pattern itself. +/// This behavior is used to match the sortable attributes, the searchable attributes and the filterable attributes rules `Field`. +/// +/// # Arguments +/// +/// * `pattern` - The pattern to match against. +/// * `field` - The field to match against the pattern. 
pub fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch { if is_faceted_by(field, pattern) { // If the field matches the pattern or is a nested field of the pattern, return Match (legacy behavior) diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index 0b7c9092b..12e27572c 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -17,20 +17,30 @@ pub enum FilterableAttributesRule { } impl FilterableAttributesRule { + /// Match a field against the filterable attributes rule. pub fn match_str(&self, field: &str) -> PatternMatch { match self { + // If the rule is a field, match the field against the pattern using the legacy behavior FilterableAttributesRule::Field(pattern) => match_field_legacy(pattern, field), + // If the rule is a pattern, match the field against the pattern using the new behavior FilterableAttributesRule::Pattern(patterns) => patterns.match_str(field), } } + /// Check if the rule is a geo field. + /// + /// prefer using `index.is_geo_enabled`, `index.is_geo_filtering_enabled` or `index.is_geo_sorting_enabled` + /// to check if the geo feature is enabled. pub fn has_geo(&self) -> bool { matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME) } + /// Get the features of the rule. pub fn features(&self) -> FilterableAttributesFeatures { match self { + // If the rule is a field, return the legacy default features FilterableAttributesRule::Field(_) => FilterableAttributesFeatures::legacy_default(), + // If the rule is a pattern, return the features of the pattern FilterableAttributesRule::Pattern(patterns) => patterns.features(), } } @@ -66,10 +76,15 @@ pub struct FilterableAttributesFeatures { } impl FilterableAttributesFeatures { + /// Create a new `FilterableAttributesFeatures` with the legacy default features. 
+ /// + /// This is the default behavior for `FilterableAttributesRule::Field`. + /// This will set the facet search to true and activate all the filter operators. pub fn legacy_default() -> Self { Self { facet_search: true, filter: FilterFeatures::legacy_default() } } + /// Create a new `FilterableAttributesFeatures` with no features. pub fn no_features() -> Self { Self { facet_search: false, filter: FilterFeatures::no_features() } } @@ -135,6 +150,7 @@ pub struct FilterFeatures { } impl FilterFeatures { + /// Get the allowed operators for the filter. pub fn allowed_operators(&self) -> Vec { if !self.is_filterable() { return vec![]; @@ -188,10 +204,15 @@ impl FilterFeatures { self.is_filterable() } + /// Create a new `FilterFeatures` with the legacy default features. + /// + /// This is the default behavior for `FilterableAttributesRule::Field`. + /// This will set the equality and comparison to true. pub fn legacy_default() -> Self { Self { equality: true, comparison: true } } + /// Create a new `FilterFeatures` with no features. pub fn no_features() -> Self { Self { equality: false, comparison: false } } @@ -203,6 +224,15 @@ impl Default for FilterFeatures { } } +/// Match a field against a set of filterable attributes rules. +/// +/// This function will return the set of field names that match the given filter. +/// +/// # Arguments +/// +/// * `filterable_attributes` - The set of filterable attributes rules to match against. +/// * `fields_ids_map` - The map of field names to field ids. +/// * `filter` - The filter function to apply to the filterable attributes rules. pub fn filtered_matching_field_names<'fim>( filterable_attributes: &[FilterableAttributesRule], fields_ids_map: &'fim FieldsIdsMap, @@ -222,6 +252,14 @@ pub fn filtered_matching_field_names<'fim>( result } +/// Match a field against a set of filterable attributes rules. +/// +/// This function will return the features that match the given field name. 
+/// +/// # Arguments +/// +/// * `field_name` - The field name to match against. +/// * `filterable_attributes` - The set of filterable attributes rules to match against. pub fn matching_features( field_name: &str, filterable_attributes: &[FilterableAttributesRule], @@ -234,6 +272,12 @@ pub fn matching_features( None } +/// Check if a field is filterable calling the method `FilterableAttributesFeatures::is_filterable()`. +/// +/// # Arguments +/// +/// * `field_name` - The field name to check. +/// * `filterable_attributes` - The set of filterable attributes rules to match against. pub fn is_field_filterable( field_name: &str, filterable_attributes: &[FilterableAttributesRule], @@ -242,6 +286,12 @@ pub fn is_field_filterable( .map_or(false, |features| features.is_filterable()) } +/// Check if a field is facet searchable calling the method `FilterableAttributesFeatures::is_facet_searchable()`. +/// +/// # Arguments +/// +/// * `field_name` - The field name to check. +/// * `filterable_attributes` - The set of filterable attributes rules to match against. 
pub fn is_field_facet_searchable( field_name: &str, filterable_attributes: &[FilterableAttributesRule], From 5fa4b5c50abdcaf7675ef34bbfeb6acdfb29903b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Mar 2025 09:44:52 +0100 Subject: [PATCH 16/35] Add a test on filterable attributes rules priority **Changes:** - Add a new test playing with filterable attributes rules priority - Optimize the faceted field selector avoiding to match false positives --- crates/meilisearch/tests/search/filters.rs | 133 ++++++++++++++++++ .../milli/src/filterable_attributes_rules.rs | 27 +++- 2 files changed, 154 insertions(+), 6 deletions(-) diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs index bb268ccf5..375a4ef63 100644 --- a/crates/meilisearch/tests/search/filters.rs +++ b/crates/meilisearch/tests/search/filters.rs @@ -623,3 +623,136 @@ async fn search_with_pattern_filter_settings_scenario_1() { ) .await; } + +#[actix_rt::test] +async fn test_filterable_attributes_priority() { + // Test that the filterable attributes priority is respected + + // check if doggos.name is filterable + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [ + // deactivated filter + {"patterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, + // activated filter + {"patterns": ["doggos.*"]}, + ]}), + &json!({ + "filter": "doggos.name = bobby" + }), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + } + ] + "###); + }, + ) + .await; + + // check if doggos.name is filterable 2 + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [ + // deactivated 
filter + {"patterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, + // activated filter + {"patterns": ["doggos.*"]}, + ]}), + &json!({ + "filter": "doggos.name = bobby" + }), + |response, code| { + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti" + } + ] + "###); + }, + ) + .await; + + // check if doggos.age is not filterable + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [ + // deactivated filter + {"patterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, + // activated filter + {"patterns": ["doggos.*"]}, + ]}), + &json!({ + "filter": "doggos.age > 2" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attributes are: `doggos.age`, `doggos.name`.\n1:11 doggos.age > 2", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; + + // check if doggos is not filterable + test_settings_documents_indexing_swapping_and_search( + &NESTED_DOCUMENTS, + &json!({"filterableAttributes": [ + // deactivated filter + {"patterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, + // activated filter + {"patterns": ["doggos.*"]}, + ]}), + &json!({ + "filter": "doggos EXISTS" + }), + |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Index `test`: Attribute `doggos` is not filterable. 
Available filterable attributes are: `doggos.age`, `doggos.name`.\n1:7 doggos EXISTS", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + }, + ) + .await; +} diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index 12e27572c..08bccee9b 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -342,15 +342,30 @@ fn match_pattern_by_features( filter: &impl Fn(&FilterableAttributesFeatures) -> bool, ) -> PatternMatch { let mut selection = PatternMatch::NoMatch; + + // `can_match` becomes false if the field name matches (PatternMatch::Match) any pattern that is not facet searchable or filterable, + // this ensures that the field doesn't match a pattern with a lower priority, however it can still match a pattern for a nested field as a parent (PatternMatch::Parent). + // See the test `search::filters::test_filterable_attributes_priority` for more details. 
+ let mut can_match = true; + // Check if the field name matches any pattern that is facet searchable or filterable for pattern in filterable_attributes { - let features = pattern.features(); - if filter(&features) { - match pattern.match_str(field_name) { - PatternMatch::Match => return PatternMatch::Match, - PatternMatch::Parent => selection = PatternMatch::Parent, - PatternMatch::NoMatch => (), + match pattern.match_str(field_name) { + PatternMatch::Match => { + let features = pattern.features(); + if filter(&features) && can_match { + return PatternMatch::Match; + } else { + can_match = false; + } } + PatternMatch::Parent => { + let features = pattern.features(); + if filter(&features) { + selection = PatternMatch::Parent; + } + } + PatternMatch::NoMatch => (), } } From 63e753bde07c916e1f717cdf14b60f1a07e5f4fb Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Mar 2025 12:05:40 +0100 Subject: [PATCH 17/35] Apply PR requests related to settings API --- crates/meilisearch/tests/search/errors.rs | 10 +++---- .../meilisearch/tests/search/facet_search.rs | 16 +++++----- crates/meilisearch/tests/search/filters.rs | 30 +++++++++---------- .../tests/settings/get_settings.rs | 12 ++++---- crates/milli/src/attribute_patterns.rs | 5 ++-- .../milli/src/filterable_attributes_rules.rs | 10 +++---- crates/milli/src/index.rs | 4 +-- 7 files changed, 44 insertions(+), 43 deletions(-) diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index 05f084a0e..05d2d2563 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -881,7 +881,7 @@ async fn search_with_pattern_filter_settings_errors() { test_settings_documents_indexing_swapping_and_search( &NESTED_DOCUMENTS, &json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": ["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": false, "comparison": true} @@ -907,7 
+907,7 @@ async fn search_with_pattern_filter_settings_errors() { test_settings_documents_indexing_swapping_and_search( &NESTED_DOCUMENTS, &json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": ["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": false, "comparison": true} @@ -933,7 +933,7 @@ async fn search_with_pattern_filter_settings_errors() { // Check if the Comparison filter works with patterns test_settings_documents_indexing_swapping_and_search( &NESTED_DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["cattos","doggos.age"]}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["cattos","doggos.age"]}]}), &json!({ "filter": "doggos.age > 2" }), @@ -954,7 +954,7 @@ async fn search_with_pattern_filter_settings_errors() { test_settings_documents_indexing_swapping_and_search( &NESTED_DOCUMENTS, &json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": ["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": false} @@ -980,7 +980,7 @@ async fn search_with_pattern_filter_settings_errors() { test_settings_documents_indexing_swapping_and_search( &NESTED_DOCUMENTS, &json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": ["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": false} diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 33b906d0f..25f894757 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -510,7 +510,7 @@ async fn facet_search_with_filterable_attributes_rules() { test_settings_documents_indexing_swapping_and_facet_search( &DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["genres"], "features": {"facetSearch": true, "filter": {"equality": false, 
"comparison": false}}}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["genres"], "features": {"facetSearch": true, "filter": {"equality": false, "comparison": false}}}]}), &json!({"facetName": "genres", "facetQuery": "a"}), |response, code| { snapshot!(code, @"200 OK"); @@ -531,7 +531,7 @@ async fn facet_search_with_filterable_attributes_rules() { test_settings_documents_indexing_swapping_and_facet_search( &NESTED_DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["doggos.name"], "features": {"facetSearch": true, "filter": {"equality": false, "comparison": false}}}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"], "features": {"facetSearch": true, "filter": {"equality": false, "comparison": false}}}]}), &json!({"facetName": "doggos.name", "facetQuery": "b"}), |response, code| { snapshot!(code, @"200 OK"); @@ -555,7 +555,7 @@ async fn facet_search_with_filterable_attributes_rules_errors() { test_settings_documents_indexing_swapping_and_facet_search( &DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["genres"]}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["genres"]}]}), &json!({"facetName": "genres", "facetQuery": "a"}), |response, code| { snapshot!(code, @"400 Bad Request"); @@ -566,7 +566,7 @@ async fn facet_search_with_filterable_attributes_rules_errors() { test_settings_documents_indexing_swapping_and_facet_search( &DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}), &json!({"facetName": "genres", "facetQuery": "a"}), |response, code| { snapshot!(code, @"400 Bad Request"); @@ -576,7 +576,7 @@ async fn facet_search_with_filterable_attributes_rules_errors() { test_settings_documents_indexing_swapping_and_facet_search( 
&DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}), &json!({"facetName": "genres", "facetQuery": "a"}), |response, code| { snapshot!(code, @"400 Bad Request"); @@ -586,7 +586,7 @@ async fn facet_search_with_filterable_attributes_rules_errors() { test_settings_documents_indexing_swapping_and_facet_search( &NESTED_DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["doggos.name"]}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"]}]}), &json!({"facetName": "invalid.name", "facetQuery": "b"}), |response, code| { snapshot!(code, @"400 Bad Request"); @@ -597,7 +597,7 @@ async fn facet_search_with_filterable_attributes_rules_errors() { test_settings_documents_indexing_swapping_and_facet_search( &NESTED_DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}), &json!({"facetName": "doggos.name", "facetQuery": "b"}), |response, code| { snapshot!(code, @"400 Bad Request"); @@ -607,7 +607,7 @@ async fn facet_search_with_filterable_attributes_rules_errors() { test_settings_documents_indexing_swapping_and_facet_search( &NESTED_DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}), &json!({"facetName": "doggos.name", "facetQuery": 
"b"}), |response, code| { snapshot!(code, @"400 Bad Request"); diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs index 375a4ef63..818ffabaa 100644 --- a/crates/meilisearch/tests/search/filters.rs +++ b/crates/meilisearch/tests/search/filters.rs @@ -125,7 +125,7 @@ async fn search_with_pattern_filter_settings() { // Check if the Equality filter works with patterns test_settings_documents_indexing_swapping_and_search( &NESTED_DOCUMENTS, - &json!({"filterableAttributes": [{"patterns": ["cattos","doggos.age"]}]}), + &json!({"filterableAttributes": [{"attributePatterns": ["cattos","doggos.age"]}]}), &json!({ "filter": "cattos = pésti" }), @@ -158,7 +158,7 @@ async fn search_with_pattern_filter_settings() { test_settings_documents_indexing_swapping_and_search( &NESTED_DOCUMENTS, &json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": ["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": false} @@ -197,7 +197,7 @@ async fn search_with_pattern_filter_settings() { test_settings_documents_indexing_swapping_and_search( &NESTED_DOCUMENTS, &json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": ["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": false, "comparison": true} @@ -282,7 +282,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { let (task, code) = index .update_settings(json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": ["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": false} @@ -348,7 +348,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { // Update the settings activate comparison filter let (task, code) = index .update_settings(json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": 
["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": true} @@ -460,7 +460,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { // Update the settings deactivate equality filter let (task, code) = index .update_settings(json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": ["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": false, "comparison": true} @@ -560,7 +560,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { // rollback the settings let (task, code) = index .update_settings(json!({"filterableAttributes": [{ - "patterns": ["cattos","doggos.age"], + "attributePatterns": ["cattos","doggos.age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": false} @@ -633,9 +633,9 @@ async fn test_filterable_attributes_priority() { &NESTED_DOCUMENTS, &json!({"filterableAttributes": [ // deactivated filter - {"patterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, + {"attributePatterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, // activated filter - {"patterns": ["doggos.*"]}, + {"attributePatterns": ["doggos.*"]}, ]}), &json!({ "filter": "doggos.name = bobby" @@ -671,9 +671,9 @@ async fn test_filterable_attributes_priority() { &NESTED_DOCUMENTS, &json!({"filterableAttributes": [ // deactivated filter - {"patterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, + {"attributePatterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, // activated filter - {"patterns": ["doggos.*"]}, + {"attributePatterns": ["doggos.*"]}, ]}), &json!({ "filter": "doggos.name = bobby" @@ -709,9 +709,9 @@ async fn test_filterable_attributes_priority() { &NESTED_DOCUMENTS, 
&json!({"filterableAttributes": [ // deactivated filter - {"patterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, + {"attributePatterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, // activated filter - {"patterns": ["doggos.*"]}, + {"attributePatterns": ["doggos.*"]}, ]}), &json!({ "filter": "doggos.age > 2" @@ -735,9 +735,9 @@ async fn test_filterable_attributes_priority() { &NESTED_DOCUMENTS, &json!({"filterableAttributes": [ // deactivated filter - {"patterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, + {"attributePatterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}, // activated filter - {"patterns": ["doggos.*"]}, + {"attributePatterns": ["doggos.*"]}, ]}), &json!({ "filter": "doggos EXISTS" diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index 16ab9a7ae..ff9ae5472 100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -521,9 +521,9 @@ async fn granular_filterable_attributes() { let (response, code) = index.update_settings(json!({ "filterableAttributes": [ - { "patterns": ["name"], "features": { "facetSearch": true, "filter": {"equality": true, "comparison": false} } }, - { "patterns": ["age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": true} } }, - { "patterns": ["id"] } + { "attributePatterns": ["name"], "features": { "facetSearch": true, "filter": {"equality": true, "comparison": false} } }, + { "attributePatterns": ["age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": true} } }, + { "attributePatterns": ["id"] } ] })).await; assert_eq!(code, 202); index.wait_task(response.uid()).await.succeeded(); @@ -533,7 +533,7 @@ 
async fn granular_filterable_attributes() { snapshot!(json_string!(response["filterableAttributes"]), @r###" [ { - "patterns": [ + "attributePatterns": [ "name" ], "features": { @@ -545,7 +545,7 @@ async fn granular_filterable_attributes() { } }, { - "patterns": [ + "attributePatterns": [ "age" ], "features": { @@ -557,7 +557,7 @@ async fn granular_filterable_attributes() { } }, { - "patterns": [ + "attributePatterns": [ "id" ], "features": { diff --git a/crates/milli/src/attribute_patterns.rs b/crates/milli/src/attribute_patterns.rs index b08341bd3..c7045c68e 100644 --- a/crates/milli/src/attribute_patterns.rs +++ b/crates/milli/src/attribute_patterns.rs @@ -8,7 +8,7 @@ use crate::is_faceted_by; #[repr(transparent)] #[serde(transparent)] pub struct AttributePatterns { - #[schema(value_type = Vec)] + #[schema(example = json!(["title", "overview_*", "release_date"]))] pub patterns: Vec, } @@ -121,7 +121,8 @@ pub fn match_distinct_field(distinct_field: Option<&str>, field: &str) -> Patter #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum PatternMatch { - /// The field is a parent of the of a nested field that matches the pattern + /// The field is a parent of a nested field that matches the pattern + /// For example, the field is `toto`, and the pattern is `toto.titi` Parent, /// The field matches the pattern Match, diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index 08bccee9b..efef46810 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -50,7 +50,7 @@ impl FilterableAttributesRule { #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] pub struct FilterableAttributesPatterns { - pub patterns: AttributePatterns, + pub attribute_patterns: AttributePatterns, #[serde(default)] #[deserr(default)] pub features: FilterableAttributesFeatures, @@ -58,15 +58,15 @@ pub struct 
FilterableAttributesPatterns { impl FilterableAttributesPatterns { pub fn match_str(&self, field: &str) -> PatternMatch { - self.patterns.match_str(field) + self.attribute_patterns.match_str(field) } pub fn features(&self) -> FilterableAttributesFeatures { - self.features.clone() + self.features } } -#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)] +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug, Deserr, ToSchema)] #[serde(deny_unknown_fields, rename_all = "camelCase")] #[deserr(rename_all = camelCase, deny_unknown_fields)] #[derive(Default)] @@ -143,7 +143,7 @@ impl Deserr for FilterableAttributesRule { } } -#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)] +#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug, Deserr, ToSchema)] pub struct FilterFeatures { equality: bool, comparison: bool, diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index ff87eba7c..5bc434517 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -868,12 +868,12 @@ impl Index { pub(crate) fn put_filterable_attributes_rules( &self, wtxn: &mut RwTxn<'_>, - #[allow(clippy::ptr_arg)] fields: &Vec, + fields: &[FilterableAttributesRule], ) -> heed::Result<()> { self.main.remap_types::>().put( wtxn, main_key::FILTERABLE_FIELDS_KEY, - fields, + &fields, ) } From 67f7470c836150bb41d7926e136915b01d208ead Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Mar 2025 15:42:10 +0100 Subject: [PATCH 18/35] Apply PR requests related to Refactor search and facet-search --- crates/meilisearch/tests/search/errors.rs | 10 +- crates/meilisearch/tests/search/filters.rs | 8 +- crates/milli/src/error.rs | 9 +- .../milli/src/filterable_attributes_rules.rs | 32 +++-- crates/milli/src/search/facet/filter.rs | 132 +++++++++++------- 5 files changed, 111 insertions(+), 80 deletions(-) diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs 
index 05d2d2563..0ea121a7d 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -894,7 +894,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`, allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -920,7 +920,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`, allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -941,7 +941,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, 
=, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -967,7 +967,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -993,7 +993,7 @@ async fn search_with_pattern_filter_settings_errors() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `TO` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `TO` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs index 818ffabaa..4ee280646 100644 --- a/crates/meilisearch/tests/search/filters.rs +++ b/crates/meilisearch/tests/search/filters.rs @@ -335,7 +335,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { snapshot!(code, @"400 Bad Request"); 
snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -481,7 +481,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`, allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -613,7 +613,7 @@ async fn search_with_pattern_filter_settings_scenario_1() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`, allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.", + "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" 
@@ -720,7 +720,7 @@ async fn test_filterable_attributes_priority() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attributes are: `doggos.age`, `doggos.name`.\n1:11 doggos.age > 2", + "message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attributes are: `doggos.name`.\n1:11 doggos.age > 2", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 857a812cd..b34c2bd9a 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -138,8 +138,13 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidFilter(String), #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)] InvalidFilterExpression(&'static [&'static str], Value), - #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`, allowed operators: {}.", allowed_operators.join(", "))] - FilterOperatorNotAllowed { field: String, allowed_operators: Vec, operator: String }, + #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` matched rule #{rule_index} in `filterableAttributes`", allowed_operators.join(", "))] + FilterOperatorNotAllowed { + field: String, + allowed_operators: Vec, + operator: String, + rule_index: usize, + }, #[error("Attribute `{}` is not sortable. 
{}", .field, match .valid_fields.is_empty() { diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index efef46810..dbc6d72af 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -236,16 +236,13 @@ impl Default for FilterFeatures { pub fn filtered_matching_field_names<'fim>( filterable_attributes: &[FilterableAttributesRule], fields_ids_map: &'fim FieldsIdsMap, - filter: &impl Fn(&FilterableAttributesFeatures) -> bool, + filter: &impl Fn(FilterableAttributesFeatures) -> bool, ) -> BTreeSet<&'fim str> { let mut result = BTreeSet::new(); for (_, field_name) in fields_ids_map.iter() { - for filterable_attribute in filterable_attributes { - if filterable_attribute.match_str(field_name) == PatternMatch::Match { - let features = filterable_attribute.features(); - if filter(&features) { - result.insert(field_name); - } + if let Some((_, features)) = matching_features(field_name, filterable_attributes) { + if filter(features) { + result.insert(field_name); } } } @@ -260,13 +257,18 @@ pub fn filtered_matching_field_names<'fim>( /// /// * `field_name` - The field name to match against. /// * `filterable_attributes` - The set of filterable attributes rules to match against. +/// +/// # Returns +/// +/// * `Some((rule_index, features))` - The features of the matching rule and the index of the rule in the `filterable_attributes` array. +/// * `None` - No matching rule was found. 
pub fn matching_features( field_name: &str, filterable_attributes: &[FilterableAttributesRule], -) -> Option { - for filterable_attribute in filterable_attributes { +) -> Option<(usize, FilterableAttributesFeatures)> { + for (id, filterable_attribute) in filterable_attributes.iter().enumerate() { if filterable_attribute.match_str(field_name) == PatternMatch::Match { - return Some(filterable_attribute.features()); + return Some((id, filterable_attribute.features())); } } None @@ -283,7 +285,7 @@ pub fn is_field_filterable( filterable_attributes: &[FilterableAttributesRule], ) -> bool { matching_features(field_name, filterable_attributes) - .map_or(false, |features| features.is_filterable()) + .map_or(false, |(_, features)| features.is_filterable()) } /// Check if a field is facet searchable calling the method `FilterableAttributesFeatures::is_facet_searchable()`. @@ -297,7 +299,7 @@ pub fn is_field_facet_searchable( filterable_attributes: &[FilterableAttributesRule], ) -> bool { matching_features(field_name, filterable_attributes) - .map_or(false, |features| features.is_facet_searchable()) + .map_or(false, |(_, features)| features.is_facet_searchable()) } /// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields. 
@@ -339,7 +341,7 @@ pub fn match_faceted_field( fn match_pattern_by_features( field_name: &str, filterable_attributes: &[FilterableAttributesRule], - filter: &impl Fn(&FilterableAttributesFeatures) -> bool, + filter: &impl Fn(FilterableAttributesFeatures) -> bool, ) -> PatternMatch { let mut selection = PatternMatch::NoMatch; @@ -353,7 +355,7 @@ fn match_pattern_by_features( match pattern.match_str(field_name) { PatternMatch::Match => { let features = pattern.features(); - if filter(&features) && can_match { + if filter(features) && can_match { return PatternMatch::Match; } else { can_match = false; @@ -361,7 +363,7 @@ fn match_pattern_by_features( } PatternMatch::Parent => { let features = pattern.features(); - if filter(&features) { + if filter(features) { selection = PatternMatch::Parent; } } diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index fa3e4ea28..bc7209ef9 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -20,8 +20,9 @@ use crate::heed_codec::facet::{ }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ - distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures, - FilterableAttributesRule, Index, InternalError, Result, SerializationError, + distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap, + FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result, + SerializationError, }; /// The maximum number of filters the filter AST can process. 
@@ -233,11 +234,11 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { // to avoid doing this for each recursive call we're going to do it ONCE ahead of time + let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; for fid in self.condition.fids(MAX_FILTER_DEPTH) { let attribute = fid.value(); if !is_field_filterable(attribute, &filterable_attributes_rules) { - let fields_ids_map = index.fields_ids_map(rtxn)?; return Err(fid.as_external_error(FilterError::AttributeNotFilterable { attribute, filterable_fields: filtered_matching_field_names( @@ -248,7 +249,7 @@ impl<'a> Filter<'a> { }))?; } } - self.inner_evaluate(rtxn, index, &filterable_attributes_rules, None) + self.inner_evaluate(rtxn, index, &fields_ids_map, &filterable_attributes_rules, None) } fn evaluate_operator( @@ -258,6 +259,7 @@ impl<'a> Filter<'a> { universe: Option<&RoaringBitmap>, operator: &Condition<'a>, features: &FilterableAttributesFeatures, + rule_index: usize, ) -> Result { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; @@ -275,19 +277,29 @@ impl<'a> Filter<'a> { | Condition::Between { .. 
} if !features.is_filterable_comparison() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::Empty if !features.is_filterable_empty() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::Null if !features.is_filterable_null() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::Exists if !features.is_filterable_exists() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::Equal(_) | Condition::NotEqual(_) if !features.is_filterable_equality() => { - return Err(generate_filter_error(rtxn, index, field_id, operator, features)); + return Err(generate_filter_error( + rtxn, index, field_id, operator, features, rule_index, + )); } Condition::GreaterThan(val) => { (Excluded(val.parse_finite_float()?), Included(f64::MAX)) @@ -338,8 +350,9 @@ impl<'a> Filter<'a> { } Condition::NotEqual(val) => { let operator = Condition::Equal(val.clone()); - let docids = - Self::evaluate_operator(rtxn, index, field_id, None, &operator, features)?; + let docids = Self::evaluate_operator( + rtxn, index, field_id, None, &operator, features, rule_index, + )?; let all_ids = index.documents_ids(rtxn)?; return Ok(all_ids - docids); } @@ -441,7 +454,8 @@ impl<'a> Filter<'a> { &self, rtxn: &heed::RoTxn<'_>, index: &Index, - filterable_fields: &[FilterableAttributesRule], + field_ids_map: &FieldsIdsMap, + filterable_attribute_rules: &[FilterableAttributesRule], universe: Option<&RoaringBitmap>, ) -> Result { if 
universe.map_or(false, |u| u.is_empty()) { @@ -454,7 +468,8 @@ impl<'a> Filter<'a> { &(f.as_ref().clone()).into(), rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; match universe { @@ -466,15 +481,14 @@ impl<'a> Filter<'a> { } } FilterCondition::In { fid, els } => { - match matching_features(fid.value(), filterable_fields) { - Some(features) if features.is_filterable() => { - let field_ids_map = index.fields_ids_map(rtxn)?; + match matching_features(fid.value(), filterable_attribute_rules) { + Some((rule_index, features)) if features.is_filterable() => { if let Some(fid) = field_ids_map.id(fid.value()) { els.iter() .map(|el| Condition::Equal(el.clone())) .map(|op| { Self::evaluate_operator( - rtxn, index, fid, universe, &op, &features, + rtxn, index, fid, universe, &op, &features, rule_index, ) }) .union() @@ -482,46 +496,50 @@ impl<'a> Filter<'a> { Ok(RoaringBitmap::new()) } } - _ => { - let field_ids_map = index.fields_ids_map(rtxn)?; - Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filtered_matching_field_names( - filterable_fields, - &field_ids_map, - &|features| features.is_filterable(), - ), - }))? 
- } + _ => Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields: filtered_matching_field_names( + filterable_attribute_rules, + &field_ids_map, + &|features| features.is_filterable(), + ), + }))?, } } FilterCondition::Condition { fid, op } => { - match matching_features(fid.value(), filterable_fields) { - Some(features) if features.is_filterable() => { - let field_ids_map = index.fields_ids_map(rtxn)?; + match matching_features(fid.value(), filterable_attribute_rules) { + Some((rule_index, features)) if features.is_filterable() => { if let Some(fid) = field_ids_map.id(fid.value()) { - Self::evaluate_operator(rtxn, index, fid, universe, op, &features) + Self::evaluate_operator( + rtxn, index, fid, universe, op, &features, rule_index, + ) } else { Ok(RoaringBitmap::new()) } } - _ => { - let field_ids_map = index.fields_ids_map(rtxn)?; - Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filtered_matching_field_names( - filterable_fields, - &field_ids_map, - &|features| features.is_filterable(), - ), - }))? 
- } + _ => Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_fields: filtered_matching_field_names( + filterable_attribute_rules, + &field_ids_map, + &|features| features.is_filterable(), + ), + }))?, } } FilterCondition::Or(subfilters) => subfilters .iter() .cloned() - .map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe)) + .map(|f| { + Self::inner_evaluate( + &f.into(), + rtxn, + index, + field_ids_map, + filterable_attribute_rules, + universe, + ) + }) .union(), FilterCondition::And(subfilters) => { let mut subfilters_iter = subfilters.iter(); @@ -530,7 +548,8 @@ impl<'a> Filter<'a> { &(first_subfilter.clone()).into(), rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; for f in subfilters_iter { @@ -544,7 +563,8 @@ impl<'a> Filter<'a> { &(f.clone()).into(), rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, Some(&bitmap), )?; } @@ -582,11 +602,10 @@ impl<'a> Filter<'a> { Ok(result) } else { - let field_ids_map = index.fields_ids_map(rtxn)?; Err(point[0].as_external_error(FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, filterable_fields: filtered_matching_field_names( - filterable_fields, + filterable_attribute_rules, &field_ids_map, &|features| features.is_filterable(), ), @@ -649,7 +668,8 @@ impl<'a> Filter<'a> { let selected_lat = Filter { condition: condition_lat }.inner_evaluate( rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; @@ -682,7 +702,8 @@ impl<'a> Filter<'a> { let left = Filter { condition: condition_left }.inner_evaluate( rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; @@ -696,7 +717,8 @@ impl<'a> Filter<'a> { let right = Filter { condition: condition_right }.inner_evaluate( rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )?; @@ 
-712,19 +734,19 @@ impl<'a> Filter<'a> { Filter { condition: condition_lng }.inner_evaluate( rtxn, index, - filterable_fields, + field_ids_map, + filterable_attribute_rules, universe, )? }; Ok(selected_lat & selected_lng) } else { - let field_ids_map = index.fields_ids_map(rtxn)?; Err(top_right_point[0].as_external_error( FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, filterable_fields: filtered_matching_field_names( - filterable_fields, + filterable_attribute_rules, &field_ids_map, &|features| features.is_filterable(), ), @@ -742,6 +764,7 @@ fn generate_filter_error( field_id: FieldId, operator: &Condition<'_>, features: &FilterableAttributesFeatures, + rule_index: usize, ) -> Error { match index.fields_ids_map(rtxn) { Ok(fields_ids_map) => { @@ -750,6 +773,7 @@ fn generate_filter_error( field: field.to_string(), allowed_operators: features.allowed_filter_operators(), operator: operator.operator().to_string(), + rule_index, }) } Err(e) => e.into(), From b88aa9cc76b6af7b3b5f94072b6b63e427fc7b02 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 5 Mar 2025 18:22:12 +0100 Subject: [PATCH 19/35] Rely on FieldIdMapWithMetadata in facet search and filters --- crates/milli/src/error.rs | 4 +- crates/milli/src/fields_ids_map/metadata.rs | 27 ++++- .../milli/src/filterable_attributes_rules.rs | 14 +-- crates/milli/src/index.rs | 12 +- crates/milli/src/search/facet/filter.rs | 113 ++++++++---------- crates/milli/src/search/facet/search.rs | 49 ++++---- crates/milli/src/search/mod.rs | 2 +- .../milli/src/update/index_documents/mod.rs | 12 +- 8 files changed, 128 insertions(+), 105 deletions(-) diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index b34c2bd9a..3121f5405 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -138,12 +138,12 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidFilter(String), #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", 
"), .1)] InvalidFilterExpression(&'static [&'static str], Value), - #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` matched rule #{rule_index} in `filterableAttributes`", allowed_operators.join(", "))] + #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` {} in `filterableAttributes`", allowed_operators.join(", "), rule_index.map_or("did not match any rule".to_string(), |rule_index| format!("matched rule #{rule_index}")))] FilterOperatorNotAllowed { field: String, allowed_operators: Vec, operator: String, - rule_index: usize, + rule_index: Option, }, #[error("Attribute `{}` is not sortable. {}", .field, diff --git a/crates/milli/src/fields_ids_map/metadata.rs b/crates/milli/src/fields_ids_map/metadata.rs index fd333c3c6..5636256eb 100644 --- a/crates/milli/src/fields_ids_map/metadata.rs +++ b/crates/milli/src/fields_ids_map/metadata.rs @@ -126,20 +126,35 @@ impl Metadata { &self, rules: &'rules [FilterableAttributesRule], ) -> Option<&'rules FilterableAttributesRule> { + self.filterable_attributes_with_rule_index(rules).map(|(_, rule)| rule) + } + + pub fn filterable_attributes_with_rule_index<'rules>( + &self, + rules: &'rules [FilterableAttributesRule], + ) -> Option<(usize, &'rules FilterableAttributesRule)> { let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get(); - // - 1: `filterable_attributes_rule_id` is NonZero - let rule = rules.get((filterable_attributes_rule_id - 1) as usize).unwrap(); - Some(rule) + let rule_id = (filterable_attributes_rule_id - 1) as usize; + let rule = rules.get(rule_id).unwrap(); + Some((rule_id, rule)) } pub fn filterable_attributes_features( &self, rules: &[FilterableAttributesRule], ) -> FilterableAttributesFeatures { - self.filterable_attributes(rules) - .map(|rule| rule.features()) + let (_, features) = 
self.filterable_attributes_features_with_rule_index(rules); + features + } + + pub fn filterable_attributes_features_with_rule_index( + &self, + rules: &[FilterableAttributesRule], + ) -> (Option, FilterableAttributesFeatures) { + self.filterable_attributes_with_rule_index(rules) + .map(|(rule_index, rule)| (Some(rule_index), rule.features())) // if there is no filterable attributes rule, return no features - .unwrap_or_else(FilterableAttributesFeatures::no_features) + .unwrap_or_else(|| (None, FilterableAttributesFeatures::no_features())) } pub fn is_sortable(&self) -> bool { diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index dbc6d72af..50f9b8e9d 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -6,7 +6,8 @@ use utoipa::ToSchema; use crate::{ attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch}, constants::RESERVED_GEO_FIELD_NAME, - AttributePatterns, FieldsIdsMap, + fields_ids_map::metadata::FieldIdMapWithMetadata, + AttributePatterns, }; #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)] @@ -235,15 +236,14 @@ impl Default for FilterFeatures { /// * `filter` - The filter function to apply to the filterable attributes rules. 
pub fn filtered_matching_field_names<'fim>( filterable_attributes: &[FilterableAttributesRule], - fields_ids_map: &'fim FieldsIdsMap, + fields_ids_map: &'fim FieldIdMapWithMetadata, filter: &impl Fn(FilterableAttributesFeatures) -> bool, ) -> BTreeSet<&'fim str> { let mut result = BTreeSet::new(); - for (_, field_name) in fields_ids_map.iter() { - if let Some((_, features)) = matching_features(field_name, filterable_attributes) { - if filter(features) { - result.insert(field_name); - } + for (_, field_name, metadata) in fields_ids_map.iter() { + let features = metadata.filterable_attributes_features(filterable_attributes); + if filter(features) { + result.insert(field_name); } } result diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 5bc434517..f9109a137 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -13,7 +13,7 @@ use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAM use crate::database_stats::DatabaseStats; use crate::documents::PrimaryKey; use crate::error::{InternalError, UserError}; -use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; +use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::fields_ids_map::FieldsIdsMap; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, @@ -514,6 +514,16 @@ impl Index { .unwrap_or_default()) } + /// Returns the fields ids map with metadata. + /// + /// This structure is not yet stored in the index, and is generated on the fly. + pub fn fields_ids_map_with_metadata(&self, rtxn: &RoTxn<'_>) -> Result { + Ok(FieldIdMapWithMetadata::new( + self.fields_ids_map(rtxn)?, + MetadataBuilder::from_index(self, rtxn)?, + )) + } + /* fieldids weights map */ // This maps the fields ids to their weights. // Their weights is defined by the ordering of the searchable attributes. 
diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index bc7209ef9..b8c9cddfc 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -12,17 +12,15 @@ use serde_json::Value; use super::facet_range_search; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, UserError}; -use crate::filterable_attributes_rules::{ - filtered_matching_field_names, is_field_filterable, matching_features, -}; +use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; +use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ - distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap, - FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result, - SerializationError, + distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures, + FilterableAttributesRule, Index, InternalError, Result, SerializationError, }; /// The maximum number of filters the filter AST can process. 
@@ -234,21 +232,32 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { // to avoid doing this for each recursive call we're going to do it ONCE ahead of time - let fields_ids_map = index.fields_ids_map(rtxn)?; + let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; for fid in self.condition.fids(MAX_FILTER_DEPTH) { let attribute = fid.value(); - if !is_field_filterable(attribute, &filterable_attributes_rules) { - return Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute, - filterable_fields: filtered_matching_field_names( - &filterable_attributes_rules, - &fields_ids_map, - &|features| features.is_filterable(), - ), - }))?; + if let Some((_, metadata)) = fields_ids_map.id_with_metadata(fid.value()) { + if metadata + .filterable_attributes_features(&filterable_attributes_rules) + .is_filterable() + { + continue; + } + } else if is_field_filterable(attribute, &filterable_attributes_rules) { + continue; } + + // If the field is not filterable, return an error + return Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute, + filterable_fields: filtered_matching_field_names( + &filterable_attributes_rules, + &fields_ids_map, + &|features| features.is_filterable(), + ), + }))?; } + self.inner_evaluate(rtxn, index, &fields_ids_map, &filterable_attributes_rules, None) } @@ -259,7 +268,7 @@ impl<'a> Filter<'a> { universe: Option<&RoaringBitmap>, operator: &Condition<'a>, features: &FilterableAttributesFeatures, - rule_index: usize, + rule_index: Option, ) -> Result { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; @@ -454,7 +463,7 @@ impl<'a> Filter<'a> { &self, rtxn: &heed::RoTxn<'_>, index: &Index, - field_ids_map: &FieldsIdsMap, + field_ids_map: &FieldIdMapWithMetadata, filterable_attribute_rules: &[FilterableAttributesRule], 
universe: Option<&RoaringBitmap>, ) -> Result { @@ -480,51 +489,33 @@ impl<'a> Filter<'a> { } } } - FilterCondition::In { fid, els } => { - match matching_features(fid.value(), filterable_attribute_rules) { - Some((rule_index, features)) if features.is_filterable() => { - if let Some(fid) = field_ids_map.id(fid.value()) { - els.iter() - .map(|el| Condition::Equal(el.clone())) - .map(|op| { - Self::evaluate_operator( - rtxn, index, fid, universe, &op, &features, rule_index, - ) - }) - .union() - } else { - Ok(RoaringBitmap::new()) - } - } - _ => Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_fields: filtered_matching_field_names( - filterable_attribute_rules, - &field_ids_map, - &|features| features.is_filterable(), - ), - }))?, - } - } - FilterCondition::Condition { fid, op } => { - match matching_features(fid.value(), filterable_attribute_rules) { - Some((rule_index, features)) if features.is_filterable() => { - if let Some(fid) = field_ids_map.id(fid.value()) { + FilterCondition::In { fid, els } => match field_ids_map.id_with_metadata(fid.value()) { + Some((fid, metadata)) => { + let (rule_index, features) = metadata + .filterable_attributes_features_with_rule_index(filterable_attribute_rules); + els.iter() + .map(|el| Condition::Equal(el.clone())) + .map(|op| { Self::evaluate_operator( - rtxn, index, fid, universe, op, &features, rule_index, + rtxn, index, fid, universe, &op, &features, rule_index, ) - } else { - Ok(RoaringBitmap::new()) - } + }) + .union() + } + None => Ok(RoaringBitmap::new()), + }, + FilterCondition::Condition { fid, op } => { + match field_ids_map.id_with_metadata(fid.value()) { + Some((fid, metadata)) => { + let (rule_index, features) = metadata + .filterable_attributes_features_with_rule_index( + filterable_attribute_rules, + ); + Self::evaluate_operator( + rtxn, index, fid, universe, op, &features, rule_index, + ) } - _ => Err(fid.as_external_error(FilterError::AttributeNotFilterable 
{ - attribute: fid.value(), - filterable_fields: filtered_matching_field_names( - filterable_attribute_rules, - &field_ids_map, - &|features| features.is_filterable(), - ), - }))?, + None => Ok(RoaringBitmap::new()), } } FilterCondition::Or(subfilters) => subfilters @@ -764,7 +755,7 @@ fn generate_filter_error( field_id: FieldId, operator: &Condition<'_>, features: &FilterableAttributesFeatures, - rule_index: usize, + rule_index: Option, ) -> Error { match index.fields_ids_map(rtxn) { Ok(fields_ids_map) => { diff --git a/crates/milli/src/search/facet/search.rs b/crates/milli/src/search/facet/search.rs index a11e5cd49..da1e1610b 100644 --- a/crates/milli/src/search/facet/search.rs +++ b/crates/milli/src/search/facet/search.rs @@ -77,30 +77,37 @@ impl<'a> SearchForFacetValues<'a> { let rtxn = self.search_query.rtxn; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; - if !is_field_facet_searchable(&self.facet, &filterable_attributes_rules) { - let fields_ids_map = index.fields_ids_map(rtxn)?; - let matching_field_names = filtered_matching_field_names( - &filterable_attributes_rules, - &fields_ids_map, - &|features| features.is_facet_searchable(), - ); - let (valid_fields, hidden_fields) = - index.remove_hidden_fields(rtxn, matching_field_names)?; - - return Err(UserError::InvalidFacetSearchFacetName { - field: self.facet.clone(), - valid_fields, - hidden_fields, + let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?; + let fid = match fields_ids_map.id_with_metadata(&self.facet) { + Some((fid, metadata)) + if metadata + .filterable_attributes_features(&filterable_attributes_rules) + .is_facet_searchable() => + { + fid } - .into()); - } - - let fields_ids_map = index.fields_ids_map(rtxn)?; - let fid = match fields_ids_map.id(&self.facet) { - Some(fid) => fid, // we return an empty list of results when the attribute has been // set as filterable but no document contains this field (yet). 
- None => return Ok(Vec::new()), + None if is_field_facet_searchable(&self.facet, &filterable_attributes_rules) => { + return Ok(Vec::new()); + } + // we return an error when the attribute is not facet searchable + _otherwise => { + let matching_field_names = filtered_matching_field_names( + &filterable_attributes_rules, + &fields_ids_map, + &|features| features.is_facet_searchable(), + ); + let (valid_fields, hidden_fields) = + index.remove_hidden_fields(rtxn, matching_field_names)?; + + return Err(UserError::InvalidFacetSearchFacetName { + field: self.facet.clone(), + valid_fields, + hidden_fields, + } + .into()); + } }; let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? { diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index 15f3b1b4a..7d98f3453 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -192,7 +192,7 @@ impl<'a> Search<'a> { // check if the distinct field is in the filterable fields if !is_field_filterable(distinct, &filterable_fields) { // if not, remove the hidden fields from the filterable fields to generate the error message - let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; + let fields_ids_map = ctx.index.fields_ids_map_with_metadata(ctx.txn)?; let matching_field_names = filtered_matching_field_names( &filterable_fields, &fields_ids_map, diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 19ab1ff34..5ec8b1c7c 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -1256,7 +1256,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { 
features.is_filterable() @@ -1479,7 +1479,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() @@ -1507,7 +1507,7 @@ mod tests { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() @@ -1745,7 +1745,7 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() @@ -1856,7 +1856,7 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() @@ -1925,7 +1925,7 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + let fields_ids_map = 
index.fields_ids_map_with_metadata(&rtxn).unwrap(); let facets = filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { features.is_filterable() From 8ec0c322eaf7c329d946a7e2fac12c2010677282 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 6 Mar 2025 11:42:53 +0100 Subject: [PATCH 20/35] Apply PR requests related to Refactor the FieldIdMapWithMetadata --- crates/milli/src/fields_ids_map/global.rs | 11 +++++-- crates/milli/src/fields_ids_map/metadata.rs | 29 +++++++++++++++---- .../src/search/facet/facet_distribution.rs | 22 +++++++++----- .../src/update/new/indexer/post_processing.rs | 6 ++-- 4 files changed, 49 insertions(+), 19 deletions(-) diff --git a/crates/milli/src/fields_ids_map/global.rs b/crates/milli/src/fields_ids_map/global.rs index e5f1212df..235d509e9 100644 --- a/crates/milli/src/fields_ids_map/global.rs +++ b/crates/milli/src/fields_ids_map/global.rs @@ -107,8 +107,15 @@ impl<'indexing> GlobalFieldsIdsMap<'indexing> { } /// Get the metadata of a field based on its id. - pub fn metadata(&self, id: FieldId) -> Option { - self.local.metadata(id).or_else(|| self.global.read().unwrap().metadata(id)) + pub fn metadata(&mut self, id: FieldId) -> Option { + if self.local.metadata(id).is_none() { + let global = self.global.read().unwrap(); + + let (name, metadata) = global.name_with_metadata(id)?; + self.local.insert(name, id, metadata); + } + + self.local.metadata(id) } } diff --git a/crates/milli/src/fields_ids_map/metadata.rs b/crates/milli/src/fields_ids_map/metadata.rs index 5636256eb..7f81e6b79 100644 --- a/crates/milli/src/fields_ids_map/metadata.rs +++ b/crates/milli/src/fields_ids_map/metadata.rs @@ -22,7 +22,7 @@ pub struct Metadata { pub distinct: bool, /// The field has been defined as asc/desc in the ranking rules. pub asc_desc: bool, - /// The field is a geo field. + /// The field is a geo field (`_geo`, `_geo.lat`, `_geo.lng`). pub geo: bool, /// The id of the localized attributes rule if the field is localized. 
pub localized_attributes_rule_id: Option, @@ -215,9 +215,8 @@ pub struct MetadataBuilder { impl MetadataBuilder { pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result { let searchable_attributes = match index.user_defined_searchable_fields(rtxn)? { - Some(fields) if fields.contains(&"*") => None, - None => None, Some(fields) => Some(fields.into_iter().map(|s| s.to_string()).collect()), + None => None, }; let filterable_attributes = index.filterable_attributes_rules(rtxn)?; let sortable_attributes = index.sortable_fields(rtxn)?; @@ -225,14 +224,14 @@ impl MetadataBuilder { let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string()); let asc_desc_attributes = index.asc_desc_fields(rtxn)?; - Ok(Self { + Ok(Self::_new( searchable_attributes, filterable_attributes, sortable_attributes, localized_attributes, distinct_attribute, asc_desc_attributes, - }) + )) } #[cfg(test)] @@ -246,11 +245,29 @@ impl MetadataBuilder { localized_attributes: Option>, distinct_attribute: Option, asc_desc_attributes: HashSet, + ) -> Self { + Self::_new( + searchable_attributes, + filterable_attributes, + sortable_attributes, + localized_attributes, + distinct_attribute, + asc_desc_attributes, + ) + } + + fn _new( + searchable_attributes: Option>, + filterable_attributes: Vec, + sortable_attributes: HashSet, + localized_attributes: Option>, + distinct_attribute: Option, + asc_desc_attributes: HashSet, ) -> Self { let searchable_attributes = match searchable_attributes { Some(fields) if fields.iter().any(|f| f == "*") => None, - None => None, Some(fields) => Some(fields), + None => None, }; Self { diff --git a/crates/milli/src/search/facet/facet_distribution.rs b/crates/milli/src/search/facet/facet_distribution.rs index beb5d2568..5c41a0424 100644 --- a/crates/milli/src/search/facet/facet_distribution.rs +++ b/crates/milli/src/search/facet/facet_distribution.rs @@ -294,11 +294,7 @@ impl<'a> FacetDistribution<'a> { return Ok(Default::default()); }; - let fields_ids_map = 
self.index.fields_ids_map(self.rtxn)?; - let fields_ids_map = FieldIdMapWithMetadata::new( - fields_ids_map, - MetadataBuilder::from_index(self.index, self.rtxn)?, - ); + let fields_ids_map = self.index.fields_ids_map_with_metadata(self.rtxn)?; let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?; self.check_faceted_fields(&fields_ids_map, &filterable_attributes_rules)?; @@ -365,12 +361,17 @@ impl<'a> FacetDistribution<'a> { metadata: &Metadata, filterable_attributes_rules: &[FilterableAttributesRule], ) -> bool { + // If the field is not filterable, we don't want to compute the facet distribution. + if !metadata.filterable_attributes_features(filterable_attributes_rules).is_filterable() { + return false; + } + match &self.facets { Some(facets) => { // The list of facets provided by the user is a legacy pattern ("dog.age" must be selected with "dog"). facets.keys().any(|key| match_field_legacy(key, name) == PatternMatch::Match) } - None => metadata.is_faceted(filterable_attributes_rules), + None => true, } } @@ -385,7 +386,9 @@ impl<'a> FacetDistribution<'a> { for field in facets.keys() { let is_valid_faceted_field = fields_ids_map.id_with_metadata(field).map_or(false, |(_, metadata)| { - metadata.is_faceted(filterable_attributes_rules) + metadata + .filterable_attributes_features(filterable_attributes_rules) + .is_filterable() }); if !is_valid_faceted_field { invalid_facets.insert(field.to_string()); @@ -397,7 +400,10 @@ impl<'a> FacetDistribution<'a> { let valid_facets_name = fields_ids_map .iter() .filter_map(|(_, name, metadata)| { - if metadata.is_faceted(filterable_attributes_rules) { + if metadata + .filterable_attributes_features(filterable_attributes_rules) + .is_filterable() + { Some(name.to_string()) } else { None diff --git a/crates/milli/src/update/new/indexer/post_processing.rs b/crates/milli/src/update/new/indexer/post_processing.rs index 4ea749a85..2a01fccf3 100644 --- 
a/crates/milli/src/update/new/indexer/post_processing.rs +++ b/crates/milli/src/update/new/indexer/post_processing.rs @@ -25,7 +25,7 @@ use crate::{GlobalFieldsIdsMap, Index, Result}; pub(super) fn post_process( indexing_context: IndexingContext, wtxn: &mut RwTxn<'_>, - global_fields_ids_map: GlobalFieldsIdsMap<'_>, + mut global_fields_ids_map: GlobalFieldsIdsMap<'_>, facet_field_ids_delta: FacetFieldIdsDelta, ) -> Result<()> where @@ -33,7 +33,7 @@ where { let index = indexing_context.index; indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets); - compute_facet_level_database(index, wtxn, facet_field_ids_delta, &global_fields_ids_map)?; + compute_facet_level_database(index, wtxn, facet_field_ids_delta, &mut global_fields_ids_map)?; compute_facet_search_database(index, wtxn, global_fields_ids_map)?; indexing_context.progress.update_progress(IndexingStep::PostProcessingWords); if let Some(prefix_delta) = compute_word_fst(index, wtxn)? { @@ -170,7 +170,7 @@ fn compute_facet_level_database( index: &Index, wtxn: &mut RwTxn, mut facet_field_ids_delta: FacetFieldIdsDelta, - global_fields_ids_map: &GlobalFieldsIdsMap, + global_fields_ids_map: &mut GlobalFieldsIdsMap, ) -> Result<()> { let rtxn = index.read_txn()?; let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?; From ca41ce3bbdb157dd3713f3bb30093ae964a6d49d Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 6 Mar 2025 11:43:42 +0100 Subject: [PATCH 21/35] Old indexer document addition now check if facet search is globally activated --- .../index_documents/extract/extract_facet_string_docids.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 994125c50..5b7639e59 100644 --- a/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ 
b/crates/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -104,7 +104,7 @@ fn extract_facet_string_docids_document_update( // Facet search normalization let features = metadata.filterable_attributes_features(&settings.filterable_attributes_rules); - if features.is_facet_searchable() { + if features.is_facet_searchable() && settings.facet_search { let locales = metadata.locales(&settings.localized_attributes_rules); let hyper_normalized_value = normalize_facet_string(normalized_value, locales); From 5ceddbda8475caa4808d74f46cc4639f4af6653b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 6 Mar 2025 13:58:28 +0100 Subject: [PATCH 22/35] Add the max_weight of the weight map if it's lacking --- crates/milli/src/fieldids_weights_map.rs | 5 ----- .../milli/src/search/new/ranking_rule_graph/fid/mod.rs | 10 +++++++--- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/crates/milli/src/fieldids_weights_map.rs b/crates/milli/src/fieldids_weights_map.rs index f23bc1512..57c99f77f 100644 --- a/crates/milli/src/fieldids_weights_map.rs +++ b/crates/milli/src/fieldids_weights_map.rs @@ -48,11 +48,6 @@ impl FieldidsWeightsMap { self.map.values().copied().max() } - /// Returns the field id with the highest weight. - pub fn max_weight_fid(&self) -> Option<(FieldId, Weight)> { - self.map.iter().max_by_key(|(_, weight)| *weight).map(|(fid, weight)| (*fid, *weight)) - } - /// Return an iterator visiting all field ids in arbitrary order. 
pub fn ids(&self) -> impl Iterator + '_ { self.map.keys().copied() diff --git a/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs index 62d75d2ac..f424b7241 100644 --- a/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -57,6 +57,7 @@ impl RankingRuleGraphTrait for FidGraph { let term = to_term; let mut all_fields = FxHashSet::default(); + let mut current_max_weight = 0; for word in term.term_subset.all_single_words_except_prefix_db(ctx)? { let fields = ctx.get_db_word_fids(word.interned())?; all_fields.extend(fields); @@ -81,6 +82,9 @@ impl RankingRuleGraphTrait for FidGraph { let weight = weights_map .weight(fid) .ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?; + if weight > current_max_weight { + current_max_weight = weight; + } edges.push(( weight as u32 * term.term_ids.len() as u32, conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }), @@ -88,10 +92,10 @@ impl RankingRuleGraphTrait for FidGraph { } // always lookup the max_fid if we don't already and add an artificial condition for max scoring - let max_weight_fid = weights_map.max_weight_fid(); + let max_weight = weights_map.max_weight(); - if let Some((max_fid, max_weight)) = max_weight_fid { - if !all_fields.contains(&max_fid) { + if let Some(max_weight) = max_weight { + if current_max_weight < max_weight { edges.push(( max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10. 
conditions_interner.insert(FidCondition { From ed1dcbe0f78fc7350dd8c461dcfef2f56499f612 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 6 Mar 2025 14:18:25 +0100 Subject: [PATCH 23/35] Fix behavior change in the Attributes criterion --- crates/milli/src/index.rs | 11 ++++ .../search/new/ranking_rule_graph/fid/mod.rs | 5 +- ...__attribute_position_different_fields.snap | 56 +++++++++---------- ...e_position__attribute_position_ngrams.snap | 56 +++++++++---------- ...position__attribute_position_repeated.snap | 20 +++---- ...position__attribute_position_simple-2.snap | 56 +++++++++---------- 6 files changed, 109 insertions(+), 95 deletions(-) diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index f9109a137..798cf3073 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -559,6 +559,17 @@ impl Index { self.main.remap_key_type::().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY) } + pub fn max_searchable_attribute_weight(&self, rtxn: &RoTxn<'_>) -> Result> { + let user_defined_searchable_fields = self.user_defined_searchable_fields(rtxn)?; + if let Some(user_defined_searchable_fields) = user_defined_searchable_fields { + if !user_defined_searchable_fields.contains(&"*") { + return Ok(Some(user_defined_searchable_fields.len().saturating_sub(1) as Weight)); + } + } + + Ok(None) + } + pub fn searchable_fields_and_weights<'a>( &self, rtxn: &'a RoTxn<'a>, diff --git a/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs index f424b7241..e55f1febf 100644 --- a/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -92,7 +92,10 @@ impl RankingRuleGraphTrait for FidGraph { } // always lookup the max_fid if we don't already and add an artificial condition for max scoring - let max_weight = weights_map.max_weight(); + let max_weight = ctx + .index + .max_searchable_attribute_weight(ctx.txn)? 
+ .or_else(|| weights_map.max_weight()); if let Some(max_weight) = max_weight { if current_max_weight < max_weight { diff --git a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap index bf5b14f47..e55e221a2 100644 --- a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap +++ b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_different_fields.snap @@ -8,8 +8,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -25,8 +25,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -42,8 +42,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -59,8 +59,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -76,8 +76,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -93,8 +93,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -110,8 +110,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -144,8 +144,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ 
Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -161,8 +161,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -178,8 +178,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -195,8 +195,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -212,8 +212,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -229,8 +229,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( diff --git a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap index bf5b14f47..e55e221a2 100644 --- a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap +++ b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_ngrams.snap @@ -8,8 +8,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -25,8 +25,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -42,8 +42,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -59,8 +59,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), 
Position( @@ -76,8 +76,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -93,8 +93,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -110,8 +110,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -144,8 +144,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -161,8 +161,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -178,8 +178,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -195,8 +195,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -212,8 +212,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -229,8 +229,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( diff --git a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap index af35d0d8d..5ae6fafc5 100644 --- a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap +++ 
b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_repeated.snap @@ -8,8 +8,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 6, - max_rank: 6, + rank: 11, + max_rank: 11, }, ), Position( @@ -25,8 +25,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 6, - max_rank: 6, + rank: 11, + max_rank: 11, }, ), Position( @@ -42,8 +42,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 6, - max_rank: 6, + rank: 11, + max_rank: 11, }, ), Position( @@ -59,8 +59,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 6, - max_rank: 6, + rank: 11, + max_rank: 11, }, ), Position( @@ -76,8 +76,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 6, - max_rank: 6, + rank: 11, + max_rank: 11, }, ), Position( diff --git a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap index bf5b14f47..e55e221a2 100644 --- a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap +++ b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__attribute_position__attribute_position_simple-2.snap @@ -8,8 +8,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -25,8 +25,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -42,8 +42,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -59,8 +59,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, 
- max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -76,8 +76,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -93,8 +93,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -110,8 +110,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -127,8 +127,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -144,8 +144,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -161,8 +161,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -178,8 +178,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -195,8 +195,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -212,8 +212,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( @@ -229,8 +229,8 @@ expression: "format!(\"{document_ids_scores:#?}\")" [ Fid( Rank { - rank: 3, - max_rank: 3, + rank: 5, + max_rank: 5, }, ), Position( From 689e69d6d2f710dba7325ed7b13f7a919a25fb2e Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 10 Mar 2025 13:46:33 +0100 Subject: [PATCH 24/35] Take into account PR messages --- crates/milli/src/attribute_patterns.rs | 2 +- crates/milli/src/fieldids_weights_map.rs | 5 ----- .../src/search/new/ranking_rule_graph/fid/mod.rs | 5 +---- .../src/update/index_documents/transform.rs | 14 ++++++++------ 
.../update/new/extract/faceted/extract_facets.rs | 16 ++++++++-------- 5 files changed, 18 insertions(+), 24 deletions(-) diff --git a/crates/milli/src/attribute_patterns.rs b/crates/milli/src/attribute_patterns.rs index c7045c68e..00caa2a6d 100644 --- a/crates/milli/src/attribute_patterns.rs +++ b/crates/milli/src/attribute_patterns.rs @@ -55,7 +55,7 @@ fn match_pattern(pattern: &str, str: &str) -> PatternMatch { if pattern == "*" { return PatternMatch::Match; } else if pattern.starts_with('*') && pattern.ends_with('*') { - // If the starts and ends with a wildcard, return Match if the string contains the pattern without the wildcards + // If the pattern starts and ends with a wildcard, return Match if the string contains the pattern without the wildcards if str.contains(&pattern[1..pattern.len() - 1]) { return PatternMatch::Match; } diff --git a/crates/milli/src/fieldids_weights_map.rs b/crates/milli/src/fieldids_weights_map.rs index 57c99f77f..0c57ba109 100644 --- a/crates/milli/src/fieldids_weights_map.rs +++ b/crates/milli/src/fieldids_weights_map.rs @@ -43,11 +43,6 @@ impl FieldidsWeightsMap { self.map.get(&fid).copied() } - /// Returns highest weight contained in the map if any. - pub fn max_weight(&self) -> Option { - self.map.values().copied().max() - } - /// Return an iterator visiting all field ids in arbitrary order. pub fn ids(&self) -> impl Iterator + '_ { self.map.keys().copied() diff --git a/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs b/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs index e55f1febf..5f0c37cc3 100644 --- a/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs +++ b/crates/milli/src/search/new/ranking_rule_graph/fid/mod.rs @@ -92,10 +92,7 @@ impl RankingRuleGraphTrait for FidGraph { } // always lookup the max_fid if we don't already and add an artificial condition for max scoring - let max_weight = ctx - .index - .max_searchable_attribute_weight(ctx.txn)? 
- .or_else(|| weights_map.max_weight()); + let max_weight = ctx.index.max_searchable_attribute_weight(ctx.txn)?; if let Some(max_weight) = max_weight { if current_max_weight < max_weight { diff --git a/crates/milli/src/update/index_documents/transform.rs b/crates/milli/src/update/index_documents/transform.rs index b2ee21cbf..769e86b39 100644 --- a/crates/milli/src/update/index_documents/transform.rs +++ b/crates/milli/src/update/index_documents/transform.rs @@ -745,12 +745,14 @@ impl<'a, 'i> Transform<'a, 'i> { } else { let facet_operation = necessary_faceted_field(id); let searchable_operation = settings_diff.reindex_searchable_id(id); - let operation = facet_operation - // TODO: replace `zip.map` with `zip_with` once stable - .zip(searchable_operation) - .map(|(op1, op2)| op1.merge(op2)) - .or(facet_operation) - .or(searchable_operation); + let operation = match (facet_operation, searchable_operation) { + (Some(facet_operation), Some(searchable_operation)) => { + Some(facet_operation.merge(searchable_operation)) + } + (Some(operation), None) | (None, Some(operation)) => Some(operation), + (None, None) => None, + }; + if let Some(operation) = operation { operations.insert(id, operation); obkv_writer.insert(id, val)?; diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 3201e23f9..05fcdf72a 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -30,10 +30,10 @@ pub struct FacetedExtractorData<'a, 'b> { sender: &'a FieldIdDocidFacetSender<'a, 'b>, grenad_parameters: &'a GrenadParameters, buckets: usize, - filterable_attributes: Vec, - sortable_fields: HashSet, - asc_desc_fields: HashSet, - distinct_field: Option, + filterable_attributes: &'a [FilterableAttributesRule], + sortable_fields: &'a HashSet, + asc_desc_fields: &'a HashSet, + distinct_field: &'a Option, is_geo_enabled: bool, } 
@@ -478,10 +478,10 @@ impl FacetedDocidsExtractor { grenad_parameters: indexing_context.grenad_parameters, buckets: rayon::current_num_threads(), sender, - filterable_attributes, - sortable_fields, - asc_desc_fields, - distinct_field, + filterable_attributes: &filterable_attributes, + sortable_fields: &sortable_fields, + asc_desc_fields: &asc_desc_fields, + distinct_field: &distinct_field, is_geo_enabled, }; extract( From c9a4c6ed964891d86323670034b0014ae861fe3a Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 10 Mar 2025 14:29:44 +0100 Subject: [PATCH 25/35] Revert metadata creation when computing filters at search time --- .../after_removing_the_documents.snap | 2 +- crates/meilisearch/tests/documents/errors.rs | 4 +- crates/meilisearch/tests/search/errors.rs | 4 +- crates/meilisearch/tests/search/filters.rs | 4 +- crates/meilisearch/tests/search/mod.rs | 4 +- crates/milli/src/error.rs | 4 +- .../milli/src/filterable_attributes_rules.rs | 32 +++++ crates/milli/src/search/facet/filter.rs | 112 +++++++++--------- 8 files changed, 96 insertions(+), 70 deletions(-) diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap index c28ea8b95..7c88e55b2 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -10,7 +10,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method:
ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} 3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} -4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} +4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. 
Available filterable attributes patterns are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} 5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/meilisearch/tests/documents/errors.rs b/crates/meilisearch/tests/documents/errors.rs index 7b2ca8b5e..73a3f2e4f 100644 --- a/crates/meilisearch/tests/documents/errors.rs +++ b/crates/meilisearch/tests/documents/errors.rs @@ -636,7 +636,7 @@ async fn delete_document_by_filter() { "originalFilter": "\"catto = jorts\"" }, "error": { - "message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts", + "message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes patterns are: `id`, `title`.\n1:6 catto = jorts", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -738,7 +738,7 @@ async fn fetch_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese", + "message": "Attribute `doggo` is not filterable. 
Available filterable attributes patterns are: `color`.\n1:6 doggo = bernese", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index 0ea121a7d..46a03e56f 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -716,7 +716,7 @@ async fn filter_invalid_attribute_array() { |response, code| { snapshot!(response, @r###" { - "message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", + "message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes patterns are: `title`.\n1:5 many = Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -737,7 +737,7 @@ async fn filter_invalid_attribute_string() { |response, code| { snapshot!(response, @r###" { - "message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", + "message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes patterns are: `title`.\n1:5 many = Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs index 4ee280646..fac3bbebc 100644 --- a/crates/meilisearch/tests/search/filters.rs +++ b/crates/meilisearch/tests/search/filters.rs @@ -720,7 +720,7 @@ async fn test_filterable_attributes_priority() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `doggos.age` is not filterable. 
Available filterable attributes are: `doggos.name`.\n1:11 doggos.age > 2", + "message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attributes patterns are: `doggos.*`.\n1:11 doggos.age > 2", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -746,7 +746,7 @@ async fn test_filterable_attributes_priority() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `doggos` is not filterable. Available filterable attributes are: `doggos.age`, `doggos.name`.\n1:7 doggos EXISTS", + "message": "Index `test`: Attribute `doggos` is not filterable. Available filterable attributes patterns are: `doggos.*`.\n1:7 doggos EXISTS", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs index 2f3e60f34..dc6048ea2 100644 --- a/crates/meilisearch/tests/search/mod.rs +++ b/crates/meilisearch/tests/search/mod.rs @@ -1753,7 +1753,7 @@ async fn test_nested_fields() { assert_eq!(code, 400, "{}", response); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `nested` is not filterable. Available filterable attributes are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = array", + "message": "Index `test`: Attribute `nested` is not filterable. Available filterable attributes patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = array", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1772,7 +1772,7 @@ async fn test_nested_fields() { assert_eq!(code, 400, "{}", response); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `nested` is not filterable. 
Available filterable attributes are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = \"I lied\"", + "message": "Index `test`: Attribute `nested` is not filterable. Available filterable attributes patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = \"I lied\"", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 3121f5405..bfcb4f780 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -138,12 +138,12 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidFilter(String), #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)] InvalidFilterExpression(&'static [&'static str], Value), - #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` {} in `filterableAttributes`", allowed_operators.join(", "), rule_index.map_or("did not match any rule".to_string(), |rule_index| format!("matched rule #{rule_index}")))] + #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` {} in `filterableAttributes`", allowed_operators.join(", "), format!("matched rule #{rule_index}"))] FilterOperatorNotAllowed { field: String, allowed_operators: Vec, operator: String, - rule_index: Option, + rule_index: usize, }, #[error("Attribute `{}` is not sortable. {}", .field, diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index 50f9b8e9d..d70734567 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -249,6 +249,38 @@ pub fn filtered_matching_field_names<'fim>( result } +/// Match a field against a set of filterable attributes rules. 
+/// +/// This function will return the set of patterns that match the given filter. +/// +/// # Arguments +/// +/// * `filterable_attributes` - The set of filterable attributes rules to match against. +/// * `filter` - The filter function to apply to the filterable attributes rules. +pub fn filtered_matching_patterns<'patterns>( + filterable_attributes: &'patterns [FilterableAttributesRule], + filter: &impl Fn(FilterableAttributesFeatures) -> bool, +) -> BTreeSet<&'patterns str> { + let mut result = BTreeSet::new(); + + for rule in filterable_attributes { + if filter(rule.features()) { + match rule { + FilterableAttributesRule::Field(field) => { + result.insert(field.as_str()); + } + FilterableAttributesRule::Pattern(patterns) => { + patterns.attribute_patterns.patterns.iter().for_each(|pattern| { + result.insert(pattern); + }); + } + } + } + } + + result +} + /// Match a field against a set of filterable attributes rules. /// /// This function will return the features that match the given field name. 
diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index b8c9cddfc..4bf357239 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -12,15 +12,17 @@ use serde_json::Value; use super::facet_range_search; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, UserError}; -use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; -use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable}; +use crate::filterable_attributes_rules::{ + filtered_matching_patterns, is_field_filterable, matching_features, +}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; use crate::index::db_name::FACET_ID_STRING_DOCIDS; use crate::{ - distance_between_two_points, lat_lng_to_xyz, FieldId, FilterableAttributesFeatures, - FilterableAttributesRule, Index, InternalError, Result, SerializationError, + distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap, + FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result, + SerializationError, }; /// The maximum number of filters the filter AST can process. 
@@ -62,7 +64,7 @@ impl Display for BadGeoError { #[derive(Debug)] enum FilterError<'a> { - AttributeNotFilterable { attribute: &'a str, filterable_fields: BTreeSet<&'a str> }, + AttributeNotFilterable { attribute: &'a str, filterable_patterns: BTreeSet<&'a str> }, ParseGeoError(BadGeoError), TooDeep, } @@ -77,14 +79,14 @@ impl<'a> From for FilterError<'a> { impl<'a> Display for FilterError<'a> { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { - Self::AttributeNotFilterable { attribute, filterable_fields } => { + Self::AttributeNotFilterable { attribute, filterable_patterns } => { write!(f, "Attribute `{attribute}` is not filterable.")?; - if filterable_fields.is_empty() { + if filterable_patterns.is_empty() { write!(f, " This index does not have configured filterable attributes.") } else { - write!(f, " Available filterable attributes are: ")?; + write!(f, " Available filterable attributes patterns are: ")?; let mut filterables_list = - filterable_fields.iter().map(AsRef::as_ref).collect::>(); + filterable_patterns.iter().map(AsRef::as_ref).collect::>(); filterables_list.sort_unstable(); for (idx, filterable) in filterables_list.iter().enumerate() { write!(f, "`{filterable}`")?; @@ -232,27 +234,19 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { // to avoid doing this for each recursive call we're going to do it ONCE ahead of time - let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?; + let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; for fid in self.condition.fids(MAX_FILTER_DEPTH) { let attribute = fid.value(); - if let Some((_, metadata)) = fields_ids_map.id_with_metadata(fid.value()) { - if metadata - .filterable_attributes_features(&filterable_attributes_rules) - .is_filterable() - { - continue; - } - } else if is_field_filterable(attribute, 
&filterable_attributes_rules) { + if is_field_filterable(attribute, &filterable_attributes_rules) { continue; } // If the field is not filterable, return an error return Err(fid.as_external_error(FilterError::AttributeNotFilterable { attribute, - filterable_fields: filtered_matching_field_names( + filterable_patterns: filtered_matching_patterns( &filterable_attributes_rules, - &fields_ids_map, &|features| features.is_filterable(), ), }))?; @@ -268,7 +262,7 @@ impl<'a> Filter<'a> { universe: Option<&RoaringBitmap>, operator: &Condition<'a>, features: &FilterableAttributesFeatures, - rule_index: Option, + rule_index: usize, ) -> Result { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; @@ -463,7 +457,7 @@ impl<'a> Filter<'a> { &self, rtxn: &heed::RoTxn<'_>, index: &Index, - field_ids_map: &FieldIdMapWithMetadata, + field_ids_map: &FieldsIdsMap, filterable_attribute_rules: &[FilterableAttributesRule], universe: Option<&RoaringBitmap>, ) -> Result { @@ -489,34 +483,36 @@ impl<'a> Filter<'a> { } } } - FilterCondition::In { fid, els } => match field_ids_map.id_with_metadata(fid.value()) { - Some((fid, metadata)) => { - let (rule_index, features) = metadata - .filterable_attributes_features_with_rule_index(filterable_attribute_rules); - els.iter() - .map(|el| Condition::Equal(el.clone())) - .map(|op| { - Self::evaluate_operator( - rtxn, index, fid, universe, &op, &features, rule_index, - ) - }) - .union() - } - None => Ok(RoaringBitmap::new()), - }, - FilterCondition::Condition { fid, op } => { - match field_ids_map.id_with_metadata(fid.value()) { - Some((fid, metadata)) => { - let (rule_index, features) = metadata - .filterable_attributes_features_with_rule_index( - filterable_attribute_rules, - ); + FilterCondition::In { fid, els } => { + let Some(field_id) = field_ids_map.id(fid.value()) else { + return Ok(RoaringBitmap::new()); + }; + let Some((rule_index, features)) = + matching_features(fid.value(), 
filterable_attribute_rules) + else { + return Ok(RoaringBitmap::new()); + }; + + els.iter() + .map(|el| Condition::Equal(el.clone())) + .map(|op| { Self::evaluate_operator( - rtxn, index, fid, universe, op, &features, rule_index, + rtxn, index, field_id, universe, &op, &features, rule_index, ) - } - None => Ok(RoaringBitmap::new()), - } + }) + .union() + } + FilterCondition::Condition { fid, op } => { + let Some(field_id) = field_ids_map.id(fid.value()) else { + return Ok(RoaringBitmap::new()); + }; + let Some((rule_index, features)) = + matching_features(fid.value(), filterable_attribute_rules) + else { + return Ok(RoaringBitmap::new()); + }; + + Self::evaluate_operator(rtxn, index, field_id, universe, op, &features, rule_index) } FilterCondition::Or(subfilters) => subfilters .iter() @@ -595,9 +591,8 @@ impl<'a> Filter<'a> { } else { Err(point[0].as_external_error(FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, - filterable_fields: filtered_matching_field_names( + filterable_patterns: filtered_matching_patterns( filterable_attribute_rules, - &field_ids_map, &|features| features.is_filterable(), ), }))? @@ -736,9 +731,8 @@ impl<'a> Filter<'a> { Err(top_right_point[0].as_external_error( FilterError::AttributeNotFilterable { attribute: RESERVED_GEO_FIELD_NAME, - filterable_fields: filtered_matching_field_names( + filterable_patterns: filtered_matching_patterns( filterable_attribute_rules, - &field_ids_map, &|features| features.is_filterable(), ), }, @@ -755,7 +749,7 @@ fn generate_filter_error( field_id: FieldId, operator: &Condition<'_>, features: &FilterableAttributesFeatures, - rule_index: Option, + rule_index: usize, ) -> Error { match index.fields_ids_map(rtxn) { Ok(fields_ids_map) => { @@ -917,42 +911,42 @@ mod tests { let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `_geo` is not filterable. 
This index does not have configured filterable attributes. + Attribute `_geo` is not filterable. Available filterable attributes patterns are: `title`. 12:16 _geoRadius(-100, 150, 10) "###); let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `_geo` is not filterable. This index does not have configured filterable attributes. + Attribute `_geo` is not filterable. Available filterable attributes patterns are: `title`. 18:20 _geoBoundingBox([42, 150], [30, 10]) "###); let filter = Filter::from_str("name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `name` is not filterable. This index does not have configured filterable attributes. + Attribute `name` is not filterable. Available filterable attributes patterns are: `title`. 1:5 name = 12 "###); let filter = Filter::from_str("title = \"test\" AND name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `name` is not filterable. This index does not have configured filterable attributes. + Attribute `name` is not filterable. Available filterable attributes patterns are: `title`. 20:24 title = "test" AND name = 12 "###); let filter = Filter::from_str("title = \"test\" AND name IN [12]").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `name` is not filterable. This index does not have configured filterable attributes. + Attribute `name` is not filterable. Available filterable attributes patterns are: `title`. 
20:24 title = "test" AND name IN [12] "###); let filter = Filter::from_str("title = \"test\" AND name != 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `name` is not filterable. This index does not have configured filterable attributes. + Attribute `name` is not filterable. Available filterable attributes patterns are: `title`. 20:24 title = "test" AND name != 12 "###); } From b12ffd13569e1c90f7ae1b3a45211eec4594b0e2 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 10 Mar 2025 13:55:54 +0100 Subject: [PATCH 26/35] Remove filter pre-check --- crates/milli/src/search/facet/filter.rs | 53 ++++++++++++------------- 1 file changed, 25 insertions(+), 28 deletions(-) diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index 4bf357239..9844809e9 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -12,9 +12,7 @@ use serde_json::Value; use super::facet_range_search; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::error::{Error, UserError}; -use crate::filterable_attributes_rules::{ - filtered_matching_patterns, is_field_filterable, matching_features, -}; +use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec, }; @@ -233,24 +231,8 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { - // to avoid doing this for each recursive call we're going to do it ONCE ahead of time let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; - for fid in self.condition.fids(MAX_FILTER_DEPTH) { - let attribute = fid.value(); - if is_field_filterable(attribute, &filterable_attributes_rules) { - continue; - } - - // If 
the field is not filterable, return an error - return Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute, - filterable_patterns: filtered_matching_patterns( - &filterable_attributes_rules, - &|features| features.is_filterable(), - ), - }))?; - } self.inner_evaluate(rtxn, index, &fields_ids_map, &filterable_attributes_rules, None) } @@ -484,15 +466,22 @@ impl<'a> Filter<'a> { } } FilterCondition::In { fid, els } => { + let Some((rule_index, features)) = + matching_features(fid.value(), filterable_attribute_rules) + .filter(|(_, features)| features.is_filterable()) + else { + // If the field is not filterable, return an error + return Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_patterns: filtered_matching_patterns( + filterable_attribute_rules, + &|features| features.is_filterable(), + ), + }))?; + }; let Some(field_id) = field_ids_map.id(fid.value()) else { return Ok(RoaringBitmap::new()); }; - let Some((rule_index, features)) = - matching_features(fid.value(), filterable_attribute_rules) - else { - return Ok(RoaringBitmap::new()); - }; - els.iter() .map(|el| Condition::Equal(el.clone())) .map(|op| { @@ -503,12 +492,20 @@ impl<'a> Filter<'a> { .union() } FilterCondition::Condition { fid, op } => { - let Some(field_id) = field_ids_map.id(fid.value()) else { - return Ok(RoaringBitmap::new()); - }; let Some((rule_index, features)) = matching_features(fid.value(), filterable_attribute_rules) + .filter(|(_, features)| features.is_filterable()) else { + // If the field is not filterable, return an error + return Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute: fid.value(), + filterable_patterns: filtered_matching_patterns( + filterable_attribute_rules, + &|features| features.is_filterable(), + ), + }))?; + }; + let Some(field_id) = field_ids_map.id(fid.value()) else { return Ok(RoaringBitmap::new()); }; From abef65584919f462c75e8d478081ed7dd1683d21 Mon Sep 17 
00:00:00 2001 From: ManyTheFish Date: Mon, 10 Mar 2025 15:27:17 +0100 Subject: [PATCH 27/35] Revert metadata creation when computing facet search and distinct --- .../meilisearch/tests/search/facet_search.rs | 2 +- crates/milli/src/error.rs | 20 ++++--- .../milli/src/filterable_attributes_rules.rs | 53 ------------------- crates/milli/src/search/facet/search.rs | 52 +++++++----------- crates/milli/src/search/mod.rs | 22 ++++---- .../milli/src/update/index_documents/mod.rs | 52 +----------------- 6 files changed, 45 insertions(+), 156 deletions(-) diff --git a/crates/meilisearch/tests/search/facet_search.rs b/crates/meilisearch/tests/search/facet_search.rs index 25f894757..45b7a381a 100644 --- a/crates/meilisearch/tests/search/facet_search.rs +++ b/crates/meilisearch/tests/search/facet_search.rs @@ -548,7 +548,7 @@ async fn facet_search_with_filterable_attributes_rules_errors() { &json!({"facetName": "invalid", "facetQuery": "a"}), |response, code| { snapshot!(code, @"400 Bad Request"); - snapshot!(response["message"], @r###""Attribute `invalid` is not facet-searchable. Available facet-searchable attributes are: `genres`. To make it facet-searchable add it to the `filterableAttributes` index settings.""###); + snapshot!(response["message"], @r###""Attribute `invalid` is not facet-searchable. Available facet-searchable attributes patterns are: `genres`. To make it facet-searchable add it to the `filterableAttributes` index settings.""###); }, ) .await; diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index bfcb4f780..67a770148 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -158,28 +158,32 @@ and can not be more than 511 bytes.", .document_id.to_string() InvalidSortableAttribute { field: String, valid_fields: BTreeSet, hidden_fields: bool }, #[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. 
{}", .field, - match .valid_fields.is_empty() { + match .valid_patterns.is_empty() { true => "This index does not have configured filterable attributes.".to_string(), - false => format!("Available filterable attributes are: `{}{}`.", - valid_fields.iter().map(AsRef::as_ref).collect::>().join(", "), + false => format!("Available filterable attributes patterns are: `{}{}`.", + valid_patterns.iter().map(AsRef::as_ref).collect::>().join(", "), .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""), ), } )] - InvalidDistinctAttribute { field: String, valid_fields: BTreeSet, hidden_fields: bool }, + InvalidDistinctAttribute { + field: String, + valid_patterns: BTreeSet, + hidden_fields: bool, + }, #[error("Attribute `{}` is not facet-searchable. {}", .field, - match .valid_fields.is_empty() { + match .valid_patterns.is_empty() { true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(), - false => format!("Available facet-searchable attributes are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.", - valid_fields.iter().map(AsRef::as_ref).collect::>().join(", "), + false => format!("Available facet-searchable attributes patterns are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.", + valid_patterns.iter().map(AsRef::as_ref).collect::>().join(", "), .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""), ), } )] InvalidFacetSearchFacetName { field: String, - valid_fields: BTreeSet, + valid_patterns: BTreeSet, hidden_fields: bool, }, #[error("Attribute `{}` is not searchable. 
Available searchable attributes are: `{}{}`.", diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index d70734567..ab20971a4 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -6,7 +6,6 @@ use utoipa::ToSchema; use crate::{ attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch}, constants::RESERVED_GEO_FIELD_NAME, - fields_ids_map::metadata::FieldIdMapWithMetadata, AttributePatterns, }; @@ -225,30 +224,6 @@ impl Default for FilterFeatures { } } -/// Match a field against a set of filterable attributes rules. -/// -/// This function will return the set of field names that match the given filter. -/// -/// # Arguments -/// -/// * `filterable_attributes` - The set of filterable attributes rules to match against. -/// * `fields_ids_map` - The map of field names to field ids. -/// * `filter` - The filter function to apply to the filterable attributes rules. -pub fn filtered_matching_field_names<'fim>( - filterable_attributes: &[FilterableAttributesRule], - fields_ids_map: &'fim FieldIdMapWithMetadata, - filter: &impl Fn(FilterableAttributesFeatures) -> bool, -) -> BTreeSet<&'fim str> { - let mut result = BTreeSet::new(); - for (_, field_name, metadata) in fields_ids_map.iter() { - let features = metadata.filterable_attributes_features(filterable_attributes); - if filter(features) { - result.insert(field_name); - } - } - result -} - /// Match a field against a set of filterable attributes rules. /// /// This function will return the set of patterns that match the given filter. @@ -306,34 +281,6 @@ pub fn matching_features( None } -/// Check if a field is filterable calling the method `FilterableAttributesFeatures::is_filterable()`. -/// -/// # Arguments -/// -/// * `field_name` - The field name to check. -/// * `filterable_attributes` - The set of filterable attributes rules to match against. 
-pub fn is_field_filterable( - field_name: &str, - filterable_attributes: &[FilterableAttributesRule], -) -> bool { - matching_features(field_name, filterable_attributes) - .map_or(false, |(_, features)| features.is_filterable()) -} - -/// Check if a field is facet searchable calling the method `FilterableAttributesFeatures::is_facet_searchable()`. -/// -/// # Arguments -/// -/// * `field_name` - The field name to check. -/// * `filterable_attributes` - The set of filterable attributes rules to match against. -pub fn is_field_facet_searchable( - field_name: &str, - filterable_attributes: &[FilterableAttributesRule], -) -> bool { - matching_features(field_name, filterable_attributes) - .map_or(false, |(_, features)| features.is_facet_searchable()) -} - /// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields. pub fn match_faceted_field( field_name: &str, diff --git a/crates/milli/src/search/facet/search.rs b/crates/milli/src/search/facet/search.rs index da1e1610b..719028a24 100644 --- a/crates/milli/src/search/facet/search.rs +++ b/crates/milli/src/search/facet/search.rs @@ -10,9 +10,7 @@ use roaring::RoaringBitmap; use tracing::error; use crate::error::UserError; -use crate::filterable_attributes_rules::{ - filtered_matching_field_names, is_field_facet_searchable, -}; +use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue}; use crate::search::build_dfa; use crate::{DocumentId, FieldId, OrderBy, Result, Search}; @@ -77,37 +75,27 @@ impl<'a> SearchForFacetValues<'a> { let rtxn = self.search_query.rtxn; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; - let fields_ids_map = index.fields_ids_map_with_metadata(rtxn)?; - let fid = match fields_ids_map.id_with_metadata(&self.facet) { - Some((fid, metadata)) - if metadata - .filterable_attributes_features(&filterable_attributes_rules) 
- .is_facet_searchable() => - { - fid - } - // we return an empty list of results when the attribute has been - // set as filterable but no document contains this field (yet). - None if is_field_facet_searchable(&self.facet, &filterable_attributes_rules) => { - return Ok(Vec::new()); - } - // we return an error when the attribute is not facet searchable - _otherwise => { - let matching_field_names = filtered_matching_field_names( - &filterable_attributes_rules, - &fields_ids_map, - &|features| features.is_facet_searchable(), - ); - let (valid_fields, hidden_fields) = - index.remove_hidden_fields(rtxn, matching_field_names)?; + if !matching_features(&self.facet, &filterable_attributes_rules) + .map_or(false, |(_, features)| features.is_facet_searchable()) + { + let matching_field_names = + filtered_matching_patterns(&filterable_attributes_rules, &|features| { + features.is_facet_searchable() + }); + let (valid_patterns, hidden_fields) = + index.remove_hidden_fields(rtxn, matching_field_names)?; - return Err(UserError::InvalidFacetSearchFacetName { - field: self.facet.clone(), - valid_fields, - hidden_fields, - } - .into()); + return Err(UserError::InvalidFacetSearchFacetName { + field: self.facet.clone(), + valid_patterns, + hidden_fields, } + .into()); + }; + + let fields_ids_map = index.fields_ids_map(rtxn)?; + let Some(fid) = fields_ids_map.id(&self.facet) else { + return Ok(Vec::new()); }; let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? 
{ diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index 7d98f3453..694a872c4 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -9,7 +9,7 @@ use roaring::bitmap::RoaringBitmap; pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET}; pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords}; use self::new::{execute_vector_search, PartialSearchResult}; -use crate::filterable_attributes_rules::{filtered_matching_field_names, is_field_filterable}; +use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::Embedder; use crate::{ @@ -190,20 +190,20 @@ impl<'a> Search<'a> { if let Some(distinct) = &self.distinct { let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?; // check if the distinct field is in the filterable fields - if !is_field_filterable(distinct, &filterable_fields) { + if !matching_features(distinct, &filterable_fields) + .map_or(false, |(_, features)| features.is_filterable()) + { // if not, remove the hidden fields from the filterable fields to generate the error message - let fields_ids_map = ctx.index.fields_ids_map_with_metadata(ctx.txn)?; - let matching_field_names = filtered_matching_field_names( - &filterable_fields, - &fields_ids_map, - &|features| features.is_filterable(), - ); - let (valid_fields, hidden_fields) = - ctx.index.remove_hidden_fields(ctx.txn, matching_field_names)?; + let matching_patterns = + filtered_matching_patterns(&filterable_fields, &|features| { + features.is_filterable() + }); + let (valid_patterns, hidden_fields) = + ctx.index.remove_hidden_fields(ctx.txn, matching_patterns)?; // and return the error return Err(Error::UserError(UserError::InvalidDistinctAttribute { field: distinct.clone(), - valid_fields, + valid_patterns, hidden_fields, })); } diff --git 
a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 5ec8b1c7c..2ae3fa4dd 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -770,12 +770,11 @@ mod tests { use bumpalo::Bump; use fst::IntoStreamer; use heed::RwTxn; - use maplit::{btreeset, hashset}; + use maplit::hashset; use super::*; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::documents::mmap_from_objects; - use crate::filterable_attributes_rules::filtered_matching_field_names; use crate::index::tests::TempIndex; use crate::index::IndexEmbeddingConfig; use crate::progress::Progress; @@ -1255,14 +1254,6 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); - let facets = - filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { - features.is_filterable() - }); - assert_eq!(facets, btreeset!("title", "nested.object", "nested.machin")); - // testing the simple query search let mut search = crate::Search::new(&rtxn, &index); search.query("document"); @@ -1478,15 +1469,6 @@ mod tests { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); - let facets = - filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { - features.is_filterable() - }); - - assert_eq!(facets, btreeset!("dog", "dog.race", "dog.race.bernese mountain")); - for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] { let mut search = crate::Search::new(&rtxn, &index); let filter = format!(r#""dog.race.bernese mountain" = {s}"#); @@ -1504,17 +1486,6 @@ mod tests { db_snap!(index, facet_id_string_docids, @""); db_snap!(index, field_id_docid_facet_strings, @""); - let rtxn = 
index.read_txn().unwrap(); - - let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); - let facets = - filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { - features.is_filterable() - }); - - assert_eq!(facets, btreeset!()); - // update the settings to test the sortable index .update_settings(|settings| { @@ -1744,13 +1715,6 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); - let facets = - filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { - features.is_filterable() - }); - assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue")); let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); @@ -1855,13 +1819,6 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); - let facets = - filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { - features.is_filterable() - }); - assert_eq!(facets, btreeset!("colour", "colour.green", "colour.green.blue")); let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap(); let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap(); @@ -1924,13 +1881,6 @@ mod tests { let check_ok = |index: &Index| { let rtxn = index.read_txn().unwrap(); - let filterable_fields = index.filterable_attributes_rules(&rtxn).unwrap(); - let fields_ids_map = index.fields_ids_map_with_metadata(&rtxn).unwrap(); - let facets = - 
filtered_matching_field_names(&filterable_fields, &fields_ids_map, &|features| { - features.is_filterable() - }); - assert_eq!(facets, btreeset!("tags", "tags.green", "tags.green.blue")); let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap(); let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap(); From 40c5f911fd1ae75607ce5bb2bfdd88eb6b19ad26 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 10 Mar 2025 17:05:41 +0100 Subject: [PATCH 28/35] Revert metadata creation when computing the facet-distribution --- crates/meilisearch/tests/search/errors.rs | 67 ++++++++++--------- crates/meilisearch/tests/search/multi/mod.rs | 4 +- crates/meilisearch/tests/similar/errors.rs | 34 +++++----- crates/milli/src/error.rs | 18 ++--- .../src/search/facet/facet_distribution.rs | 55 ++++++--------- 5 files changed, 85 insertions(+), 93 deletions(-) diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index 46a03e56f..8561aa490 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -434,7 +434,7 @@ async fn search_non_filterable_facets() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute is `title`.", + "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute pattern is `title`.", "code": "invalid_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_facets" @@ -445,7 +445,7 @@ async fn search_non_filterable_facets() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid facet distribution, attribute `doggo` is not filterable. 
The available filterable attribute is `title`.", + "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute pattern is `title`.", "code": "invalid_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_facets" @@ -468,7 +468,7 @@ async fn search_non_filterable_facets_multiple_filterable() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attributes are `genres, title`.", + "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute patterns are `genres, title`.", "code": "invalid_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_facets" @@ -479,7 +479,7 @@ async fn search_non_filterable_facets_multiple_filterable() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attributes are `genres, title`.", + "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute patterns are `genres, title`.", "code": "invalid_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_facets" @@ -532,7 +532,7 @@ async fn search_non_filterable_facets_multiple_facets() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attributes are `genres, title`.", + "message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. 
The available filterable attribute patterns are `genres, title`.", "code": "invalid_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_facets" @@ -543,7 +543,7 @@ async fn search_non_filterable_facets_multiple_facets() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attributes are `genres, title`.", + "message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attribute patterns are `genres, title`.", "code": "invalid_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_facets" @@ -1204,52 +1204,55 @@ async fn search_on_unknown_field_plus_joker() { #[actix_rt::test] async fn distinct_at_search_time() { - let server = Server::new_shared(); - let index = server.unique_index(); + let server = Server::new().await; + let index = server.index("test"); let (task, _) = index.create(None).await; index.wait_task(task.uid()).await.succeeded(); let (response, _code) = index.add_documents(json!([{"id": 1, "color": "Doggo", "machin": "Action"}]), None).await; index.wait_task(response.uid()).await.succeeded(); - let expected_response = json!({ - "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. 
This index does not have configured filterable attributes.", index.uid), - "code": "invalid_search_distinct", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" - }); let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; - assert_eq!(response, expected_response); - assert_eq!(code, 400); + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Index `test`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await; index.wait_task(task.uid()).await.succeeded(); - let expected_response = json!({ - "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", index.uid), - "code": "invalid_search_distinct", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" - }); let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; - assert_eq!(response, expected_response); - assert_eq!(code, 400); + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Index `test`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. 
Available filterable attributes patterns are: `color, machin`.", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await; index.wait_task(task.uid()).await.succeeded(); - let expected_response = json!({ - "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", index.uid), - "code": "invalid_search_distinct", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" - }); let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; - assert_eq!(response, expected_response); - assert_eq!(code, 400); + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Index `test`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes patterns are: `color, <..hidden-attributes>`.", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); let (response, code) = index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await; diff --git a/crates/meilisearch/tests/search/multi/mod.rs b/crates/meilisearch/tests/search/multi/mod.rs index e5c58268d..be0142c2d 100644 --- a/crates/meilisearch/tests/search/multi/mod.rs +++ b/crates/meilisearch/tests/search/multi/mod.rs @@ -3653,7 +3653,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. 
The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`", + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attribute patterns are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" @@ -3675,7 +3675,7 @@ async fn federation_non_faceted_for_an_index() { snapshot!(code, @"400 Bad Request"); insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###" { - "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries", + "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attribute patterns are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries", "code": "invalid_multi_search_facets", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets" diff --git a/crates/meilisearch/tests/similar/errors.rs b/crates/meilisearch/tests/similar/errors.rs index 75bd6e46b..29e87d4b2 100644 --- a/crates/meilisearch/tests/similar/errors.rs +++ b/crates/meilisearch/tests/similar/errors.rs @@ -452,18 +452,19 @@ async fn filter_invalid_attribute_array() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await.succeeded(); - let expected_response = json!({ - "message": "Attribute `many` is not filterable. 
Available filterable attributes are: `title`.\n1:5 many = Glass", - "code": "invalid_similar_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" - }); index .similar( json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Attribute `many` is not filterable. Available filterable attributes patterns are: `title`.\n1:5 many = Glass", + "code": "invalid_similar_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" + } + "###); }, ) .await; @@ -492,18 +493,19 @@ async fn filter_invalid_attribute_string() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await.succeeded(); - let expected_response = json!({ - "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", - "code": "invalid_similar_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" - }); index .similar( json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Attribute `many` is not filterable. 
Available filterable attributes patterns are: `title`.\n1:5 many = Glass", + "code": "invalid_similar_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" + } + "###); }, ) .await; diff --git a/crates/milli/src/error.rs b/crates/milli/src/error.rs index 67a770148..77017a3fd 100644 --- a/crates/milli/src/error.rs +++ b/crates/milli/src/error.rs @@ -121,10 +121,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco and can not be more than 511 bytes.", .document_id.to_string() )] InvalidDocumentId { document_id: Value }, - #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))] + #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_patterns))] InvalidFacetsDistribution { invalid_facets_name: BTreeSet, - valid_facets_name: BTreeSet, + valid_patterns: BTreeSet, }, #[error(transparent)] InvalidGeoField(#[from] GeoError), @@ -357,9 +357,9 @@ pub enum GeoError { fn format_invalid_filter_distribution( invalid_facets_name: &BTreeSet, - valid_facets_name: &BTreeSet, + valid_patterns: &BTreeSet, ) -> String { - if valid_facets_name.is_empty() { + if valid_patterns.is_empty() { return "this index does not have configured filterable attributes.".into(); } @@ -381,17 +381,17 @@ fn format_invalid_filter_distribution( .unwrap(), }; - match valid_facets_name.len() { + match valid_patterns.len() { 1 => write!( result, - " The available filterable attribute is `{}`.", - valid_facets_name.first().unwrap() + " The available filterable attribute pattern is `{}`.", + valid_patterns.first().unwrap() ) .unwrap(), _ => write!( result, - " The available filterable attributes are `{}`.", - valid_facets_name.iter().map(AsRef::as_ref).collect::>().join(", ") + " The available filterable attribute patterns are `{}`.", + valid_patterns.iter().map(AsRef::as_ref).collect::>().join(", ") ) 
.unwrap(), } diff --git a/crates/milli/src/search/facet/facet_distribution.rs b/crates/milli/src/search/facet/facet_distribution.rs index 5c41a0424..757c18598 100644 --- a/crates/milli/src/search/facet/facet_distribution.rs +++ b/crates/milli/src/search/facet/facet_distribution.rs @@ -11,7 +11,7 @@ use serde::{Deserialize, Serialize}; use crate::attribute_patterns::match_field_legacy; use crate::facet::FacetType; -use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata, MetadataBuilder}; +use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features}; use crate::heed_codec::facet::{ FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec, }; @@ -294,13 +294,13 @@ impl<'a> FacetDistribution<'a> { return Ok(Default::default()); }; - let fields_ids_map = self.index.fields_ids_map_with_metadata(self.rtxn)?; + let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?; - self.check_faceted_fields(&fields_ids_map, &filterable_attributes_rules)?; + self.check_faceted_fields(&filterable_attributes_rules)?; let mut distribution = BTreeMap::new(); - for (fid, name, metadata) in fields_ids_map.iter() { - if self.select_field(name, &metadata, &filterable_attributes_rules) { + for (fid, name) in fields_ids_map.iter() { + if self.select_field(name, &filterable_attributes_rules) { let min_value = if let Some(min_value) = crate::search::facet::facet_min_value( self.index, self.rtxn, @@ -331,16 +331,12 @@ impl<'a> FacetDistribution<'a> { pub fn execute(&self) -> Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; - let fields_ids_map = FieldIdMapWithMetadata::new( - fields_ids_map, - MetadataBuilder::from_index(self.index, self.rtxn)?, - ); let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?; - self.check_faceted_fields(&fields_ids_map, &filterable_attributes_rules)?; + 
self.check_faceted_fields(&filterable_attributes_rules)?; let mut distribution = BTreeMap::new(); - for (fid, name, metadata) in fields_ids_map.iter() { - if self.select_field(name, &metadata, &filterable_attributes_rules) { + for (fid, name) in fields_ids_map.iter() { + if self.select_field(name, &filterable_attributes_rules) { let order_by = self .facets .as_ref() @@ -358,11 +354,12 @@ impl<'a> FacetDistribution<'a> { fn select_field( &self, name: &str, - metadata: &Metadata, filterable_attributes_rules: &[FilterableAttributesRule], ) -> bool { // If the field is not filterable, we don't want to compute the facet distribution. - if !metadata.filterable_attributes_features(filterable_attributes_rules).is_filterable() { + if !matching_features(name, filterable_attributes_rules) + .map_or(false, |(_, features)| features.is_filterable()) + { return false; } @@ -378,41 +375,31 @@ impl<'a> FacetDistribution<'a> { /// Check if the fields in the facets are valid faceted fields. fn check_faceted_fields( &self, - fields_ids_map: &FieldIdMapWithMetadata, filterable_attributes_rules: &[FilterableAttributesRule], ) -> Result<()> { let mut invalid_facets = BTreeSet::new(); if let Some(facets) = &self.facets { for field in facets.keys() { - let is_valid_faceted_field = - fields_ids_map.id_with_metadata(field).map_or(false, |(_, metadata)| { - metadata - .filterable_attributes_features(filterable_attributes_rules) - .is_filterable() - }); - if !is_valid_faceted_field { + let is_valid_filterable_field = + matching_features(field, filterable_attributes_rules) + .map_or(false, |(_, features)| features.is_filterable()); + if !is_valid_filterable_field { invalid_facets.insert(field.to_string()); } } } if !invalid_facets.is_empty() { - let valid_facets_name = fields_ids_map - .iter() - .filter_map(|(_, name, metadata)| { - if metadata - .filterable_attributes_features(filterable_attributes_rules) - .is_filterable() - { - Some(name.to_string()) - } else { - None - } + let 
valid_patterns = + filtered_matching_patterns(filterable_attributes_rules, &|features| { + features.is_filterable() }) + .into_iter() + .map(String::from) .collect(); return Err(Error::UserError(UserError::InvalidFacetsDistribution { invalid_facets_name: invalid_facets, - valid_facets_name, + valid_patterns, })); } From 6269f757ff508ce573ba8e9a6743f37fb55f119f Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 10 Mar 2025 18:35:10 +0100 Subject: [PATCH 29/35] Revert document creation in tests --- crates/meilisearch/tests/search/errors.rs | 8 -------- crates/meilisearch/tests/search/multi/mod.rs | 10 ++-------- 2 files changed, 2 insertions(+), 16 deletions(-) diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index 8561aa490..ede615748 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -424,8 +424,6 @@ async fn search_invalid_threshold() { async fn search_non_filterable_facets() { let server = Server::new_shared(); let index = server.unique_index(); - let (response, _code) = index.add_documents(json!([{"id": 1, "title": "Doggo"}]), None).await; - index.wait_task(response.uid()).await.succeeded(); let (response, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await; // Wait for the settings update to complete index.wait_task(response.uid()).await.succeeded(); @@ -457,9 +455,6 @@ async fn search_non_filterable_facets() { async fn search_non_filterable_facets_multiple_filterable() { let server = Server::new_shared(); let index = server.unique_index(); - let (response, _code) = - index.add_documents(json!([{"id": 1, "title": "Doggo", "genres": "Action"}]), None).await; - index.wait_task(response.uid()).await.succeeded(); let (response, _code) = index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await; index.wait_task(response.uid()).await.succeeded(); @@ -521,9 +516,6 @@ async fn 
search_non_filterable_facets_no_filterable() { async fn search_non_filterable_facets_multiple_facets() { let server = Server::new_shared(); let index = server.unique_index(); - let (response, _code) = - index.add_documents(json!([{"id": 1, "title": "Doggo", "genres": "Action"}]), None).await; - index.wait_task(response.uid()).await.succeeded(); let (response, _uid) = index.update_settings(json!({"filterableAttributes": ["title", "genres"]})).await; index.wait_task(response.uid()).await.succeeded(); diff --git a/crates/meilisearch/tests/search/multi/mod.rs b/crates/meilisearch/tests/search/multi/mod.rs index be0142c2d..df8b2f1eb 100644 --- a/crates/meilisearch/tests/search/multi/mod.rs +++ b/crates/meilisearch/tests/search/multi/mod.rs @@ -3604,28 +3604,22 @@ async fn federation_non_faceted_for_an_index() { let index = server.index("fruits"); - let documents = FRUITS_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - let (value, _) = index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id", "name"]}), ) .await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("fruits-no-name"); - let documents = FRUITS_DOCUMENTS.clone(); - let (value, _) = index.add_documents(documents, None).await; - index.wait_task(value.uid()).await.succeeded(); - let (value, _) = index .update_settings( json!({"searchableAttributes": ["name"], "filterableAttributes": ["BOOST", "id"]}), ) .await; + index.wait_task(value.uid()).await.succeeded(); let index = server.index("fruits-no-facets"); From dfb841164790a0894005ec6bfbf4cf9de9ce7802 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 11 Mar 2025 09:46:24 +0100 Subject: [PATCH 30/35] Revert "Remove filter pre-check" This reverts commit b12ffd13569e1c90f7ae1b3a45211eec4594b0e2. 
--- crates/milli/src/search/facet/filter.rs | 51 +++++++++++++------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index 9844809e9..707bbd6a8 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -231,8 +231,26 @@ impl<'a> Filter<'a> { impl<'a> Filter<'a> { pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result { + // to avoid doing this for each recursive call we're going to do it ONCE ahead of time let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?; + for fid in self.condition.fids(MAX_FILTER_DEPTH) { + let attribute = fid.value(); + if matching_features(attribute, &filterable_attributes_rules) + .map_or(false, |(_, features)| features.is_filterable()) + { + continue; + } + + // If the field is not filterable, return an error + return Err(fid.as_external_error(FilterError::AttributeNotFilterable { + attribute, + filterable_patterns: filtered_matching_patterns( + &filterable_attributes_rules, + &|features| features.is_filterable(), + ), + }))?; + } self.inner_evaluate(rtxn, index, &fields_ids_map, &filterable_attributes_rules, None) } @@ -466,22 +484,15 @@ impl<'a> Filter<'a> { } } FilterCondition::In { fid, els } => { - let Some((rule_index, features)) = - matching_features(fid.value(), filterable_attribute_rules) - .filter(|(_, features)| features.is_filterable()) - else { - // If the field is not filterable, return an error - return Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_patterns: filtered_matching_patterns( - filterable_attribute_rules, - &|features| features.is_filterable(), - ), - }))?; - }; let Some(field_id) = field_ids_map.id(fid.value()) else { return Ok(RoaringBitmap::new()); }; + let Some((rule_index, features)) = + matching_features(fid.value(), 
filterable_attribute_rules) + else { + return Ok(RoaringBitmap::new()); + }; + els.iter() .map(|el| Condition::Equal(el.clone())) .map(|op| { @@ -492,20 +503,12 @@ impl<'a> Filter<'a> { .union() } FilterCondition::Condition { fid, op } => { + let Some(field_id) = field_ids_map.id(fid.value()) else { + return Ok(RoaringBitmap::new()); + }; let Some((rule_index, features)) = matching_features(fid.value(), filterable_attribute_rules) - .filter(|(_, features)| features.is_filterable()) else { - // If the field is not filterable, return an error - return Err(fid.as_external_error(FilterError::AttributeNotFilterable { - attribute: fid.value(), - filterable_patterns: filtered_matching_patterns( - filterable_attribute_rules, - &|features| features.is_filterable(), - ), - }))?; - }; - let Some(field_id) = field_ids_map.id(fid.value()) else { return Ok(RoaringBitmap::new()); }; From 7072fe978011d0f1c490f78430c341ac74223cdc Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 11 Mar 2025 15:22:00 +0100 Subject: [PATCH 31/35] Fix typos in comments and messages --- .../after_removing_the_documents.snap | 2 +- crates/meilisearch/tests/documents/errors.rs | 4 ++-- crates/meilisearch/tests/search/errors.rs | 4 ++-- crates/meilisearch/tests/search/filters.rs | 4 ++-- crates/meilisearch/tests/search/mod.rs | 4 ++-- crates/meilisearch/tests/similar/errors.rs | 4 ++-- .../milli/src/search/facet/facet_distribution.rs | 4 ++-- crates/milli/src/search/facet/filter.rs | 14 +++++++------- 8 files changed, 20 insertions(+), 20 deletions(-) diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap index 7c88e55b2..d06a4e78a 100644 --- 
a/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_failure.rs/fail_in_process_batch_for_document_deletion/after_removing_the_documents.snap @@ -10,7 +10,7 @@ source: crates/index-scheduler/src/scheduler/test_failure.rs 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }} 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }} 3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }} -4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes patterns are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} +4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. 
Available filterable attribute patterns are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }} 5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }} ---------------------------------------------------------------------- ### Status: diff --git a/crates/meilisearch/tests/documents/errors.rs b/crates/meilisearch/tests/documents/errors.rs index 73a3f2e4f..c02c1f000 100644 --- a/crates/meilisearch/tests/documents/errors.rs +++ b/crates/meilisearch/tests/documents/errors.rs @@ -636,7 +636,7 @@ async fn delete_document_by_filter() { "originalFilter": "\"catto = jorts\"" }, "error": { - "message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes patterns are: `id`, `title`.\n1:6 catto = jorts", + "message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attribute patterns are: `id`, `title`.\n1:6 catto = jorts", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -738,7 +738,7 @@ async fn fetch_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Attribute `doggo` is not filterable. Available filterable attributes patterns are: `color`.\n1:6 doggo = bernese", + "message": "Attribute `doggo` is not filterable. 
Available filterable attribute patterns are: `color`.\n1:6 doggo = bernese", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" diff --git a/crates/meilisearch/tests/search/errors.rs b/crates/meilisearch/tests/search/errors.rs index ede615748..c4cba7504 100644 --- a/crates/meilisearch/tests/search/errors.rs +++ b/crates/meilisearch/tests/search/errors.rs @@ -708,7 +708,7 @@ async fn filter_invalid_attribute_array() { |response, code| { snapshot!(response, @r###" { - "message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes patterns are: `title`.\n1:5 many = Glass", + "message": "Index `test`: Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -729,7 +729,7 @@ async fn filter_invalid_attribute_string() { |response, code| { snapshot!(response, @r###" { - "message": "Index `test`: Attribute `many` is not filterable. Available filterable attributes patterns are: `title`.\n1:5 many = Glass", + "message": "Index `test`: Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/meilisearch/tests/search/filters.rs b/crates/meilisearch/tests/search/filters.rs index fac3bbebc..619160a3b 100644 --- a/crates/meilisearch/tests/search/filters.rs +++ b/crates/meilisearch/tests/search/filters.rs @@ -720,7 +720,7 @@ async fn test_filterable_attributes_priority() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `doggos.age` is not filterable. 
Available filterable attributes patterns are: `doggos.*`.\n1:11 doggos.age > 2", + "message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attribute patterns are: `doggos.*`.\n1:11 doggos.age > 2", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -746,7 +746,7 @@ async fn test_filterable_attributes_priority() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `doggos` is not filterable. Available filterable attributes patterns are: `doggos.*`.\n1:7 doggos EXISTS", + "message": "Index `test`: Attribute `doggos` is not filterable. Available filterable attribute patterns are: `doggos.*`.\n1:7 doggos EXISTS", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/meilisearch/tests/search/mod.rs b/crates/meilisearch/tests/search/mod.rs index dc6048ea2..d7a09b58e 100644 --- a/crates/meilisearch/tests/search/mod.rs +++ b/crates/meilisearch/tests/search/mod.rs @@ -1753,7 +1753,7 @@ async fn test_nested_fields() { assert_eq!(code, 400, "{}", response); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `nested` is not filterable. Available filterable attributes patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = array", + "message": "Index `test`: Attribute `nested` is not filterable. Available filterable attribute patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = array", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" @@ -1772,7 +1772,7 @@ async fn test_nested_fields() { assert_eq!(code, 400, "{}", response); snapshot!(json_string!(response), @r###" { - "message": "Index `test`: Attribute `nested` is not filterable. 
Available filterable attributes patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = \"I lied\"", + "message": "Index `test`: Attribute `nested` is not filterable. Available filterable attribute patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = \"I lied\"", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_filter" diff --git a/crates/meilisearch/tests/similar/errors.rs b/crates/meilisearch/tests/similar/errors.rs index 29e87d4b2..5c4ac1f38 100644 --- a/crates/meilisearch/tests/similar/errors.rs +++ b/crates/meilisearch/tests/similar/errors.rs @@ -459,7 +459,7 @@ async fn filter_invalid_attribute_array() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Attribute `many` is not filterable. Available filterable attributes patterns are: `title`.\n1:5 many = Glass", + "message": "Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" @@ -500,7 +500,7 @@ async fn filter_invalid_attribute_string() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Attribute `many` is not filterable. Available filterable attributes patterns are: `title`.\n1:5 many = Glass", + "message": "Attribute `many` is not filterable. 
Available filterable attribute patterns are: `title`.\n1:5 many = Glass", "code": "invalid_similar_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" diff --git a/crates/milli/src/search/facet/facet_distribution.rs b/crates/milli/src/search/facet/facet_distribution.rs index 757c18598..4b5c1158e 100644 --- a/crates/milli/src/search/facet/facet_distribution.rs +++ b/crates/milli/src/search/facet/facet_distribution.rs @@ -350,7 +350,7 @@ impl<'a> FacetDistribution<'a> { Ok(distribution) } - /// Select a field if it is faceted and in the facets. + /// Select a field if it is filterable and in the facets. fn select_field( &self, name: &str, @@ -372,7 +372,7 @@ impl<'a> FacetDistribution<'a> { } } - /// Check if the fields in the facets are valid faceted fields. + /// Check if the fields in the facets are valid filterable fields. fn check_faceted_fields( &self, filterable_attributes_rules: &[FilterableAttributesRule], diff --git a/crates/milli/src/search/facet/filter.rs b/crates/milli/src/search/facet/filter.rs index 707bbd6a8..eb370a757 100644 --- a/crates/milli/src/search/facet/filter.rs +++ b/crates/milli/src/search/facet/filter.rs @@ -82,7 +82,7 @@ impl<'a> Display for FilterError<'a> { if filterable_patterns.is_empty() { write!(f, " This index does not have configured filterable attributes.") } else { - write!(f, " Available filterable attributes patterns are: ")?; + write!(f, " Available filterable attribute patterns are: ")?; let mut filterables_list = filterable_patterns.iter().map(AsRef::as_ref).collect::>(); filterables_list.sort_unstable(); @@ -911,42 +911,42 @@ mod tests { let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `_geo` is not filterable. Available filterable attributes patterns are: `title`. + Attribute `_geo` is not filterable. 
Available filterable attribute patterns are: `title`. 12:16 _geoRadius(-100, 150, 10) "###); let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `_geo` is not filterable. Available filterable attributes patterns are: `title`. + Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`. 18:20 _geoBoundingBox([42, 150], [30, 10]) "###); let filter = Filter::from_str("name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `name` is not filterable. Available filterable attributes patterns are: `title`. + Attribute `name` is not filterable. Available filterable attribute patterns are: `title`. 1:5 name = 12 "###); let filter = Filter::from_str("title = \"test\" AND name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `name` is not filterable. Available filterable attributes patterns are: `title`. + Attribute `name` is not filterable. Available filterable attribute patterns are: `title`. 20:24 title = "test" AND name = 12 "###); let filter = Filter::from_str("title = \"test\" AND name IN [12]").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `name` is not filterable. Available filterable attributes patterns are: `title`. + Attribute `name` is not filterable. Available filterable attribute patterns are: `title`. 20:24 title = "test" AND name IN [12] "###); let filter = Filter::from_str("title = \"test\" AND name != 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); snapshot!(error.to_string(), @r###" - Attribute `name` is not filterable. Available filterable attributes patterns are: `title`. + Attribute `name` is not filterable. 
Available filterable attribute patterns are: `title`. 20:24 title = "test" AND name != 12 "###); } From 8790880589706320a692d242e3383a01e7deb3ed Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 11 Mar 2025 14:33:54 +0100 Subject: [PATCH 32/35] Fix clippy --- crates/milli/src/fields_ids_map/metadata.rs | 7 +++---- .../src/update/new/extract/faceted/extract_facets.rs | 8 ++++---- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/crates/milli/src/fields_ids_map/metadata.rs b/crates/milli/src/fields_ids_map/metadata.rs index 7f81e6b79..0d8c3bd4b 100644 --- a/crates/milli/src/fields_ids_map/metadata.rs +++ b/crates/milli/src/fields_ids_map/metadata.rs @@ -214,10 +214,9 @@ pub struct MetadataBuilder { impl MetadataBuilder { pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result { - let searchable_attributes = match index.user_defined_searchable_fields(rtxn)? { - Some(fields) => Some(fields.into_iter().map(|s| s.to_string()).collect()), - None => None, - }; + let searchable_attributes = index + .user_defined_searchable_fields(rtxn)? 
+ .map(|fields| fields.into_iter().map(|s| s.to_string()).collect()); let filterable_attributes = index.filterable_attributes_rules(rtxn)?; let sortable_attributes = index.sortable_fields(rtxn)?; let localized_attributes = index.localized_attributes_rules(rtxn)?; diff --git a/crates/milli/src/update/new/extract/faceted/extract_facets.rs b/crates/milli/src/update/new/extract/faceted/extract_facets.rs index 05fcdf72a..b3aa8f984 100644 --- a/crates/milli/src/update/new/extract/faceted/extract_facets.rs +++ b/crates/milli/src/update/new/extract/faceted/extract_facets.rs @@ -57,10 +57,10 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> let change = change?; FacetedDocidsExtractor::extract_document_change( context, - &self.filterable_attributes, - &self.sortable_fields, - &self.asc_desc_fields, - &self.distinct_field, + self.filterable_attributes, + self.sortable_fields, + self.asc_desc_fields, + self.distinct_field, self.is_geo_enabled, change, self.sender, From a370b467fe055633df4772e2a493b948df4bfd69 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 11 Mar 2025 15:31:57 +0100 Subject: [PATCH 33/35] Merge `MetadataBuilder::_new` into `MetadataBuilder::new` --- crates/milli/src/fields_ids_map/metadata.rs | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/crates/milli/src/fields_ids_map/metadata.rs b/crates/milli/src/fields_ids_map/metadata.rs index 0d8c3bd4b..89b0a446b 100644 --- a/crates/milli/src/fields_ids_map/metadata.rs +++ b/crates/milli/src/fields_ids_map/metadata.rs @@ -223,7 +223,7 @@ impl MetadataBuilder { let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string()); let asc_desc_attributes = index.asc_desc_fields(rtxn)?; - Ok(Self::_new( + Ok(Self::new( searchable_attributes, filterable_attributes, sortable_attributes, @@ -233,7 +233,6 @@ impl MetadataBuilder { )) } - #[cfg(test)] /// Build a new `MetadataBuilder` from the given parameters. 
/// /// This is used for testing, prefer using `MetadataBuilder::from_index` instead. @@ -244,24 +243,6 @@ impl MetadataBuilder { localized_attributes: Option>, distinct_attribute: Option, asc_desc_attributes: HashSet, - ) -> Self { - Self::_new( - searchable_attributes, - filterable_attributes, - sortable_attributes, - localized_attributes, - distinct_attribute, - asc_desc_attributes, - ) - } - - fn _new( - searchable_attributes: Option>, - filterable_attributes: Vec, - sortable_attributes: HashSet, - localized_attributes: Option>, - distinct_attribute: Option, - asc_desc_attributes: HashSet, ) -> Self { let searchable_attributes = match searchable_attributes { Some(fields) if fields.iter().any(|f| f == "*") => None, From ea7e299663cd693921027204a6b0e07005a344ef Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 11 Mar 2025 16:48:55 +0100 Subject: [PATCH 34/35] Update has_changed_for_fields documentation --- crates/milli/src/update/new/document_change.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/crates/milli/src/update/new/document_change.rs b/crates/milli/src/update/new/document_change.rs index c790b4d32..38369a4d7 100644 --- a/crates/milli/src/update/new/document_change.rs +++ b/crates/milli/src/update/new/document_change.rs @@ -167,10 +167,12 @@ impl<'doc> Update<'doc> { } } - /// Returns whether the updated version of the document is different from the current version for the passed subset of fields. + /// Returns whether the updated version of the document is different from the current version for the subset of fields selected by `selector`. /// - /// `true` if at least one top-level-field that is a exactly a member of field or a parent of a member of field changed. + /// `true` if at least one top-level-field that is exactly a selected field or a parent of a selected field changed. /// Otherwise `false`. + /// + /// - Note: `_geo` and `_vectors` are not taken into account by this function. 
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>( &self, selector: &mut impl FnMut(&str) -> PatternMatch, From d500c7f625f02419f2083c7013b5a249144dc841 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 11 Mar 2025 17:44:03 +0100 Subject: [PATCH 35/35] Add default deserialize value --- .../tests/settings/get_settings.rs | 67 ++++++++++++++++++- .../milli/src/filterable_attributes_rules.rs | 14 ++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/crates/meilisearch/tests/settings/get_settings.rs b/crates/meilisearch/tests/settings/get_settings.rs index ff9ae5472..fbb97f999 100644 --- a/crates/meilisearch/tests/settings/get_settings.rs +++ b/crates/meilisearch/tests/settings/get_settings.rs @@ -523,7 +523,12 @@ async fn granular_filterable_attributes() { index.update_settings(json!({ "filterableAttributes": [ { "attributePatterns": ["name"], "features": { "facetSearch": true, "filter": {"equality": true, "comparison": false} } }, { "attributePatterns": ["age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": true} } }, - { "attributePatterns": ["id"] } + { "attributePatterns": ["id"] }, + { "attributePatterns": ["default-filterable-features-null"], "features": { "facetSearch": true } }, + { "attributePatterns": ["default-filterable-features-equality"], "features": { "facetSearch": true, "filter": {"comparison": true} } }, + { "attributePatterns": ["default-filterable-features-comparison"], "features": { "facetSearch": true, "filter": {"equality": true} } }, + { "attributePatterns": ["default-filterable-features-empty"], "features": { "facetSearch": true, "filter": {} } }, + { "attributePatterns": ["default-facet-search"], "features": { "filter": {"equality": true, "comparison": true} } }, ] })).await; assert_eq!(code, 202); index.wait_task(response.uid()).await.succeeded(); @@ -567,6 +572,66 @@ async fn granular_filterable_attributes() { "comparison": false } } + }, + { + "attributePatterns": [ + 
"default-filterable-features-null" + ], + "features": { + "facetSearch": true, + "filter": { + "equality": true, + "comparison": false + } + } + }, + { + "attributePatterns": [ + "default-filterable-features-equality" + ], + "features": { + "facetSearch": true, + "filter": { + "equality": true, + "comparison": true + } + } + }, + { + "attributePatterns": [ + "default-filterable-features-comparison" + ], + "features": { + "facetSearch": true, + "filter": { + "equality": true, + "comparison": false + } + } + }, + { + "attributePatterns": [ + "default-filterable-features-empty" + ], + "features": { + "facetSearch": true, + "filter": { + "equality": true, + "comparison": false + } + } + }, + { + "attributePatterns": [ + "default-facet-search" + ], + "features": { + "facetSearch": false, + "filter": { + "equality": true, + "comparison": true + } + } } ] "###); diff --git a/crates/milli/src/filterable_attributes_rules.rs b/crates/milli/src/filterable_attributes_rules.rs index ab20971a4..53af30fd6 100644 --- a/crates/milli/src/filterable_attributes_rules.rs +++ b/crates/milli/src/filterable_attributes_rules.rs @@ -71,7 +71,11 @@ impl FilterableAttributesPatterns { #[deserr(rename_all = camelCase, deny_unknown_fields)] #[derive(Default)] pub struct FilterableAttributesFeatures { + #[serde(default)] + #[deserr(default)] facet_search: bool, + #[serde(default)] + #[deserr(default)] filter: FilterFeatures, } @@ -144,11 +148,21 @@ impl Deserr for FilterableAttributesRule { } #[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug, Deserr, ToSchema)] +#[serde(deny_unknown_fields, rename_all = "camelCase")] +#[deserr(rename_all = camelCase, deny_unknown_fields)] pub struct FilterFeatures { + #[serde(default = "default_true")] + #[deserr(default = true)] equality: bool, + #[serde(default)] + #[deserr(default)] comparison: bool, } +fn default_true() -> bool { + true +} + impl FilterFeatures { /// Get the allowed operators for the filter. 
pub fn allowed_operators(&self) -> Vec {