5254: Granular Filterable attribute settings r=ManyTheFish a=ManyTheFish

# Related
**Issue:** https://github.com/meilisearch/meilisearch/issues/5163
**PRD:** https://meilisearch.notion.site/API-usage-Settings-to-opt-out-indexing-features-filterableAttributes-1764b06b651f80aba8bdf359b2df3ca8

# Summary
Change the `filterableAttributes` settings to let the user choose which facet feature he wants to activate or not.
Deactivating a feature will avoid some database computation in the indexing process and save time and disk size.

# Example

`PATCH /indexes/:index_uid/settings`

```json
{
  "filterableAttributes": [
    {
      "patterns": [
        "cattos",
        "doggos.age"
      ],
      "features": {
        "facetSearch": false,
        "filter": {
          "equality": true,
          "comparison": false
        }
      }
    }
  ]
}
```

# Impact on the codebase
- Settings API:
  - `/settings`
  - `/settings/filterable-attributes`
  - OpenAPI 
  - may impact the LocalizedAttributesRules due to the AttributePatterns factorization
- Database:
  - Filterable attributes format changed
  - Faceted field_ids are no more stored in the database
  - FieldIdsMap has no more unexisting fields
- Search:
  - Search using filters
  - Facet search
  - `Attributes` ranking rule
  - Distinct attribute
  - Facet distribution
- Settings reindexing:
  - searchable
  - facet
  - vector
  - geo
- Document indexing:
  - searchable
  - facet
  - vector
  - geo
- Dump import

# Note for the reviewers
The changes are huge and have been split in different commits with a dedicated explanation, I suggest reviewing the commit 1by1

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-03-12 09:00:43 +00:00 committed by GitHub
commit a2a86ef4e2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
82 changed files with 4169 additions and 1661 deletions

View File

@ -12,7 +12,7 @@ use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::Index;
use milli::{FilterableAttributesRule, Index};
use rand::seq::SliceRandom;
use rand_chacha::rand_core::SeedableRng;
use roaring::RoaringBitmap;
@ -57,7 +57,8 @@ fn setup_settings<'t>(
let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
builder.set_searchable_fields(searchable_fields);
let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
let filterable_fields =
filterable_fields.iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect();
builder.set_filterable_fields(filterable_fields);
let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();

View File

@ -2,7 +2,7 @@ mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use milli::{update::Settings, FilterableAttributesRule};
use utils::Conf;
#[cfg(not(windows))]
@ -21,8 +21,10 @@ fn base_conf(builder: &mut Settings) {
["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
builder.set_searchable_fields(searchable_fields);
let filterable_fields =
["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
let filterable_fields = ["_geo", "population", "elevation"]
.iter()
.map(|s| FilterableAttributesRule::Field(s.to_string()))
.collect();
builder.set_filterable_fields(filterable_fields);
let sortable_fields =

View File

@ -2,7 +2,7 @@ mod datasets_paths;
mod utils;
use criterion::{criterion_group, criterion_main};
use milli::update::Settings;
use milli::{update::Settings, FilterableAttributesRule};
use utils::Conf;
#[cfg(not(windows))]
@ -22,7 +22,7 @@ fn base_conf(builder: &mut Settings) {
let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"]
.iter()
.map(|s| s.to_string())
.map(|s| FilterableAttributesRule::Field(s.to_string()))
.collect();
builder.set_filterable_fields(faceted_fields);
}

View File

@ -233,8 +233,8 @@ pub(crate) mod test {
use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
use meilisearch_types::index_uid_pattern::IndexUidPattern;
use meilisearch_types::keys::{Action, Key};
use meilisearch_types::milli;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::{self, FilterableAttributesRule};
use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
use meilisearch_types::task_view::DetailsView;
use meilisearch_types::tasks::{Details, Kind, Status};
@ -279,7 +279,10 @@ pub(crate) mod test {
let settings = Settings {
displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
filterable_attributes: Setting::Set(vec![
FilterableAttributesRule::Field(S("race")),
FilterableAttributesRule::Field(S("age")),
]),
sortable_attributes: Setting::Set(btreeset! { S("age") }),
ranking_rules: Setting::NotSet,
stop_words: Setting::NotSet,

View File

@ -322,7 +322,16 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
v6::Settings {
displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
filterable_attributes: settings.filterable_attributes.into(),
filterable_attributes: match settings.filterable_attributes {
v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
filterable_attributes
.into_iter()
.map(v6::FilterableAttributesRule::Field)
.collect(),
),
v5::settings::Setting::Reset => v6::Setting::Reset,
v5::settings::Setting::NotSet => v6::Setting::NotSet,
},
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: {
match settings.ranking_rules {

View File

@ -46,6 +46,8 @@ pub type ResponseError = meilisearch_types::error::ResponseError;
pub type Code = meilisearch_types::error::Code;
pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule;
pub struct V6Reader {
dump: TempDir,
instance_uid: Option<Uuid>,

View File

@ -30,6 +30,25 @@ pub enum Condition<'a> {
StartsWith { keyword: Token<'a>, word: Token<'a> },
}
impl Condition<'_> {
pub fn operator(&self) -> &str {
match self {
Condition::GreaterThan(_) => ">",
Condition::GreaterThanOrEqual(_) => ">=",
Condition::Equal(_) => "=",
Condition::NotEqual(_) => "!=",
Condition::Null => "IS NULL",
Condition::Empty => "IS EMPTY",
Condition::Exists => "EXISTS",
Condition::LowerThan(_) => "<",
Condition::LowerThanOrEqual(_) => "<=",
Condition::Between { .. } => "TO",
Condition::Contains { .. } => "CONTAINS",
Condition::StartsWith { .. } => "STARTS WITH",
}
}
}
/// condition = value ("==" | ">" ...) value
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));

View File

@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

View File

@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

View File

@ -1,17 +1,16 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attribute patterns are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
----------------------------------------------------------------------
### Status:

View File

@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}

View File

@ -1,13 +1,12 @@
---
source: crates/index-scheduler/src/scheduler/test_failure.rs
snapshot_kind: text
---
### Autobatching Enabled = true
### Processing batch None:
[]
----------------------------------------------------------------------
### All Tasks:
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
----------------------------------------------------------------------
### Status:

View File

@ -1,11 +1,11 @@
use std::time::Instant;
use big_s::S;
use maplit::btreeset;
use meili_snap::snapshot;
use meilisearch_types::milli::obkv_to_json;
use meilisearch_types::milli::update::IndexDocumentsMethod::*;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::FilterableAttributesRule;
use meilisearch_types::tasks::{Kind, KindWithContent};
use crate::insta_snapshot::snapshot_index_scheduler;
@ -127,7 +127,8 @@ fn fail_in_process_batch_for_document_deletion() {
use meilisearch_types::settings::{Settings, Unchecked};
let mut new_settings: Box<Settings<Unchecked>> = Box::default();
new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto")));
new_settings.filterable_attributes =
Setting::Set(vec![FilterableAttributesRule::Field(S("catto"))]);
index_scheduler
.register(

View File

@ -414,6 +414,7 @@ impl ErrorCode for milli::Error {
UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
UserError::FilterOperatorNotAllowed { .. } => Code::InvalidSearchFilter,
UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
Code::InvalidDocumentId

View File

@ -1,5 +1,5 @@
use deserr::Deserr;
use milli::LocalizedAttributesRule;
use milli::{AttributePatterns, LocalizedAttributesRule};
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
@ -7,7 +7,7 @@ use utoipa::ToSchema;
#[deserr(rename_all = camelCase)]
#[serde(rename_all = "camelCase")]
pub struct LocalizedAttributesRuleView {
pub attribute_patterns: Vec<String>,
pub attribute_patterns: AttributePatterns,
pub locales: Vec<Locale>,
}

View File

@ -11,7 +11,7 @@ use fst::IntoStreamer;
use milli::index::{IndexEmbeddingConfig, PrefixSearch};
use milli::proximity::ProximityPrecision;
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer};
use utoipa::ToSchema;
@ -202,8 +202,8 @@ pub struct Settings<T> {
/// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters).
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
#[schema(value_type = Option<Vec<String>>, example = json!(["release_date", "genre"]))]
pub filterable_attributes: Setting<BTreeSet<String>>,
#[schema(value_type = Option<Vec<FilterableAttributesRule>>, example = json!(["release_date", "genre"]))]
pub filterable_attributes: Setting<Vec<FilterableAttributesRule>>,
/// Attributes to use when sorting search results.
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSortableAttributes>)]
@ -791,7 +791,7 @@ pub fn settings(
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
let filterable_attributes = index.filterable_attributes_rules(rtxn)?.into_iter().collect();
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();

View File

@ -291,7 +291,7 @@ make_setting_routes!(
{
route: "/filterable-attributes",
update_verb: put,
value_type: std::collections::BTreeSet<String>,
value_type: Vec<meilisearch_types::milli::FilterableAttributesRule>,
err_type: meilisearch_types::deserr::DeserrJsonError<
meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes,
>,

View File

@ -8,6 +8,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet};
use meilisearch_types::facet_values_sort::FacetValuesSort;
use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::FilterableAttributesRule;
use meilisearch_types::settings::{
FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView,
RankingRuleView, SettingEmbeddingSettings, TypoSettings,
@ -89,6 +90,10 @@ impl Aggregate for SettingsAnalytics {
filterable_attributes: FilterableAttributesAnalytics {
total: new.filterable_attributes.total.or(self.filterable_attributes.total),
has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo),
has_patterns: new
.filterable_attributes
.has_patterns
.or(self.filterable_attributes.has_patterns),
},
distinct_attribute: DistinctAttributeAnalytics {
set: self.distinct_attribute.set | new.distinct_attribute.set,
@ -328,13 +333,19 @@ impl SortableAttributesAnalytics {
pub struct FilterableAttributesAnalytics {
pub total: Option<usize>,
pub has_geo: Option<bool>,
pub has_patterns: Option<bool>,
}
impl FilterableAttributesAnalytics {
pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
pub fn new(setting: Option<&Vec<FilterableAttributesRule>>) -> Self {
Self {
total: setting.as_ref().map(|filter| filter.len()),
has_geo: setting.as_ref().map(|filter| filter.contains("_geo")),
has_geo: setting
.as_ref()
.map(|filter| filter.iter().any(FilterableAttributesRule::has_geo)),
has_patterns: setting.as_ref().map(|filter| {
filter.iter().any(|rule| matches!(rule, FilterableAttributesRule::Pattern(_)))
}),
}
}

View File

@ -9,6 +9,10 @@ use meilisearch_types::batches::BatchStats;
use meilisearch_types::error::{Code, ErrorType, ResponseError};
use meilisearch_types::index_uid::IndexUid;
use meilisearch_types::keys::CreateApiKey;
use meilisearch_types::milli::{
AttributePatterns, FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns,
FilterableAttributesRule,
};
use meilisearch_types::settings::{
Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
Unchecked,
@ -88,7 +92,7 @@ pub mod tasks;
url = "/",
description = "Local server",
)),
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote))
components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures))
)]
pub struct MeilisearchApi;

View File

@ -20,7 +20,7 @@ use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
use meilisearch_types::milli::{
FacetValueHit, InternalError, OrderBy, SearchForFacetValues, TimeBudget,
FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, TimeBudget,
};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::{milli, Document};
@ -1538,8 +1538,9 @@ pub fn perform_facet_search(
// If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.
// If the user does not provide locales, we use the locales of the facet string.
let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let localized_attributes_locales =
localized_attributes.into_iter().find(|attr| attr.match_str(&facet_name));
let localized_attributes_locales = localized_attributes
.into_iter()
.find(|attr| attr.match_str(&facet_name) == PatternMatch::Match);
let locales = localized_attributes_locales.map(|attr| {
attr.locales
.into_iter()
@ -1885,7 +1886,7 @@ fn format_fields(
let locales = locales.or_else(|| {
localized_attributes
.iter()
.find(|rule| rule.match_str(key))
.find(|rule| rule.match_str(key) == PatternMatch::Match)
.map(LocalizedAttributesRule::locales)
});

View File

@ -125,6 +125,12 @@ impl Server<Owned> {
self.service.post("/indexes", body).await
}
pub async fn delete_index(&self, uid: impl AsRef<str>) -> (Value, StatusCode) {
let url = format!("/indexes/{}", urlencoding::encode(uid.as_ref()));
let (value, code) = self.service.delete(url).await;
(value, code)
}
pub fn index_with_encoder(&self, uid: impl AsRef<str>, encoder: Encoder) -> Index<'_> {
Index {
uid: uid.as_ref().to_string(),

View File

@ -636,7 +636,7 @@ async fn delete_document_by_filter() {
"originalFilter": "\"catto = jorts\""
},
"error": {
"message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
"message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attribute patterns are: `id`, `title`.\n1:6 catto = jorts",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -738,7 +738,7 @@ async fn fetch_document_by_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
"message": "Attribute `doggo` is not filterable. Available filterable attribute patterns are: `color`.\n1:6 doggo = bernese",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"

View File

@ -1,8 +1,10 @@
use meili_snap::*;
use crate::common::{shared_does_not_exists_index, Server};
use crate::common::{shared_does_not_exists_index, Server, DOCUMENTS, NESTED_DOCUMENTS};
use crate::json;
use super::test_settings_documents_indexing_swapping_and_search;
#[actix_rt::test]
async fn search_unexisting_index() {
let index = shared_does_not_exists_index().await;
@ -430,7 +432,7 @@ async fn search_non_filterable_facets() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute is `title`.",
"message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute pattern is `title`.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@ -441,7 +443,7 @@ async fn search_non_filterable_facets() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute is `title`.",
"message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute pattern is `title`.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@ -461,7 +463,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attributes are `genres, title`.",
"message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute patterns are `genres, title`.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@ -472,7 +474,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attributes are `genres, title`.",
"message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute patterns are `genres, title`.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@ -522,7 +524,7 @@ async fn search_non_filterable_facets_multiple_facets() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attributes are `genres, title`.",
"message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attribute patterns are `genres, title`.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@ -533,7 +535,7 @@ async fn search_non_filterable_facets_multiple_facets() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attributes are `genres, title`.",
"message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attribute patterns are `genres, title`.",
"code": "invalid_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@ -636,14 +638,11 @@ async fn search_bad_matching_strategy() {
#[actix_rt::test]
async fn filter_invalid_syntax_object() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
index
.search(json!({"filter": "title & Glass"}), |response, code| {
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": "title & Glass"}),
|response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
@ -653,20 +652,18 @@ async fn filter_invalid_syntax_object() {
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
},
)
.await;
}
#[actix_rt::test]
async fn filter_invalid_syntax_array() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
index
.search(json!({"filter": ["title & Glass"]}), |response, code| {
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": ["title & Glass"]}),
|response, code| {
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
@ -676,206 +673,327 @@ async fn filter_invalid_syntax_array() {
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
},
)
.await;
}
#[actix_rt::test]
async fn filter_invalid_syntax_string() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": "title = Glass XOR title = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": "title = Glass XOR title = Glass"}),
|response, code| {
snapshot!(response, @r###"
{
"message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
},
)
.await;
}
#[actix_rt::test]
async fn filter_invalid_attribute_array() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": ["many = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": ["many = Glass"]}),
|response, code| {
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
},
)
.await;
}
#[actix_rt::test]
async fn filter_invalid_attribute_string() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": "many = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": "many = Glass"}),
|response, code| {
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
},
)
.await;
}
#[actix_rt::test]
async fn filter_reserved_geo_attribute_array() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": ["_geo = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": ["_geo = Glass"]}),
|response, code| {
snapshot!(response, @r###"
{
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
},
)
.await;
}
#[actix_rt::test]
async fn filter_reserved_geo_attribute_string() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": "_geo = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": "_geo = Glass"}),
|response, code| {
snapshot!(response, @r###"
{
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
},
)
.await;
}
#[actix_rt::test]
async fn filter_reserved_attribute_array() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": ["_geoDistance = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": ["_geoDistance = Glass"]}),
|response, code| {
snapshot!(response, @r###"
{
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
},
)
.await;
}
#[actix_rt::test]
async fn filter_reserved_attribute_string() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": "_geoDistance = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": "_geoDistance = Glass"}),
|response, code| {
snapshot!(response, @r###"
{
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
},
)
.await;
}
#[actix_rt::test]
async fn filter_reserved_geo_point_array() {
let server = Server::new_shared();
let index = server.unique_index();
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": ["_geoPoint = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": ["_geoPoint = Glass"]}),
|response, code| {
snapshot!(response, @r###"
{
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
},
)
.await;
}
#[actix_rt::test]
async fn filter_reserved_geo_point_string() {
let server = Server::new_shared();
let index = server.unique_index();
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["title"]}),
&json!({"filter": "_geoPoint = Glass"}),
|response, code| {
snapshot!(response, @r###"
{
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
},
)
.await;
}
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
index.wait_task(task.uid()).await.succeeded();
#[actix_rt::test]
async fn search_with_pattern_filter_settings_errors() {
// Check if the Equality filter works with patterns
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": false, "comparison": true}
}
}]}),
&json!({
"filter": "cattos = pésti"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": "_geoPoint = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": false, "comparison": true}
}
}]}),
&json!({
"filter": "cattos IN [pésti, simba]"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
// Check if the Comparison filter works with patterns
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["cattos","doggos.age"]}]}),
&json!({
"filter": "doggos.age > 2"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": true, "comparison": false}
}
}]}),
&json!({
"filter": "doggos.age > 2"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": true, "comparison": false}
}
}]}),
&json!({
"filter": "doggos.age 2 TO 4"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Filter operator `TO` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
}
#[actix_rt::test]
@ -1018,109 +1136,115 @@ async fn sort_unset_ranking_rule() {
#[actix_rt::test]
async fn search_on_unknown_field() {
let server = Server::new_shared();
let index = server.unique_index();
let (response, _code) =
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
index.wait_task(response.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
});
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"searchableAttributes": ["id", "title"]}),
&json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
}
"###);
},
)
.await;
}
#[actix_rt::test]
async fn search_on_unknown_field_plus_joker() {
let server = Server::new_shared();
let index = server.unique_index();
let (response, _code) =
index.update_settings_searchable_attributes(json!(["id", "title"])).await;
index.wait_task(response.uid()).await.succeeded();
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"searchableAttributes": ["id", "title"]}),
&json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
}
"###);
},
)
.await;
let expected_response = json!({
"message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
});
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
index
.search(
json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&DOCUMENTS,
&json!({"searchableAttributes": ["id", "title"]}),
&json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
"code": "invalid_search_attributes_to_search_on",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
}
"###);
},
)
.await;
}
#[actix_rt::test]
async fn distinct_at_search_time() {
let server = Server::new_shared();
let index = server.unique_index();
let server = Server::new().await;
let index = server.index("test");
let (task, _) = index.create(None).await;
index.wait_task(task.uid()).await.succeeded();
let (response, _code) =
index.add_documents(json!([{"id": 1, "color": "Doggo", "machin": "Action"}]), None).await;
index.wait_task(response.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", index.uid),
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
});
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", index.uid),
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
});
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes patterns are: `color, machin`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
index.wait_task(task.uid()).await.succeeded();
let expected_response = json!({
"message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", index.uid),
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
});
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Index `test`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes patterns are: `color, <..hidden-attributes>`.",
"code": "invalid_search_distinct",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
}
"###);
let (response, code) =
index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await;

View File

@ -1,7 +1,9 @@
use meili_snap::snapshot;
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value, NESTED_DOCUMENTS};
use crate::json;
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
@ -34,6 +36,62 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
])
});
async fn test_settings_documents_indexing_swapping_and_facet_search(
documents: &Value,
settings: &Value,
query: &Value,
test: impl Fn(Value, actix_http::StatusCode) + std::panic::UnwindSafe + Clone,
) {
let temp = TempDir::new().unwrap();
let server = Server::new_with_options(Opt { ..default_settings(temp.path()) }).await.unwrap();
eprintln!("Documents -> Settings -> test");
let index = server.index("test");
let (task, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
let (task, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
let (response, code) = index.facet_search(query.clone()).await;
insta::allow_duplicates! {
test(response, code);
}
let (task, code) = server.delete_index("test").await;
assert_eq!(code, 202, "{}", task);
let response = server.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
eprintln!("Settings -> Documents -> test");
let index = server.index("test");
let (task, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
let (task, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
let (response, code) = index.facet_search(query.clone()).await;
insta::allow_duplicates! {
test(response, code);
}
let (task, code) = server.delete_index("test").await;
assert_eq!(code, 202, "{}", task);
let response = server.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
}
#[actix_rt::test]
async fn simple_facet_search() {
let server = Server::new().await;
@ -436,3 +494,124 @@ async fn deactivate_facet_search_add_documents_and_reset_facet_search() {
assert_eq!(code, 200, "{}", response);
assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2);
}
#[actix_rt::test]
async fn facet_search_with_filterable_attributes_rules() {
test_settings_documents_indexing_swapping_and_facet_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["genres"]}),
&json!({"facetName": "genres", "facetQuery": "a"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["facetHits"], @r###"[{"value":"Action","count":3},{"value":"Adventure","count":2}]"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_facet_search(
&DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["genres"], "features": {"facetSearch": true, "filter": {"equality": false, "comparison": false}}}]}),
&json!({"facetName": "genres", "facetQuery": "a"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["facetHits"], @r###"[{"value":"Action","count":3},{"value":"Adventure","count":2}]"###);
},
).await;
test_settings_documents_indexing_swapping_and_facet_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": ["doggos.name"]}),
&json!({"facetName": "doggos.name", "facetQuery": "b"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["facetHits"], @r###"[{"value":"bobby","count":1},{"value":"buddy","count":1}]"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_facet_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"], "features": {"facetSearch": true, "filter": {"equality": false, "comparison": false}}}]}),
&json!({"facetName": "doggos.name", "facetQuery": "b"}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(response["facetHits"], @r###"[{"value":"bobby","count":1},{"value":"buddy","count":1}]"###);
},
).await;
}
#[actix_rt::test]
async fn facet_search_with_filterable_attributes_rules_errors() {
test_settings_documents_indexing_swapping_and_facet_search(
&DOCUMENTS,
&json!({"filterableAttributes": ["genres"]}),
&json!({"facetName": "invalid", "facetQuery": "a"}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response["message"], @r###""Attribute `invalid` is not facet-searchable. Available facet-searchable attributes patterns are: `genres`. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
},
)
.await;
test_settings_documents_indexing_swapping_and_facet_search(
&DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["genres"]}]}),
&json!({"facetName": "genres", "facetQuery": "a"}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response["message"], @r###""Attribute `genres` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
},
)
.await;
test_settings_documents_indexing_swapping_and_facet_search(
&DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}),
&json!({"facetName": "genres", "facetQuery": "a"}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response["message"], @r###""Attribute `genres` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
},
).await;
test_settings_documents_indexing_swapping_and_facet_search(
&DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}),
&json!({"facetName": "genres", "facetQuery": "a"}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response["message"], @r###""Attribute `genres` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
},
).await;
test_settings_documents_indexing_swapping_and_facet_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"]}]}),
&json!({"facetName": "invalid.name", "facetQuery": "b"}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response["message"], @r###""Attribute `invalid.name` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
},
)
.await;
test_settings_documents_indexing_swapping_and_facet_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}),
&json!({"facetName": "doggos.name", "facetQuery": "b"}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response["message"], @r###""Attribute `doggos.name` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
},
).await;
test_settings_documents_indexing_swapping_and_facet_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}),
&json!({"facetName": "doggos.name", "facetQuery": "b"}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(response["message"], @r###""Attribute `doggos.name` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
},
).await;
}

View File

@ -0,0 +1,758 @@
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use tempfile::TempDir;
use super::test_settings_documents_indexing_swapping_and_search;
use crate::{
common::{default_settings, shared_index_with_documents, Server, DOCUMENTS, NESTED_DOCUMENTS},
json,
};
#[actix_rt::test]
async fn search_with_filter_string_notation() {
let server = Server::new().await;
let index = server.index("test");
let (_, code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
meili_snap::snapshot!(code, @"202 Accepted");
let documents = DOCUMENTS.clone();
let (task, code) = index.add_documents(documents, None).await;
meili_snap::snapshot!(code, @"202 Accepted");
let res = index.wait_task(task.uid()).await;
meili_snap::snapshot!(res["status"], @r###""succeeded""###);
index
.search(
json!({
"filter": "title = Gläss"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
},
)
.await;
let index = server.index("nested");
let (_, code) =
index.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})).await;
meili_snap::snapshot!(code, @"202 Accepted");
let documents = NESTED_DOCUMENTS.clone();
let (task, code) = index.add_documents(documents, None).await;
meili_snap::snapshot!(code, @"202 Accepted");
let res = index.wait_task(task.uid()).await;
meili_snap::snapshot!(res["status"], @r###""succeeded""###);
index
.search(
json!({
"filter": "cattos = pésti"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
assert_eq!(response["hits"][0]["id"], json!(852));
},
)
.await;
index
.search(
json!({
"filter": "doggos.age > 5"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
assert_eq!(response["hits"][0]["id"], json!(654));
assert_eq!(response["hits"][1]["id"], json!(951));
},
)
.await;
}
#[actix_rt::test]
async fn search_with_filter_array_notation() {
let index = shared_index_with_documents().await;
let (response, code) = index
.search_post(json!({
"filter": ["title = Gläss"]
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
let (response, code) = index
.search_post(json!({
"filter": [["title = Gläss", "title = \"Shazam!\"", "title = \"Escape Room\""]]
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 3);
}
#[actix_rt::test]
async fn search_with_contains_filter() {
let temp = TempDir::new().unwrap();
let server = Server::new_with_options(Opt {
experimental_contains_filter: true,
..default_settings(temp.path())
})
.await
.unwrap();
let index = server.index("movies");
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
let (request, _code) = index.add_documents(documents, None).await;
index.wait_task(request.uid()).await.succeeded();
let (response, code) = index
.search_post(json!({
"filter": "title CONTAINS cap"
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
}
#[actix_rt::test]
async fn search_with_pattern_filter_settings() {
// Check if the Equality filter works with patterns
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{"attributePatterns": ["cattos","doggos.age"]}]}),
&json!({
"filter": "cattos = pésti"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
}
]
"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": true, "comparison": false}
}
}]}),
&json!({
"filter": "cattos = pésti"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
}
]
"###);
},
)
.await;
// Check if the Comparison filter works with patterns
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": false, "comparison": true}
}
}]}),
&json!({
"filter": "doggos.age > 2"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8
}
],
"cattos": [
"simba",
"pestiféré"
]
},
{
"id": 951,
"father": "jean-baptiste",
"mother": "sophie",
"doggos": [
{
"name": "turbo",
"age": 5
},
{
"name": "fast",
"age": 6
}
],
"cattos": [
"moumoute",
"gomez"
]
}
]
"###);
},
)
.await;
}
#[actix_rt::test]
async fn search_with_pattern_filter_settings_scenario_1() {
let temp = TempDir::new().unwrap();
let server = Server::new_with_options(Opt { ..default_settings(temp.path()) }).await.unwrap();
eprintln!("Documents -> Settings -> test");
let index = server.index("test");
let (task, code) = index.add_documents(NESTED_DOCUMENTS.clone(), None).await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
let (task, code) = index
.update_settings(json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": true, "comparison": false}
}
}]}))
.await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
// Check if the Equality filter works
index
.search(
json!({
"filter": "cattos = pésti"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
}
]
"###);
},
)
.await;
// Check if the Comparison filter returns an error
index
.search(
json!({
"filter": "doggos.age > 2"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
// Update the settings activate comparison filter
let (task, code) = index
.update_settings(json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": true, "comparison": true}
}
}]}))
.await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
// Check if the Equality filter works
index
.search(
json!({
"filter": "cattos = pésti"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
}
]
"###);
},
)
.await;
// Check if the Comparison filter works
index
.search(
json!({
"filter": "doggos.age > 2"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8
}
],
"cattos": [
"simba",
"pestiféré"
]
},
{
"id": 951,
"father": "jean-baptiste",
"mother": "sophie",
"doggos": [
{
"name": "turbo",
"age": 5
},
{
"name": "fast",
"age": 6
}
],
"cattos": [
"moumoute",
"gomez"
]
}
]
"###);
},
)
.await;
// Update the settings deactivate equality filter
let (task, code) = index
.update_settings(json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": false, "comparison": true}
}
}]}))
.await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
// Check if the Equality filter returns an error
index
.search(
json!({
"filter": "cattos = pésti"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
// Check if the Comparison filter works
index
.search(
json!({
"filter": "doggos.age > 2"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
},
{
"id": 654,
"father": "pierre",
"mother": "sabine",
"doggos": [
{
"name": "gros bill",
"age": 8
}
],
"cattos": [
"simba",
"pestiféré"
]
},
{
"id": 951,
"father": "jean-baptiste",
"mother": "sophie",
"doggos": [
{
"name": "turbo",
"age": 5
},
{
"name": "fast",
"age": 6
}
],
"cattos": [
"moumoute",
"gomez"
]
}
]
"###);
},
)
.await;
// rollback the settings
let (task, code) = index
.update_settings(json!({"filterableAttributes": [{
"attributePatterns": ["cattos","doggos.age"],
"features": {
"facetSearch": false,
"filter": {"equality": true, "comparison": false}
}
}]}))
.await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
snapshot!(response["status"], @r###""succeeded""###);
// Check if the Equality filter works
index
.search(
json!({
"filter": "cattos = pésti"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
}
]
"###);
},
)
.await;
// Check if the Comparison filter returns an error
index
.search(
json!({
"filter": "doggos.age > 2"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
}
#[actix_rt::test]
async fn test_filterable_attributes_priority() {
// Test that the filterable attributes priority is respected
// check if doggos.name is filterable
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [
// deactivated filter
{"attributePatterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}},
// activated filter
{"attributePatterns": ["doggos.*"]},
]}),
&json!({
"filter": "doggos.name = bobby"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
}
]
"###);
},
)
.await;
// check if doggos.name is filterable 2
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [
// deactivated filter
{"attributePatterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}},
// activated filter
{"attributePatterns": ["doggos.*"]},
]}),
&json!({
"filter": "doggos.name = bobby"
}),
|response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 852,
"father": "jean",
"mother": "michelle",
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
],
"cattos": "pésti"
}
]
"###);
},
)
.await;
// check if doggos.age is not filterable
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [
// deactivated filter
{"attributePatterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}},
// activated filter
{"attributePatterns": ["doggos.*"]},
]}),
&json!({
"filter": "doggos.age > 2"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attribute patterns are: `doggos.*`.\n1:11 doggos.age > 2",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
// check if doggos is not filterable
test_settings_documents_indexing_swapping_and_search(
&NESTED_DOCUMENTS,
&json!({"filterableAttributes": [
// deactivated filter
{"attributePatterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}},
// activated filter
{"attributePatterns": ["doggos.*"]},
]}),
&json!({
"filter": "doggos EXISTS"
}),
|response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Attribute `doggos` is not filterable. Available filterable attribute patterns are: `doggos.*`.\n1:7 doggos EXISTS",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
}

View File

@ -1,9 +1,12 @@
use meili_snap::{json_string, snapshot};
use meilisearch_types::milli::constants::RESERVED_GEO_FIELD_NAME;
use once_cell::sync::Lazy;
use crate::common::{Server, Value};
use crate::json;
use super::test_settings_documents_indexing_swapping_and_search;
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
@ -184,3 +187,184 @@ async fn bug_4640() {
)
.await;
}
#[actix_rt::test]
async fn geo_asc_with_words() {
let documents = json!([
{ "id": 0, "doggo": "jean", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 0 } },
{ "id": 1, "doggo": "intel", RESERVED_GEO_FIELD_NAME: { "lat": 88, "lng": 0 } },
{ "id": 2, "doggo": "jean bob", RESERVED_GEO_FIELD_NAME: { "lat": -89, "lng": 0 } },
{ "id": 3, "doggo": "jean michel", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 178 } },
{ "id": 4, "doggo": "bob marley", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": -179 } },
]);
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "geo:asc"]}),
&json!({"q": "jean"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 0,
"doggo": "jean",
"_geo": {
"lat": 0,
"lng": 0
}
},
{
"id": 2,
"doggo": "jean bob",
"_geo": {
"lat": -89,
"lng": 0
}
},
{
"id": 3,
"doggo": "jean michel",
"_geo": {
"lat": 0,
"lng": 178
}
}
],
"query": "jean",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "geo:asc"]}),
&json!({"q": "bob"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 2,
"doggo": "jean bob",
"_geo": {
"lat": -89,
"lng": 0
}
},
{
"id": 4,
"doggo": "bob marley",
"_geo": {
"lat": 0,
"lng": -179
}
}
],
"query": "bob",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 2
}
"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "geo:asc"]}),
&json!({"q": "intel"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 1,
"doggo": "intel",
"_geo": {
"lat": 88,
"lng": 0
}
}
],
"query": "intel",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 1
}
"###);
},
)
.await;
}
#[actix_rt::test]
async fn geo_sort_with_words() {
let documents = json!([
{ "id": 0, "doggo": "jean", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 0 } },
{ "id": 1, "doggo": "intel", RESERVED_GEO_FIELD_NAME: { "lat": 88, "lng": 0 } },
{ "id": 2, "doggo": "jean bob", RESERVED_GEO_FIELD_NAME: { "lat": -89, "lng": 0 } },
{ "id": 3, "doggo": "jean michel", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 178 } },
{ "id": 4, "doggo": "bob marley", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": -179 } },
]);
test_settings_documents_indexing_swapping_and_search(
&documents,
&json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "sort"], "sortableAttributes": [RESERVED_GEO_FIELD_NAME]}),
&json!({"q": "jean", "sort": ["_geoPoint(0.0, 0.0):asc"]}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
{
"hits": [
{
"id": 0,
"doggo": "jean",
"_geo": {
"lat": 0,
"lng": 0
},
"_geoDistance": 0
},
{
"id": 2,
"doggo": "jean bob",
"_geo": {
"lat": -89,
"lng": 0
},
"_geoDistance": 9896348
},
{
"id": 3,
"doggo": "jean michel",
"_geo": {
"lat": 0,
"lng": 178
},
"_geoDistance": 19792697
}
],
"query": "jean",
"processingTimeMs": "[time]",
"limit": 20,
"offset": 0,
"estimatedTotalHits": 3
}
"###);
},
)
.await;
}

View File

@ -4,6 +4,7 @@
mod distinct;
mod errors;
mod facet_search;
mod filters;
mod formatted;
mod geo;
mod hybrid;
@ -21,10 +22,58 @@ use tempfile::TempDir;
use crate::common::{
default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server,
DOCUMENTS, FRUITS_DOCUMENTS, NESTED_DOCUMENTS, SCORE_DOCUMENTS, VECTOR_DOCUMENTS,
Value, DOCUMENTS, FRUITS_DOCUMENTS, NESTED_DOCUMENTS, SCORE_DOCUMENTS, VECTOR_DOCUMENTS,
};
use crate::json;
async fn test_settings_documents_indexing_swapping_and_search(
documents: &Value,
settings: &Value,
query: &Value,
test: impl Fn(Value, actix_http::StatusCode) + std::panic::UnwindSafe + Clone,
) {
let temp = TempDir::new().unwrap();
let server = Server::new_with_options(Opt { ..default_settings(temp.path()) }).await.unwrap();
eprintln!("Documents -> Settings -> test");
let index = server.index("test");
let (task, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
let (task, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
index.search(query.clone(), test.clone()).await;
let (task, code) = server.delete_index("test").await;
assert_eq!(code, 202, "{}", task);
let response = server.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
eprintln!("Settings -> Documents -> test");
let index = server.index("test");
let (task, code) = index.update_settings(settings.clone()).await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
let (task, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(code, 202, "{}", task);
let response = index.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
index.search(query.clone(), test.clone()).await;
let (task, code) = server.delete_index("test").await;
assert_eq!(code, 202, "{}", task);
let response = server.wait_task(task.uid()).await;
assert!(response.is_success(), "{:?}", response);
}
#[actix_rt::test]
async fn simple_placeholder_search() {
let index = shared_index_with_documents().await;
@ -355,118 +404,6 @@ async fn search_multiple_params() {
.await;
}
#[actix_rt::test]
async fn search_with_filter_string_notation() {
let server = Server::new().await;
let index = server.index("test");
let (_, code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
meili_snap::snapshot!(code, @"202 Accepted");
let documents = DOCUMENTS.clone();
let (task, code) = index.add_documents(documents, None).await;
meili_snap::snapshot!(code, @"202 Accepted");
let res = index.wait_task(task.uid()).await;
meili_snap::snapshot!(res["status"], @r###""succeeded""###);
index
.search(
json!({
"filter": "title = Gläss"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
},
)
.await;
let index = server.index("nested");
let (_, code) =
index.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})).await;
meili_snap::snapshot!(code, @"202 Accepted");
let documents = NESTED_DOCUMENTS.clone();
let (task, code) = index.add_documents(documents, None).await;
meili_snap::snapshot!(code, @"202 Accepted");
let res = index.wait_task(task.uid()).await;
meili_snap::snapshot!(res["status"], @r###""succeeded""###);
index
.search(
json!({
"filter": "cattos = pésti"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
assert_eq!(response["hits"][0]["id"], json!(852));
},
)
.await;
index
.search(
json!({
"filter": "doggos.age > 5"
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
assert_eq!(response["hits"][0]["id"], json!(654));
assert_eq!(response["hits"][1]["id"], json!(951));
},
)
.await;
}
#[actix_rt::test]
async fn search_with_filter_array_notation() {
let index = shared_index_with_documents().await;
let (response, code) = index
.search_post(json!({
"filter": ["title = Gläss"]
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
let (response, code) = index
.search_post(json!({
"filter": [["title = Gläss", "title = \"Shazam!\"", "title = \"Escape Room\""]]
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 3);
}
#[actix_rt::test]
async fn search_with_contains_filter() {
let temp = TempDir::new().unwrap();
let server = Server::new_with_options(Opt {
experimental_contains_filter: true,
..default_settings(temp.path())
})
.await
.unwrap();
let index = server.index("movies");
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
let (request, _code) = index.add_documents(documents, None).await;
index.wait_task(request.uid()).await.succeeded();
let (response, code) = index
.search_post(json!({
"filter": "title CONTAINS cap"
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
}
#[actix_rt::test]
async fn search_with_sort_on_numbers() {
let index = shared_index_with_documents().await;
@ -589,7 +526,7 @@ async fn search_facet_distribution() {
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetDistribution"].as_object().unwrap();
assert_eq!(dist.len(), 1);
assert_eq!(dist.len(), 1, "{:?}", dist);
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
@ -606,7 +543,7 @@ async fn search_facet_distribution() {
|response, code| {
assert_eq!(code, 200, "{}", response);
let dist = response["facetDistribution"].as_object().unwrap();
assert_eq!(dist.len(), 3);
assert_eq!(dist.len(), 3, "{:?}", dist);
assert_eq!(
dist["doggos.name"],
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
@ -1559,6 +1496,293 @@ async fn change_attributes_settings() {
.await;
}
#[actix_rt::test]
async fn test_nested_fields() {
let documents = json!([
{
"id": 0,
"title": "The zeroth document",
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule",
},
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field",
},
{
"prout": "truc",
"machin": "lol",
},
],
},
{
"id": 3,
"title": "The third document",
"nested": "I lied",
},
]);
let settings = json!({
"searchableAttributes": ["title", "nested.object", "nested.machin"],
"filterableAttributes": ["title", "nested.object", "nested.machin"]
});
// Test empty search returns all documents
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"q": "document"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"title": "The zeroth document"
},
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field"
},
{
"prout": "truc",
"machin": "lol"
}
]
},
{
"id": 3,
"title": "The third document",
"nested": "I lied"
}
]
"###);
},
)
.await;
// Test searching specific documents
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"q": "zeroth"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"title": "The zeroth document"
}
]
"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"q": "first"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
}
]
"###);
},
)
.await;
// Test searching nested fields
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"q": "field"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field"
},
{
"prout": "truc",
"machin": "lol"
}
]
}
]
"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"q": "array"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
// nested is not searchable
snapshot!(json_string!(response["hits"]), @"[]");
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"q": "lied"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
// nested is not searchable
snapshot!(json_string!(response["hits"]), @"[]");
},
)
.await;
// Test filtering on nested fields
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"filter": "nested.object = field"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
},
{
"id": 2,
"title": "The second document",
"nested": [
"array",
{
"object": "field"
},
{
"prout": "truc",
"machin": "lol"
}
]
}
]
"###);
},
)
.await;
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"filter": "nested.machin = bidule"}),
|response, code| {
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 1,
"title": "The first document",
"nested": {
"object": "field",
"machin": "bidule"
}
}
]
"###);
},
)
.await;
// Test filtering on non-filterable nested field fails
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"filter": "nested = array"}),
|response, code| {
assert_eq!(code, 400, "{}", response);
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Attribute `nested` is not filterable. Available filterable attribute patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = array",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
// Test filtering on non-filterable nested field fails
test_settings_documents_indexing_swapping_and_search(
&documents,
&settings,
&json!({"filter": r#"nested = "I lied""#}),
|response, code| {
assert_eq!(code, 400, "{}", response);
snapshot!(json_string!(response), @r###"
{
"message": "Index `test`: Attribute `nested` is not filterable. Available filterable attribute patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = \"I lied\"",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
},
)
.await;
}
/// Modifying facets with different casing should work correctly
#[actix_rt::test]
async fn change_facet_casing() {

View File

@ -3647,7 +3647,7 @@ async fn federation_non_faceted_for_an_index() {
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###"
{
"message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`",
"message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attribute patterns are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`",
"code": "invalid_multi_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets"
@ -3669,7 +3669,7 @@ async fn federation_non_faceted_for_an_index() {
snapshot!(code, @"400 Bad Request");
insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###"
{
"message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries",
"message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attribute patterns are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries",
"code": "invalid_multi_search_facets",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets"

View File

@ -1,3 +1,5 @@
use meili_snap::{json_string, snapshot};
use crate::common::Server;
use crate::json;
@ -510,3 +512,127 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() {
assert_eq!(response, json!(null));
}
#[actix_rt::test]
async fn granular_filterable_attributes() {
let server = Server::new().await;
let index = server.index("test");
index.create(None).await;
let (response, code) =
index.update_settings(json!({ "filterableAttributes": [
{ "attributePatterns": ["name"], "features": { "facetSearch": true, "filter": {"equality": true, "comparison": false} } },
{ "attributePatterns": ["age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": true} } },
{ "attributePatterns": ["id"] },
{ "attributePatterns": ["default-filterable-features-null"], "features": { "facetSearch": true } },
{ "attributePatterns": ["default-filterable-features-equality"], "features": { "facetSearch": true, "filter": {"comparison": true} } },
{ "attributePatterns": ["default-filterable-features-comparison"], "features": { "facetSearch": true, "filter": {"equality": true} } },
{ "attributePatterns": ["default-filterable-features-empty"], "features": { "facetSearch": true, "filter": {} } },
{ "attributePatterns": ["default-facet-search"], "features": { "filter": {"equality": true, "comparison": true} } },
] })).await;
assert_eq!(code, 202);
index.wait_task(response.uid()).await.succeeded();
let (response, code) = index.settings().await;
assert_eq!(code, 200, "{}", response);
snapshot!(json_string!(response["filterableAttributes"]), @r###"
[
{
"attributePatterns": [
"name"
],
"features": {
"facetSearch": true,
"filter": {
"equality": true,
"comparison": false
}
}
},
{
"attributePatterns": [
"age"
],
"features": {
"facetSearch": false,
"filter": {
"equality": true,
"comparison": true
}
}
},
{
"attributePatterns": [
"id"
],
"features": {
"facetSearch": false,
"filter": {
"equality": true,
"comparison": false
}
}
},
{
"attributePatterns": [
"default-filterable-features-null"
],
"features": {
"facetSearch": true,
"filter": {
"equality": true,
"comparison": false
}
}
},
{
"attributePatterns": [
"default-filterable-features-equality"
],
"features": {
"facetSearch": true,
"filter": {
"equality": true,
"comparison": true
}
}
},
{
"attributePatterns": [
"default-filterable-features-comparison"
],
"features": {
"facetSearch": true,
"filter": {
"equality": true,
"comparison": false
}
}
},
{
"attributePatterns": [
"default-filterable-features-empty"
],
"features": {
"facetSearch": true,
"filter": {
"equality": true,
"comparison": false
}
}
},
{
"attributePatterns": [
"default-facet-search"
],
"features": {
"facetSearch": false,
"filter": {
"equality": true,
"comparison": true
}
}
}
]
"###);
}

View File

@ -452,18 +452,19 @@ async fn filter_invalid_attribute_array() {
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(
json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
},
)
.await;
@ -492,18 +493,19 @@ async fn filter_invalid_attribute_string() {
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await.succeeded();
let expected_response = json!({
"message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(
json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}),
|response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
},
)
.await;

View File

@ -0,0 +1,152 @@
use deserr::Deserr;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::is_faceted_by;
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[repr(transparent)]
#[serde(transparent)]
pub struct AttributePatterns {
#[schema(example = json!(["title", "overview_*", "release_date"]))]
pub patterns: Vec<String>,
}
impl<E: deserr::DeserializeError> Deserr<E> for AttributePatterns {
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: deserr::ValuePointerRef,
) -> Result<Self, E> {
Vec::<String>::deserialize_from_value(value, location).map(|patterns| Self { patterns })
}
}
impl From<Vec<String>> for AttributePatterns {
fn from(patterns: Vec<String>) -> Self {
Self { patterns }
}
}
impl AttributePatterns {
/// Match a string against the attribute patterns using the match_pattern function.
pub fn match_str(&self, str: &str) -> PatternMatch {
let mut pattern_match = PatternMatch::NoMatch;
for pattern in &self.patterns {
match match_pattern(pattern, str) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => pattern_match = PatternMatch::Parent,
PatternMatch::NoMatch => {}
}
}
pattern_match
}
}
/// Match a string against a pattern.
///
/// The pattern can be a wildcard, a prefix, a suffix or an exact match.
///
/// # Arguments
///
/// * `pattern` - The pattern to match against.
/// * `str` - The string to match against the pattern.
fn match_pattern(pattern: &str, str: &str) -> PatternMatch {
// If the pattern is a wildcard, return Match
if pattern == "*" {
return PatternMatch::Match;
} else if pattern.starts_with('*') && pattern.ends_with('*') {
// If the pattern starts and ends with a wildcard, return Match if the string contains the pattern without the wildcards
if str.contains(&pattern[1..pattern.len() - 1]) {
return PatternMatch::Match;
}
} else if let Some(pattern) = pattern.strip_prefix('*') {
// If the pattern starts with a wildcard, return Match if the string ends with the pattern without the wildcard
if str.ends_with(pattern) {
return PatternMatch::Match;
}
} else if let Some(pattern) = pattern.strip_suffix('*') {
// If the pattern ends with a wildcard, return Match if the string starts with the pattern without the wildcard
if str.starts_with(pattern) {
return PatternMatch::Match;
}
} else if pattern == str {
// If the pattern is exactly the string, return Match
return PatternMatch::Match;
}
// If the field is a parent field of the pattern, return Parent
if is_faceted_by(pattern, str) {
PatternMatch::Parent
} else {
PatternMatch::NoMatch
}
}
/// Match a field against a pattern using the legacy behavior.
///
/// A field matches a pattern if it is a parent of the pattern or if it is the pattern itself.
/// This behavior is used to match the sortable attributes, the searchable attributes and the filterable attributes rules `Field`.
///
/// # Arguments
///
/// * `pattern` - The pattern to match against.
/// * `field` - The field to match against the pattern.
pub fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch {
if is_faceted_by(field, pattern) {
// If the field matches the pattern or is a nested field of the pattern, return Match (legacy behavior)
PatternMatch::Match
} else if is_faceted_by(pattern, field) {
// If the field is a parent field of the pattern, return Parent
PatternMatch::Parent
} else {
// If the field does not match the pattern and is not a parent of a nested field that matches the pattern, return NoMatch
PatternMatch::NoMatch
}
}
/// Match a field against a distinct field.
pub fn match_distinct_field(distinct_field: Option<&str>, field: &str) -> PatternMatch {
if let Some(distinct_field) = distinct_field {
if field == distinct_field {
// If the field matches exactly the distinct field, return Match
return PatternMatch::Match;
} else if is_faceted_by(distinct_field, field) {
// If the field is a parent field of the distinct field, return Parent
return PatternMatch::Parent;
}
}
// If the field does not match the distinct field and is not a parent of a nested field that matches the distinct field, return NoMatch
PatternMatch::NoMatch
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatternMatch {
/// The field is a parent of a nested field that matches the pattern
/// For example, the field is `toto`, and the pattern is `toto.titi`
Parent,
/// The field matches the pattern
Match,
/// The field does not match the pattern
NoMatch,
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_match_pattern() {
assert_eq!(match_pattern("*", "test"), PatternMatch::Match);
assert_eq!(match_pattern("test*", "test"), PatternMatch::Match);
assert_eq!(match_pattern("test*", "testa"), PatternMatch::Match);
assert_eq!(match_pattern("*test", "test"), PatternMatch::Match);
assert_eq!(match_pattern("*test", "atest"), PatternMatch::Match);
assert_eq!(match_pattern("*test*", "test"), PatternMatch::Match);
assert_eq!(match_pattern("*test*", "atesta"), PatternMatch::Match);
assert_eq!(match_pattern("*test*", "atest"), PatternMatch::Match);
assert_eq!(match_pattern("*test*", "testa"), PatternMatch::Match);
assert_eq!(match_pattern("test*test", "test"), PatternMatch::NoMatch);
assert_eq!(match_pattern("*test", "testa"), PatternMatch::NoMatch);
assert_eq!(match_pattern("test*", "atest"), PatternMatch::NoMatch);
}
}

View File

@ -122,10 +122,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
and can not be more than 511 bytes.", .document_id.to_string()
)]
InvalidDocumentId { document_id: Value },
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_patterns))]
InvalidFacetsDistribution {
invalid_facets_name: BTreeSet<String>,
valid_facets_name: BTreeSet<String>,
valid_patterns: BTreeSet<String>,
},
#[error(transparent)]
InvalidGeoField(#[from] GeoError),
@ -139,6 +139,13 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidFilter(String),
#[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)]
InvalidFilterExpression(&'static [&'static str], Value),
#[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` {} in `filterableAttributes`", allowed_operators.join(", "), format!("matched rule #{rule_index}"))]
FilterOperatorNotAllowed {
field: String,
allowed_operators: Vec<String>,
operator: String,
rule_index: usize,
},
#[error("Attribute `{}` is not sortable. {}",
.field,
match .valid_fields.is_empty() {
@ -152,28 +159,32 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
#[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
.field,
match .valid_fields.is_empty() {
match .valid_patterns.is_empty() {
true => "This index does not have configured filterable attributes.".to_string(),
false => format!("Available filterable attributes are: `{}{}`.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
false => format!("Available filterable attributes patterns are: `{}{}`.",
valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
),
}
)]
InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
InvalidDistinctAttribute {
field: String,
valid_patterns: BTreeSet<String>,
hidden_fields: bool,
},
#[error("Attribute `{}` is not facet-searchable. {}",
.field,
match .valid_fields.is_empty() {
match .valid_patterns.is_empty() {
true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(),
false => format!("Available facet-searchable attributes are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
false => format!("Available facet-searchable attributes patterns are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
.hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
),
}
)]
InvalidFacetSearchFacetName {
field: String,
valid_fields: BTreeSet<String>,
valid_patterns: BTreeSet<String>,
hidden_fields: bool,
},
#[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",
@ -380,9 +391,9 @@ pub enum GeoError {
fn format_invalid_filter_distribution(
invalid_facets_name: &BTreeSet<String>,
valid_facets_name: &BTreeSet<String>,
valid_patterns: &BTreeSet<String>,
) -> String {
if valid_facets_name.is_empty() {
if valid_patterns.is_empty() {
return "this index does not have configured filterable attributes.".into();
}
@ -404,17 +415,17 @@ fn format_invalid_filter_distribution(
.unwrap(),
};
match valid_facets_name.len() {
match valid_patterns.len() {
1 => write!(
result,
" The available filterable attribute is `{}`.",
valid_facets_name.first().unwrap()
" The available filterable attribute pattern is `{}`.",
valid_patterns.first().unwrap()
)
.unwrap(),
_ => write!(
result,
" The available filterable attributes are `{}`.",
valid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
" The available filterable attribute patterns are `{}`.",
valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
)
.unwrap(),
}

View File

@ -43,11 +43,6 @@ impl FieldidsWeightsMap {
self.map.get(&fid).copied()
}
/// Returns highest weight contained in the map if any.
pub fn max_weight(&self) -> Option<Weight> {
self.map.values().copied().max()
}
/// Return an iterator visiting all field ids in arbitrary order.
pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
self.map.keys().copied()

View File

@ -105,6 +105,18 @@ impl<'indexing> GlobalFieldsIdsMap<'indexing> {
self.local.name(id)
}
/// Get the metadata of a field based on its id.
pub fn metadata(&mut self, id: FieldId) -> Option<Metadata> {
if self.local.metadata(id).is_none() {
let global = self.global.read().unwrap();
let (name, metadata) = global.name_with_metadata(id)?;
self.local.insert(name, id, metadata);
}
self.local.metadata(id)
}
}
impl<'indexing> MutFieldIdMapper for GlobalFieldsIdsMap<'indexing> {

View File

@ -5,14 +5,29 @@ use charabia::Language;
use heed::RoTxn;
use super::FieldsIdsMap;
use crate::{FieldId, Index, LocalizedAttributesRule, Result};
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::{
is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
LocalizedAttributesRule, Result, Weight,
};
#[derive(Debug, Clone, Copy)]
pub struct Metadata {
pub searchable: bool,
pub filterable: bool,
/// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
pub searchable: Option<Weight>,
/// The field is part of the sortable attributes.
pub sortable: bool,
localized_attributes_rule_id: Option<NonZeroU16>,
/// The field is defined as the distinct attribute.
pub distinct: bool,
/// The field has been defined as asc/desc in the ranking rules.
pub asc_desc: bool,
/// The field is a geo field (`_geo`, `_geo.lat`, `_geo.lng`).
pub geo: bool,
/// The id of the localized attributes rule if the field is localized.
pub localized_attributes_rule_id: Option<NonZeroU16>,
/// The id of the filterable attributes rule if the field is filterable.
pub filterable_attributes_rule_id: Option<NonZeroU16>,
}
#[derive(Debug, Clone)]
@ -106,76 +121,227 @@ impl Metadata {
let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap();
Some(rule.locales())
}
pub fn filterable_attributes<'rules>(
&self,
rules: &'rules [FilterableAttributesRule],
) -> Option<&'rules FilterableAttributesRule> {
self.filterable_attributes_with_rule_index(rules).map(|(_, rule)| rule)
}
pub fn filterable_attributes_with_rule_index<'rules>(
&self,
rules: &'rules [FilterableAttributesRule],
) -> Option<(usize, &'rules FilterableAttributesRule)> {
let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get();
let rule_id = (filterable_attributes_rule_id - 1) as usize;
let rule = rules.get(rule_id).unwrap();
Some((rule_id, rule))
}
pub fn filterable_attributes_features(
&self,
rules: &[FilterableAttributesRule],
) -> FilterableAttributesFeatures {
let (_, features) = self.filterable_attributes_features_with_rule_index(rules);
features
}
pub fn filterable_attributes_features_with_rule_index(
&self,
rules: &[FilterableAttributesRule],
) -> (Option<usize>, FilterableAttributesFeatures) {
self.filterable_attributes_with_rule_index(rules)
.map(|(rule_index, rule)| (Some(rule_index), rule.features()))
// if there is no filterable attributes rule, return no features
.unwrap_or_else(|| (None, FilterableAttributesFeatures::no_features()))
}
pub fn is_sortable(&self) -> bool {
self.sortable
}
pub fn is_searchable(&self) -> bool {
self.searchable.is_some()
}
pub fn searchable_weight(&self) -> Option<Weight> {
self.searchable
}
pub fn is_distinct(&self) -> bool {
self.distinct
}
pub fn is_asc_desc(&self) -> bool {
self.asc_desc
}
pub fn is_geo(&self) -> bool {
self.geo
}
/// Returns `true` if the field is part of the facet databases. (sortable, distinct, asc_desc, filterable or facet searchable)
pub fn is_faceted(&self, rules: &[FilterableAttributesRule]) -> bool {
if self.is_distinct() || self.is_sortable() || self.is_asc_desc() {
return true;
}
let features = self.filterable_attributes_features(rules);
if features.is_filterable() || features.is_facet_searchable() {
return true;
}
false
}
pub fn require_facet_level_database(&self, rules: &[FilterableAttributesRule]) -> bool {
let features = self.filterable_attributes_features(rules);
self.is_sortable() || self.is_asc_desc() || features.is_filterable_comparison()
}
}
#[derive(Debug, Clone)]
pub struct MetadataBuilder {
searchable_attributes: Vec<String>,
filterable_attributes: HashSet<String>,
searchable_attributes: Option<Vec<String>>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
distinct_attribute: Option<String>,
asc_desc_attributes: HashSet<String>,
}
impl MetadataBuilder {
pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
let searchable_attributes =
index.searchable_fields(rtxn)?.into_iter().map(|s| s.to_string()).collect();
let filterable_attributes = index.filterable_fields(rtxn)?;
let searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(|s| s.to_string()).collect());
let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
let sortable_attributes = index.sortable_fields(rtxn)?;
let localized_attributes = index.localized_attributes_rules(rtxn)?;
let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
let asc_desc_attributes = index.asc_desc_fields(rtxn)?;
Ok(Self {
Ok(Self::new(
searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
})
distinct_attribute,
asc_desc_attributes,
))
}
/// Build a new `MetadataBuilder` from the given parameters.
///
/// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
pub fn new(
searchable_attributes: Vec<String>,
filterable_attributes: HashSet<String>,
searchable_attributes: Option<Vec<String>>,
filterable_attributes: Vec<FilterableAttributesRule>,
sortable_attributes: HashSet<String>,
localized_attributes: Option<Vec<LocalizedAttributesRule>>,
distinct_attribute: Option<String>,
asc_desc_attributes: HashSet<String>,
) -> Self {
let searchable_attributes = match searchable_attributes {
Some(fields) if fields.iter().any(|f| f == "*") => None,
Some(fields) => Some(fields),
None => None,
};
Self {
searchable_attributes,
filterable_attributes,
sortable_attributes,
localized_attributes,
distinct_attribute,
asc_desc_attributes,
}
}
pub fn metadata_for_field(&self, field: &str) -> Metadata {
let searchable = self
.searchable_attributes
if is_faceted_by(field, RESERVED_VECTORS_FIELD_NAME) {
// Vectors fields are not searchable, filterable, distinct or asc_desc
return Metadata {
searchable: None,
sortable: false,
distinct: false,
asc_desc: false,
geo: false,
localized_attributes_rule_id: None,
filterable_attributes_rule_id: None,
};
}
// A field is sortable if it is faceted by a sortable attribute
let sortable = self
.sortable_attributes
.iter()
.any(|attribute| attribute == "*" || attribute == field);
.any(|pattern| match_field_legacy(pattern, field) == PatternMatch::Match);
let filterable = self.filterable_attributes.contains(field);
let filterable_attributes_rule_id = self
.filterable_attributes
.iter()
.position(|attribute| attribute.match_str(field) == PatternMatch::Match)
// saturating_add(1): make `id` `NonZero`
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
let sortable = self.sortable_attributes.contains(field);
if match_field_legacy(RESERVED_GEO_FIELD_NAME, field) == PatternMatch::Match {
// Geo fields are not searchable, distinct or asc_desc
return Metadata {
searchable: None,
sortable,
distinct: false,
asc_desc: false,
geo: true,
localized_attributes_rule_id: None,
filterable_attributes_rule_id,
};
}
let searchable = match &self.searchable_attributes {
// A field is searchable if it is faceted by a searchable attribute
Some(attributes) => attributes
.iter()
.enumerate()
.find(|(_i, pattern)| is_faceted_by(field, pattern))
.map(|(i, _)| i as u16),
None => Some(0),
};
let distinct =
self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
let asc_desc = self.asc_desc_attributes.contains(field);
let localized_attributes_rule_id = self
.localized_attributes
.iter()
.flat_map(|v| v.iter())
.position(|rule| rule.match_str(field))
.position(|rule| rule.match_str(field) == PatternMatch::Match)
// saturating_add(1): make `id` `NonZero`
.map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());
Metadata { searchable, filterable, sortable, localized_attributes_rule_id }
Metadata {
searchable,
sortable,
distinct,
asc_desc,
geo: false,
localized_attributes_rule_id,
filterable_attributes_rule_id,
}
}
pub fn searchable_attributes(&self) -> &[String] {
self.searchable_attributes.as_slice()
pub fn searchable_attributes(&self) -> Option<&[String]> {
self.searchable_attributes.as_deref()
}
pub fn sortable_attributes(&self) -> &HashSet<String> {
&self.sortable_attributes
}
pub fn filterable_attributes(&self) -> &HashSet<String> {
pub fn filterable_attributes(&self) -> &[FilterableAttributesRule] {
&self.filterable_attributes
}

View File

@ -0,0 +1,368 @@
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeSet, HashSet};
use utoipa::ToSchema;
use crate::{
attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch},
constants::RESERVED_GEO_FIELD_NAME,
AttributePatterns,
};
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
#[serde(untagged)]
pub enum FilterableAttributesRule {
Field(String),
Pattern(FilterableAttributesPatterns),
}
impl FilterableAttributesRule {
/// Match a field against the filterable attributes rule.
pub fn match_str(&self, field: &str) -> PatternMatch {
match self {
// If the rule is a field, match the field against the pattern using the legacy behavior
FilterableAttributesRule::Field(pattern) => match_field_legacy(pattern, field),
// If the rule is a pattern, match the field against the pattern using the new behavior
FilterableAttributesRule::Pattern(patterns) => patterns.match_str(field),
}
}
/// Check if the rule is a geo field.
///
/// prefer using `index.is_geo_enabled`, `index.is_geo_filtering_enabled` or `index.is_geo_sorting_enabled`
/// to check if the geo feature is enabled.
pub fn has_geo(&self) -> bool {
matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME)
}
/// Get the features of the rule.
pub fn features(&self) -> FilterableAttributesFeatures {
match self {
// If the rule is a field, return the legacy default features
FilterableAttributesRule::Field(_) => FilterableAttributesFeatures::legacy_default(),
// If the rule is a pattern, return the features of the pattern
FilterableAttributesRule::Pattern(patterns) => patterns.features(),
}
}
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct FilterableAttributesPatterns {
pub attribute_patterns: AttributePatterns,
#[serde(default)]
#[deserr(default)]
pub features: FilterableAttributesFeatures,
}
impl FilterableAttributesPatterns {
pub fn match_str(&self, field: &str) -> PatternMatch {
self.attribute_patterns.match_str(field)
}
pub fn features(&self) -> FilterableAttributesFeatures {
self.features
}
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug, Deserr, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
#[derive(Default)]
pub struct FilterableAttributesFeatures {
#[serde(default)]
#[deserr(default)]
facet_search: bool,
#[serde(default)]
#[deserr(default)]
filter: FilterFeatures,
}
impl FilterableAttributesFeatures {
/// Create a new `FilterableAttributesFeatures` with the legacy default features.
///
/// This is the default behavior for `FilterableAttributesRule::Field`.
/// This will set the facet search to true and activate all the filter operators.
pub fn legacy_default() -> Self {
Self { facet_search: true, filter: FilterFeatures::legacy_default() }
}
/// Create a new `FilterableAttributesFeatures` with no features.
pub fn no_features() -> Self {
Self { facet_search: false, filter: FilterFeatures::no_features() }
}
pub fn is_filterable(&self) -> bool {
self.filter.is_filterable()
}
/// Check if `IS EMPTY` is allowed
pub fn is_filterable_empty(&self) -> bool {
self.filter.is_filterable_empty()
}
/// Check if `=` and `IN` are allowed
pub fn is_filterable_equality(&self) -> bool {
self.filter.is_filterable_equality()
}
/// Check if `IS NULL` is allowed
pub fn is_filterable_null(&self) -> bool {
self.filter.is_filterable_null()
}
/// Check if `IS EXISTS` is allowed
pub fn is_filterable_exists(&self) -> bool {
self.filter.is_filterable_exists()
}
/// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed
pub fn is_filterable_comparison(&self) -> bool {
self.filter.is_filterable_comparison()
}
/// Check if the facet search is allowed
pub fn is_facet_searchable(&self) -> bool {
self.facet_search
}
pub fn allowed_filter_operators(&self) -> Vec<String> {
self.filter.allowed_operators()
}
}
impl<E: DeserializeError> Deserr<E> for FilterableAttributesRule {
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: ValuePointerRef,
) -> Result<Self, E> {
if value.kind() == deserr::ValueKind::Map {
Ok(Self::Pattern(FilterableAttributesPatterns::deserialize_from_value(
value, location,
)?))
} else {
Ok(Self::Field(String::deserialize_from_value(value, location)?))
}
}
}
#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug, Deserr, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct FilterFeatures {
#[serde(default = "default_true")]
#[deserr(default = true)]
equality: bool,
#[serde(default)]
#[deserr(default)]
comparison: bool,
}
fn default_true() -> bool {
true
}
impl FilterFeatures {
/// Get the allowed operators for the filter.
pub fn allowed_operators(&self) -> Vec<String> {
if !self.is_filterable() {
return vec![];
}
let mut operators = vec!["OR", "AND", "NOT"];
if self.is_filterable_equality() {
operators.extend_from_slice(&["=", "!=", "IN"]);
}
if self.is_filterable_comparison() {
operators.extend_from_slice(&["<", ">", "<=", ">=", "TO"]);
}
if self.is_filterable_empty() {
operators.push("IS EMPTY");
}
if self.is_filterable_null() {
operators.push("IS NULL");
}
if self.is_filterable_exists() {
operators.push("EXISTS");
}
operators.into_iter().map(String::from).collect()
}
pub fn is_filterable(&self) -> bool {
self.equality || self.comparison
}
pub fn is_filterable_equality(&self) -> bool {
self.equality
}
/// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed
pub fn is_filterable_comparison(&self) -> bool {
self.comparison
}
/// Check if `IS EMPTY` is allowed
pub fn is_filterable_empty(&self) -> bool {
self.is_filterable()
}
/// Check if `IS EXISTS` is allowed
pub fn is_filterable_exists(&self) -> bool {
self.is_filterable()
}
/// Check if `IS NULL` is allowed
pub fn is_filterable_null(&self) -> bool {
self.is_filterable()
}
/// Create a new `FilterFeatures` with the legacy default features.
///
/// This is the default behavior for `FilterableAttributesRule::Field`.
/// This will set the equality and comparison to true.
pub fn legacy_default() -> Self {
Self { equality: true, comparison: true }
}
/// Create a new `FilterFeatures` with no features.
pub fn no_features() -> Self {
Self { equality: false, comparison: false }
}
}
impl Default for FilterFeatures {
fn default() -> Self {
Self { equality: true, comparison: false }
}
}
/// Match a field against a set of filterable attributes rules.
///
/// This function will return the set of patterns that match the given filter.
///
/// # Arguments
///
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
/// * `filter` - The filter function to apply to the filterable attributes rules.
pub fn filtered_matching_patterns<'patterns>(
filterable_attributes: &'patterns [FilterableAttributesRule],
filter: &impl Fn(FilterableAttributesFeatures) -> bool,
) -> BTreeSet<&'patterns str> {
let mut result = BTreeSet::new();
for rule in filterable_attributes {
if filter(rule.features()) {
match rule {
FilterableAttributesRule::Field(field) => {
result.insert(field.as_str());
}
FilterableAttributesRule::Pattern(patterns) => {
patterns.attribute_patterns.patterns.iter().for_each(|pattern| {
result.insert(pattern);
});
}
}
}
}
result
}
/// Match a field against a set of filterable attributes rules.
///
/// This function will return the features that match the given field name.
///
/// # Arguments
///
/// * `field_name` - The field name to match against.
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
///
/// # Returns
///
/// * `Some((rule_index, features))` - The features of the matching rule and the index of the rule in the `filterable_attributes` array.
/// * `None` - No matching rule was found.
pub fn matching_features(
field_name: &str,
filterable_attributes: &[FilterableAttributesRule],
) -> Option<(usize, FilterableAttributesFeatures)> {
for (id, filterable_attribute) in filterable_attributes.iter().enumerate() {
if filterable_attribute.match_str(field_name) == PatternMatch::Match {
return Some((id, filterable_attribute.features()));
}
}
None
}
/// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields.
pub fn match_faceted_field(
field_name: &str,
filterable_fields: &[FilterableAttributesRule],
sortable_fields: &HashSet<String>,
asc_desc_fields: &HashSet<String>,
distinct_field: &Option<String>,
) -> PatternMatch {
// Check if the field matches any filterable or facet searchable field
let mut selection = match_pattern_by_features(field_name, filterable_fields, &|features| {
features.is_facet_searchable() || features.is_filterable()
});
// If the field matches the pattern, return Match
if selection == PatternMatch::Match {
return selection;
}
match match_distinct_field(distinct_field.as_deref(), field_name) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => selection = PatternMatch::Parent,
PatternMatch::NoMatch => (),
}
// Otherwise, check if the field matches any sortable/asc_desc field
for pattern in sortable_fields.iter().chain(asc_desc_fields.iter()) {
match match_field_legacy(pattern, field_name) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => selection = PatternMatch::Parent,
PatternMatch::NoMatch => (),
}
}
selection
}
fn match_pattern_by_features(
field_name: &str,
filterable_attributes: &[FilterableAttributesRule],
filter: &impl Fn(FilterableAttributesFeatures) -> bool,
) -> PatternMatch {
let mut selection = PatternMatch::NoMatch;
// `can_match` becomes false if the field name matches (PatternMatch::Match) any pattern that is not facet searchable or filterable,
// this ensures that the field doesn't match a pattern with a lower priority, however it can still match a pattern for a nested field as a parent (PatternMatch::Parent).
// See the test `search::filters::test_filterable_attributes_priority` for more details.
let mut can_match = true;
// Check if the field name matches any pattern that is facet searchable or filterable
for pattern in filterable_attributes {
match pattern.match_str(field_name) {
PatternMatch::Match => {
let features = pattern.features();
if filter(features) && can_match {
return PatternMatch::Match;
} else {
can_match = false;
}
}
PatternMatch::Parent => {
let features = pattern.features();
if filter(features) {
selection = PatternMatch::Parent;
}
}
PatternMatch::NoMatch => (),
}
}
selection
}

View File

@ -1,6 +1,5 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::path::Path;
@ -10,10 +9,11 @@ use roaring::RoaringBitmap;
use rstar::RTree;
use serde::{Deserialize, Serialize};
use crate::constants::{self, RESERVED_VECTORS_FIELD_NAME};
use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::database_stats::DatabaseStats;
use crate::documents::PrimaryKey;
use crate::error::{InternalError, UserError};
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::fields_ids_map::FieldsIdsMap;
use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
@ -27,8 +27,9 @@ use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig};
use crate::{
default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
FieldidsWeightsMap, GeoPoint, LocalizedAttributesRule, ObkvCodec, Result, RoaringBitmapCodec,
RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec,
Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32,
BEU64,
};
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@ -513,6 +514,16 @@ impl Index {
.unwrap_or_default())
}
/// Returns the fields ids map with metadata.
///
/// This structure is not yet stored in the index, and is generated on the fly.
pub fn fields_ids_map_with_metadata(&self, rtxn: &RoTxn<'_>) -> Result<FieldIdMapWithMetadata> {
Ok(FieldIdMapWithMetadata::new(
self.fields_ids_map(rtxn)?,
MetadataBuilder::from_index(self, rtxn)?,
))
}
/* fieldids weights map */
// This maps the fields ids to their weights.
// Their weights is defined by the ordering of the searchable attributes.
@ -548,6 +559,17 @@ impl Index {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
}
pub fn max_searchable_attribute_weight(&self, rtxn: &RoTxn<'_>) -> Result<Option<Weight>> {
let user_defined_searchable_fields = self.user_defined_searchable_fields(rtxn)?;
if let Some(user_defined_searchable_fields) = user_defined_searchable_fields {
if !user_defined_searchable_fields.contains(&"*") {
return Ok(Some(user_defined_searchable_fields.len().saturating_sub(1) as Weight));
}
}
Ok(None)
}
pub fn searchable_fields_and_weights<'a>(
&self,
rtxn: &'a RoTxn<'a>,
@ -738,8 +760,7 @@ impl Index {
&self,
wtxn: &mut RwTxn<'_>,
user_fields: &[&str],
non_searchable_fields_ids: &[FieldId],
fields_ids_map: &FieldsIdsMap,
fields_ids_map: &FieldIdMapWithMetadata,
) -> Result<()> {
// We can write the user defined searchable fields as-is.
self.put_user_defined_searchable_fields(wtxn, user_fields)?;
@ -747,29 +768,17 @@ impl Index {
let mut weights = FieldidsWeightsMap::default();
// Now we generate the real searchable fields:
// 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
// 2. Iterate over the user defined searchable fields.
// 3. If a user defined field is a subset of a field defined in the fields_ids_map
// (ie doggo.name is a subset of doggo) right after doggo and with the same weight.
let mut real_fields = Vec::new();
for (id, field_from_map) in fields_ids_map.iter() {
for (weight, user_field) in user_fields.iter().enumerate() {
if crate::is_faceted_by(field_from_map, user_field)
&& !real_fields.contains(&field_from_map)
&& !non_searchable_fields_ids.contains(&id)
{
real_fields.push(field_from_map);
let weight: u16 =
weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
weights.insert(id, weight);
}
for (id, field_from_map, metadata) in fields_ids_map.iter() {
if let Some(weight) = metadata.searchable_weight() {
real_fields.push(field_from_map);
weights.insert(id, weight);
}
}
self.put_searchable_fields(wtxn, &real_fields)?;
self.put_fieldids_weights_map(wtxn, &weights)?;
Ok(())
}
@ -876,26 +885,32 @@ impl Index {
/* filterable fields */
/// Writes the filterable fields names in the database.
pub(crate) fn put_filterable_fields(
/// Writes the filterable attributes rules in the database.
pub(crate) fn put_filterable_attributes_rules(
&self,
wtxn: &mut RwTxn<'_>,
fields: &HashSet<String>,
fields: &[FilterableAttributesRule],
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::FILTERABLE_FIELDS_KEY,
fields,
&fields,
)
}
/// Deletes the filterable fields ids in the database.
pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
/// Deletes the filterable attributes rules in the database.
pub(crate) fn delete_filterable_attributes_rules(
&self,
wtxn: &mut RwTxn<'_>,
) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
}
/// Returns the filterable fields names.
pub fn filterable_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
/// Returns the filterable attributes rules.
pub fn filterable_attributes_rules(
&self,
rtxn: &RoTxn<'_>,
) -> heed::Result<Vec<FilterableAttributesRule>> {
Ok(self
.main
.remap_types::<Str, SerdeJson<_>>()
@ -903,21 +918,6 @@ impl Index {
.unwrap_or_default())
}
/// Identical to `filterable_fields`, but returns ids instead.
pub fn filterable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
let fields = self.filterable_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
let mut fields_ids = HashSet::new();
for name in fields {
if let Some(field_id) = fields_ids_map.id(&name) {
fields_ids.insert(field_id);
}
}
Ok(fields_ids)
}
/* sortable fields */
/// Writes the sortable fields names in the database.
@ -954,83 +954,37 @@ impl Index {
Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect())
}
/* faceted fields */
/// Writes the faceted fields in the database.
pub(crate) fn put_faceted_fields(
&self,
wtxn: &mut RwTxn<'_>,
fields: &HashSet<String>,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<_>>().put(
wtxn,
main_key::HIDDEN_FACETED_FIELDS_KEY,
fields,
)
/// Returns true if the geo feature is enabled.
pub fn is_geo_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
let geo_filter = self.is_geo_filtering_enabled(rtxn)?;
let geo_sortable = self.is_geo_sorting_enabled(rtxn)?;
Ok(geo_filter || geo_sortable)
}
/// Returns the faceted fields names.
pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
Ok(self
.main
.remap_types::<Str, SerdeJson<_>>()
.get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
.unwrap_or_default())
/// Returns true if the geo sorting feature is enabled.
pub fn is_geo_sorting_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
Ok(geo_sortable)
}
/// Identical to `faceted_fields`, but returns ids instead.
pub fn faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
let fields = self.faceted_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
let mut fields_ids = HashSet::new();
for name in fields {
if let Some(field_id) = fields_ids_map.id(&name) {
fields_ids.insert(field_id);
}
}
Ok(fields_ids)
/// Returns true if the geo filtering feature is enabled.
pub fn is_geo_filtering_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
let geo_filter =
self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geo());
Ok(geo_filter)
}
/* faceted documents ids */
/// Returns the user defined faceted fields names.
///
/// The user faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
let filterable_fields = self.filterable_fields(rtxn)?;
let sortable_fields = self.sortable_fields(rtxn)?;
let distinct_field = self.distinct_field(rtxn)?;
let asc_desc_fields =
self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
let asc_desc_fields = self
.criteria(rtxn)?
.into_iter()
.filter_map(|criterion| match criterion {
Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
_otherwise => None,
});
})
.collect();
let mut faceted_fields = filterable_fields;
faceted_fields.extend(sortable_fields);
faceted_fields.extend(asc_desc_fields);
if let Some(field) = distinct_field {
faceted_fields.insert(field.to_owned());
}
Ok(faceted_fields)
}
/// Identical to `user_defined_faceted_fields`, but returns ids instead.
pub fn user_defined_faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
let fields = self.user_defined_faceted_fields(rtxn)?;
let fields_ids_map = self.fields_ids_map(rtxn)?;
let mut fields_ids = HashSet::new();
for name in fields {
if let Some(field_id) = fields_ids_map.id(&name) {
fields_ids.insert(field_id);
}
}
Ok(fields_ids)
Ok(asc_desc_fields)
}
/* faceted documents ids */
@ -1833,7 +1787,7 @@ pub(crate) mod tests {
use big_s::S;
use bumpalo::Bump;
use heed::{EnvOpenOptions, RwTxn};
use maplit::{btreemap, hashset};
use maplit::btreemap;
use memmap2::Mmap;
use tempfile::TempDir;
@ -1849,7 +1803,8 @@ pub(crate) mod tests {
use crate::vector::settings::{EmbedderSource, EmbeddingSettings};
use crate::vector::EmbeddingConfigs;
use crate::{
db_snap, obkv_to_json, Filter, Index, Search, SearchResult, ThreadPoolNoAbortBuilder,
db_snap, obkv_to_json, Filter, FilterableAttributesRule, Index, Search, SearchResult,
ThreadPoolNoAbortBuilder,
};
pub(crate) struct TempIndex {
@ -2256,7 +2211,7 @@ pub(crate) mod tests {
let rtxn = index.read_txn().unwrap();
let real = index.searchable_fields(&rtxn).unwrap();
assert_eq!(real, &["doggo", "name"]);
assert!(real.is_empty());
let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
assert_eq!(user_defined, &["doggo", "name"]);
@ -2284,7 +2239,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
index
@ -2392,7 +2349,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("doggo") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"doggo".to_string(),
)]);
})
.unwrap();
index
@ -2429,15 +2388,14 @@ pub(crate) mod tests {
#[test]
fn replace_documents_external_ids_and_soft_deletion_check() {
use big_s::S;
use maplit::hashset;
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("doggo") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"doggo".to_string(),
)]);
})
.unwrap();
@ -2970,8 +2928,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_primary_key("id".to_string());
settings
.set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()]));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
@ -3005,8 +2964,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_primary_key("id".to_string());
settings
.set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()]));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
@ -3039,7 +2999,9 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"age".to_string(),
)]);
})
.unwrap();
@ -3047,35 +3009,37 @@ pub(crate) mod tests {
.add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" }))
.unwrap();
db_snap!(index, fields_ids_map, @r###"
0 name |
1 id |
0 id |
1 name |
2 age |
3 realName |
"###);
db_snap!(index, searchable_fields, @r###"["name"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
"###);
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("name"), S("realName")]);
settings.set_filterable_fields(HashSet::from([S("age")]));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"age".to_string(),
)]);
})
.unwrap();
// The order of the field id map shouldn't change
db_snap!(index, fields_ids_map, @r###"
0 name |
1 id |
0 id |
1 name |
2 age |
3 realName |
"###);
db_snap!(index, searchable_fields, @r###"["name", "realName"]"###);
db_snap!(index, fieldids_weights_map, @r###"
fid weight
0 0 |
1 0 |
3 1 |
"###);
}
@ -3160,14 +3124,16 @@ pub(crate) mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]);
settings.set_filterable_fields(hashset![S("_vectors"), S("_vectors.doggo")]);
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field("_vectors".to_string()),
FilterableAttributesRule::Field("_vectors.doggo".to_string()),
]);
})
.unwrap();
db_snap!(index, fields_ids_map, @r###"
0 id |
1 _vectors |
2 _vectors.doggo |
"###);
db_snap!(index, searchable_fields, @"[]");
db_snap!(index, fieldids_weights_map, @r###"
@ -3200,7 +3166,6 @@ pub(crate) mod tests {
db_snap!(index, fields_ids_map, @r###"
0 id |
1 _vectors |
2 _vectors.doggo |
"###);
db_snap!(index, searchable_fields, @"[]");
db_snap!(index, fieldids_weights_map, @r###"

View File

@ -9,12 +9,14 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
pub mod documents;
mod asc_desc;
mod attribute_patterns;
mod criterion;
pub mod database_stats;
mod error;
mod external_documents_ids;
pub mod facet;
mod fields_ids_map;
mod filterable_attributes_rules;
pub mod heed_codec;
pub mod index;
mod localized_attributes_rules;
@ -52,6 +54,8 @@ pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbor
pub use {charabia as tokenizer, heed, rhai};
pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::attribute_patterns::AttributePatterns;
pub use self::attribute_patterns::PatternMatch;
pub use self::criterion::{default_criteria, Criterion, CriterionError};
pub use self::error::{
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
@ -59,6 +63,10 @@ pub use self::error::{
pub use self::external_documents_ids::ExternalDocumentsIds;
pub use self::fieldids_weights_map::FieldidsWeightsMap;
pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap};
pub use self::filterable_attributes_rules::{
FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns,
FilterableAttributesRule,
};
pub use self::heed_codec::{
BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
@ -67,7 +75,6 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::localized_attributes_rules::LocalizedAttributesRule;
use self::localized_attributes_rules::LocalizedFieldIds;
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
pub use self::search::similar::Similar;
pub use self::search::{

View File

@ -4,8 +4,9 @@ use charabia::Language;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;
use crate::attribute_patterns::PatternMatch;
use crate::fields_ids_map::FieldsIdsMap;
use crate::FieldId;
use crate::{AttributePatterns, FieldId};
/// A rule that defines which locales are supported for a given attribute.
///
@ -17,18 +18,18 @@ use crate::FieldId;
/// The pattern `*attribute_name*` matches any attribute name that contains `attribute_name`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
pub struct LocalizedAttributesRule {
pub attribute_patterns: Vec<String>,
pub attribute_patterns: AttributePatterns,
#[schema(value_type = Vec<String>)]
pub locales: Vec<Language>,
}
impl LocalizedAttributesRule {
pub fn new(attribute_patterns: Vec<String>, locales: Vec<Language>) -> Self {
Self { attribute_patterns, locales }
Self { attribute_patterns: AttributePatterns::from(attribute_patterns), locales }
}
pub fn match_str(&self, str: &str) -> bool {
self.attribute_patterns.iter().any(|pattern| match_pattern(pattern.as_str(), str))
pub fn match_str(&self, str: &str) -> PatternMatch {
self.attribute_patterns.match_str(str)
}
pub fn locales(&self) -> &[Language] {
@ -36,20 +37,6 @@ impl LocalizedAttributesRule {
}
}
fn match_pattern(pattern: &str, str: &str) -> bool {
if pattern == "*" {
true
} else if pattern.starts_with('*') && pattern.ends_with('*') {
str.contains(&pattern[1..pattern.len() - 1])
} else if let Some(pattern) = pattern.strip_prefix('*') {
str.ends_with(pattern)
} else if let Some(pattern) = pattern.strip_suffix('*') {
str.starts_with(pattern)
} else {
pattern == str
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocalizedFieldIds {
field_id_to_locales: HashMap<FieldId, Vec<Language>>,
@ -65,13 +52,13 @@ impl LocalizedFieldIds {
if let Some(rules) = rules {
let fields = fields_ids.filter_map(|field_id| {
fields_ids_map.name(field_id).map(|field_name| (field_id, field_name))
fields_ids_map.name(field_id).map(|field_name: &str| (field_id, field_name))
});
for (field_id, field_name) in fields {
let mut locales = Vec::new();
for rule in rules {
if rule.match_str(field_name) {
if rule.match_str(field_name) == PatternMatch::Match {
locales.extend(rule.locales.iter());
// Take the first rule that matches
break;
@ -89,10 +76,6 @@ impl LocalizedFieldIds {
Self { field_id_to_locales }
}
pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> {
self.field_id_to_locales.get(&fields_id).map(Vec::as_slice)
}
pub fn all_locales(&self) -> Vec<Language> {
let mut locales = Vec::new();
for field_locales in self.field_id_to_locales.values() {
@ -108,24 +91,3 @@ impl LocalizedFieldIds {
locales
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_match_pattern() {
assert!(match_pattern("*", "test"));
assert!(match_pattern("test*", "test"));
assert!(match_pattern("test*", "testa"));
assert!(match_pattern("*test", "test"));
assert!(match_pattern("*test", "atest"));
assert!(match_pattern("*test*", "test"));
assert!(match_pattern("*test*", "atesta"));
assert!(match_pattern("*test*", "atest"));
assert!(match_pattern("*test*", "testa"));
assert!(!match_pattern("test*test", "test"));
assert!(!match_pattern("*test", "testa"));
assert!(!match_pattern("test*", "atest"));
}
}

View File

@ -7,14 +7,14 @@ use liquid::model::{
};
use liquid::{ObjectView, ValueView};
use super::{FieldMetadata, FieldsIdsMapWithMetadata};
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata};
use crate::GlobalFieldsIdsMap;
#[derive(Debug, Clone, Copy)]
pub struct FieldValue<'a, D: ObjectView> {
name: &'a str,
document: &'a D,
metadata: FieldMetadata,
metadata: Metadata,
}
impl<'a, D: ObjectView> ValueView for FieldValue<'a, D> {
@ -67,7 +67,10 @@ impl<'a, D: ObjectView> FieldValue<'a, D> {
}
pub fn is_searchable(&self) -> &bool {
&self.metadata.searchable
match self.metadata.is_searchable() {
true => &true,
false => &false,
}
}
pub fn is_empty(&self) -> bool {
@ -125,15 +128,11 @@ pub struct BorrowedFields<'a, 'map, D: ObjectView> {
}
impl<'a, D: ObjectView> OwnedFields<'a, D> {
pub fn new(document: &'a D, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
pub fn new(document: &'a D, field_id_map: &'a FieldIdMapWithMetadata) -> Self {
Self(
std::iter::repeat(document)
.zip(field_id_map.iter())
.map(|(document, (fid, name))| FieldValue {
document,
name,
metadata: field_id_map.metadata(fid).unwrap_or_default(),
})
.map(|(document, (_fid, name, metadata))| FieldValue { document, name, metadata })
.collect(),
)
}
@ -187,7 +186,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
let fv = self.doc_alloc.alloc(FieldValue {
name: self.doc_alloc.alloc_str(&k),
document: self.document,
metadata: FieldMetadata { searchable: metadata.searchable },
metadata,
});
fv as _
}))
@ -207,7 +206,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
let fv = self.doc_alloc.alloc(FieldValue {
name: self.doc_alloc.alloc_str(&key),
document: self.document,
metadata: FieldMetadata { searchable: metadata.searchable },
metadata,
});
Some(fv as _)
}

View File

@ -5,11 +5,9 @@ mod fields;
mod template_checker;
use std::cell::RefCell;
use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::fmt::Debug;
use std::num::NonZeroUsize;
use std::ops::Deref;
use bumpalo::Bump;
use document::ParseableDocument;
@ -18,8 +16,9 @@ use fields::{BorrowedFields, OwnedFields};
use self::context::Context;
use self::document::Document;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::update::del_add::DelAdd;
use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap};
use crate::GlobalFieldsIdsMap;
pub struct Prompt {
template: liquid::Template,
@ -145,9 +144,9 @@ impl Prompt {
&self,
document: &obkv::KvReaderU16,
side: DelAdd,
field_id_map: &FieldsIdsMapWithMetadata,
field_id_map: &FieldIdMapWithMetadata,
) -> Result<String, RenderPromptError> {
let document = Document::new(document, side, field_id_map);
let document = Document::new(document, side, field_id_map.as_fields_ids_map());
let fields = OwnedFields::new(&document, field_id_map);
let context = Context::new(&document, &fields);
@ -172,40 +171,6 @@ fn truncate(s: &mut String, max_bytes: usize) {
}
}
pub struct FieldsIdsMapWithMetadata<'a> {
fields_ids_map: &'a FieldsIdsMap,
metadata: BTreeMap<FieldId, FieldMetadata>,
}
impl<'a> FieldsIdsMapWithMetadata<'a> {
pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
let mut metadata: BTreeMap<FieldId, FieldMetadata> =
fields_ids_map.ids().map(|id| (id, Default::default())).collect();
for searchable_field_id in searchable_fields_ids {
let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
metadata.searchable = true;
}
Self { fields_ids_map, metadata }
}
pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
self.metadata.get(&field_id).copied()
}
}
impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
type Target = FieldsIdsMap;
fn deref(&self) -> &Self::Target {
self.fields_ids_map
}
}
#[derive(Debug, Default, Clone, Copy)]
pub struct FieldMetadata {
pub searchable: bool,
}
#[cfg(test)]
mod test {
use super::Prompt;

View File

@ -1,4 +1,4 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fmt::Display;
use std::ops::ControlFlow;
use std::{fmt, mem};
@ -9,8 +9,9 @@ use indexmap::IndexMap;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use crate::error::UserError;
use crate::attribute_patterns::match_field_legacy;
use crate::facet::FacetType;
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{
FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
};
@ -18,7 +19,7 @@ use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter::{
count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
};
use crate::{FieldId, Index, Result};
use crate::{Error, FieldId, FilterableAttributesRule, Index, PatternMatch, Result, UserError};
/// The default number of values by facets that will
/// be fetched from the key-value store.
@ -287,37 +288,19 @@ impl<'a> FacetDistribution<'a> {
}
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let candidates = if let Some(candidates) = self.candidates.clone() {
candidates
} else {
return Ok(Default::default());
};
let fields = match &self.facets {
Some(facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.map(|(name, _)| name)
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: filterable_fields.into_iter().collect(),
}
.into());
} else {
facets.iter().map(|(name, _)| name).cloned().collect()
}
}
None => filterable_fields,
};
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?;
self.check_faceted_fields(&filterable_attributes_rules)?;
let mut distribution = BTreeMap::new();
for (fid, name) in fields_ids_map.iter() {
if crate::is_faceted(name, &fields) {
if self.select_field(name, &filterable_attributes_rules) {
let min_value = if let Some(min_value) = crate::search::facet::facet_min_value(
self.index,
self.rtxn,
@ -348,31 +331,12 @@ impl<'a> FacetDistribution<'a> {
pub fn execute(&self) -> Result<BTreeMap<String, IndexMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let fields = match self.facets {
Some(ref facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.map(|(name, _)| name)
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: filterable_fields.into_iter().collect(),
}
.into());
} else {
facets.iter().map(|(name, _)| name).cloned().collect()
}
}
None => filterable_fields,
};
let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?;
self.check_faceted_fields(&filterable_attributes_rules)?;
let mut distribution = BTreeMap::new();
for (fid, name) in fields_ids_map.iter() {
if crate::is_faceted(name, &fields) {
if self.select_field(name, &filterable_attributes_rules) {
let order_by = self
.facets
.as_ref()
@ -385,6 +349,62 @@ impl<'a> FacetDistribution<'a> {
Ok(distribution)
}
/// Select a field if it is filterable and in the facets.
fn select_field(
&self,
name: &str,
filterable_attributes_rules: &[FilterableAttributesRule],
) -> bool {
// If the field is not filterable, we don't want to compute the facet distribution.
if !matching_features(name, filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_filterable())
{
return false;
}
match &self.facets {
Some(facets) => {
// The list of facets provided by the user is a legacy pattern ("dog.age" must be selected with "dog").
facets.keys().any(|key| match_field_legacy(key, name) == PatternMatch::Match)
}
None => true,
}
}
/// Check if the fields in the facets are valid filterable fields.
fn check_faceted_fields(
&self,
filterable_attributes_rules: &[FilterableAttributesRule],
) -> Result<()> {
let mut invalid_facets = BTreeSet::new();
if let Some(facets) = &self.facets {
for field in facets.keys() {
let is_valid_filterable_field =
matching_features(field, filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_filterable());
if !is_valid_filterable_field {
invalid_facets.insert(field.to_string());
}
}
}
if !invalid_facets.is_empty() {
let valid_patterns =
filtered_matching_patterns(filterable_attributes_rules, &|features| {
features.is_filterable()
})
.into_iter()
.map(String::from)
.collect();
return Err(Error::UserError(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_facets,
valid_patterns,
}));
}
Ok(())
}
}
impl fmt::Debug for FacetDistribution<'_> {
@ -412,11 +432,10 @@ mod tests {
use std::iter;
use big_s::S;
use maplit::hashset;
use crate::documents::mmap_from_objects;
use crate::index::tests::TempIndex;
use crate::{milli_snap, FacetDistribution, OrderBy};
use crate::{milli_snap, FacetDistribution, FilterableAttributesRule, OrderBy};
#[test]
fn few_candidates_few_facet_values() {
@ -426,7 +445,9 @@ mod tests {
let index = TempIndex::new();
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
})
.unwrap();
let documents = documents!([
@ -497,7 +518,9 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
})
.unwrap();
let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"];
@ -582,7 +605,9 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();
@ -641,7 +666,9 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
@ -692,7 +719,9 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
@ -743,7 +772,9 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
@ -794,7 +825,9 @@ mod tests {
let index = TempIndex::new_with_map_size(4096 * 10_000);
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.update_settings(|settings| {
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
})
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();

View File

@ -1,4 +1,4 @@
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
@ -12,12 +12,14 @@ use serde_json::Value;
use super::facet_range_search;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result,
distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap,
FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result,
SerializationError,
};
@ -60,7 +62,7 @@ impl Display for BadGeoError {
#[derive(Debug)]
enum FilterError<'a> {
AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
AttributeNotFilterable { attribute: &'a str, filterable_patterns: BTreeSet<&'a str> },
ParseGeoError(BadGeoError),
TooDeep,
}
@ -75,14 +77,14 @@ impl<'a> From<BadGeoError> for FilterError<'a> {
impl<'a> Display for FilterError<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::AttributeNotFilterable { attribute, filterable_fields } => {
Self::AttributeNotFilterable { attribute, filterable_patterns } => {
write!(f, "Attribute `{attribute}` is not filterable.")?;
if filterable_fields.is_empty() {
if filterable_patterns.is_empty() {
write!(f, " This index does not have configured filterable attributes.")
} else {
write!(f, " Available filterable attributes are: ")?;
write!(f, " Available filterable attribute patterns are: ")?;
let mut filterables_list =
filterable_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>();
filterable_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>();
filterables_list.sort_unstable();
for (idx, filterable) in filterables_list.iter().enumerate() {
write!(f, "`{filterable}`")?;
@ -230,17 +232,27 @@ impl<'a> Filter<'a> {
impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let filterable_fields = index.filterable_fields(rtxn)?;
let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
for fid in self.condition.fids(MAX_FILTER_DEPTH) {
let attribute = fid.value();
if !crate::is_faceted(attribute, &filterable_fields) {
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute,
filterable_fields,
}))?;
if matching_features(attribute, &filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_filterable())
{
continue;
}
// If the field is not filterable, return an error
return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute,
filterable_patterns: filtered_matching_patterns(
&filterable_attributes_rules,
&|features| features.is_filterable(),
),
}))?;
}
self.inner_evaluate(rtxn, index, &filterable_fields, None)
self.inner_evaluate(rtxn, index, &fields_ids_map, &filterable_attributes_rules, None)
}
fn evaluate_operator(
@ -249,6 +261,8 @@ impl<'a> Filter<'a> {
field_id: FieldId,
universe: Option<&RoaringBitmap>,
operator: &Condition<'a>,
features: &FilterableAttributesFeatures,
rule_index: usize,
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
let strings_db = index.facet_id_string_docids;
@ -258,6 +272,38 @@ impl<'a> Filter<'a> {
// field id and the level.
let (left, right) = match operator {
// return an error if the filter is not allowed for this field
Condition::GreaterThan(_)
| Condition::GreaterThanOrEqual(_)
| Condition::LowerThan(_)
| Condition::LowerThanOrEqual(_)
| Condition::Between { .. }
if !features.is_filterable_comparison() =>
{
return Err(generate_filter_error(
rtxn, index, field_id, operator, features, rule_index,
));
}
Condition::Empty if !features.is_filterable_empty() => {
return Err(generate_filter_error(
rtxn, index, field_id, operator, features, rule_index,
));
}
Condition::Null if !features.is_filterable_null() => {
return Err(generate_filter_error(
rtxn, index, field_id, operator, features, rule_index,
));
}
Condition::Exists if !features.is_filterable_exists() => {
return Err(generate_filter_error(
rtxn, index, field_id, operator, features, rule_index,
));
}
Condition::Equal(_) | Condition::NotEqual(_) if !features.is_filterable_equality() => {
return Err(generate_filter_error(
rtxn, index, field_id, operator, features, rule_index,
));
}
Condition::GreaterThan(val) => {
(Excluded(val.parse_finite_float()?), Included(f64::MAX))
}
@ -307,7 +353,9 @@ impl<'a> Filter<'a> {
}
Condition::NotEqual(val) => {
let operator = Condition::Equal(val.clone());
let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
let docids = Self::evaluate_operator(
rtxn, index, field_id, None, &operator, features, rule_index,
)?;
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
}
@ -409,7 +457,8 @@ impl<'a> Filter<'a> {
&self,
rtxn: &heed::RoTxn<'_>,
index: &Index,
filterable_fields: &HashSet<String>,
field_ids_map: &FieldsIdsMap,
filterable_attribute_rules: &[FilterableAttributesRule],
universe: Option<&RoaringBitmap>,
) -> Result<RoaringBitmap> {
if universe.map_or(false, |u| u.is_empty()) {
@ -422,7 +471,8 @@ impl<'a> Filter<'a> {
&(f.as_ref().clone()).into(),
rtxn,
index,
filterable_fields,
field_ids_map,
filterable_attribute_rules,
universe,
)?;
match universe {
@ -434,42 +484,49 @@ impl<'a> Filter<'a> {
}
}
FilterCondition::In { fid, els } => {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
.union()
} else {
Ok(RoaringBitmap::new())
}
} else {
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filterable_fields.clone(),
}))?
}
let Some(field_id) = field_ids_map.id(fid.value()) else {
return Ok(RoaringBitmap::new());
};
let Some((rule_index, features)) =
matching_features(fid.value(), filterable_attribute_rules)
else {
return Ok(RoaringBitmap::new());
};
els.iter()
.map(|el| Condition::Equal(el.clone()))
.map(|op| {
Self::evaluate_operator(
rtxn, index, field_id, universe, &op, &features, rule_index,
)
})
.union()
}
FilterCondition::Condition { fid, op } => {
if crate::is_faceted(fid.value(), filterable_fields) {
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, universe, op)
} else {
Ok(RoaringBitmap::new())
}
} else {
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filterable_fields.clone(),
}))?
}
let Some(field_id) = field_ids_map.id(fid.value()) else {
return Ok(RoaringBitmap::new());
};
let Some((rule_index, features)) =
matching_features(fid.value(), filterable_attribute_rules)
else {
return Ok(RoaringBitmap::new());
};
Self::evaluate_operator(rtxn, index, field_id, universe, op, &features, rule_index)
}
FilterCondition::Or(subfilters) => subfilters
.iter()
.cloned()
.map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe))
.map(|f| {
Self::inner_evaluate(
&f.into(),
rtxn,
index,
field_ids_map,
filterable_attribute_rules,
universe,
)
})
.union(),
FilterCondition::And(subfilters) => {
let mut subfilters_iter = subfilters.iter();
@ -478,7 +535,8 @@ impl<'a> Filter<'a> {
&(first_subfilter.clone()).into(),
rtxn,
index,
filterable_fields,
field_ids_map,
filterable_attribute_rules,
universe,
)?;
for f in subfilters_iter {
@ -492,7 +550,8 @@ impl<'a> Filter<'a> {
&(f.clone()).into(),
rtxn,
index,
filterable_fields,
field_ids_map,
filterable_attribute_rules,
Some(&bitmap),
)?;
}
@ -502,7 +561,7 @@ impl<'a> Filter<'a> {
}
}
FilterCondition::GeoLowerThan { point, radius } => {
if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
if index.is_geo_filtering_enabled(rtxn)? {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) {
@ -532,12 +591,15 @@ impl<'a> Filter<'a> {
} else {
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_fields: filterable_fields.clone(),
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
}))?
}
}
FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
if index.is_geo_filtering_enabled(rtxn)? {
let top_right: [f64; 2] = [
top_right_point[0].parse_finite_float()?,
top_right_point[1].parse_finite_float()?,
@ -592,7 +654,8 @@ impl<'a> Filter<'a> {
let selected_lat = Filter { condition: condition_lat }.inner_evaluate(
rtxn,
index,
filterable_fields,
field_ids_map,
filterable_attribute_rules,
universe,
)?;
@ -625,7 +688,8 @@ impl<'a> Filter<'a> {
let left = Filter { condition: condition_left }.inner_evaluate(
rtxn,
index,
filterable_fields,
field_ids_map,
filterable_attribute_rules,
universe,
)?;
@ -639,7 +703,8 @@ impl<'a> Filter<'a> {
let right = Filter { condition: condition_right }.inner_evaluate(
rtxn,
index,
filterable_fields,
field_ids_map,
filterable_attribute_rules,
universe,
)?;
@ -655,7 +720,8 @@ impl<'a> Filter<'a> {
Filter { condition: condition_lng }.inner_evaluate(
rtxn,
index,
filterable_fields,
field_ids_map,
filterable_attribute_rules,
universe,
)?
};
@ -665,7 +731,10 @@ impl<'a> Filter<'a> {
Err(top_right_point[0].as_external_error(
FilterError::AttributeNotFilterable {
attribute: RESERVED_GEO_FIELD_NAME,
filterable_fields: filterable_fields.clone(),
filterable_patterns: filtered_matching_patterns(
filterable_attribute_rules,
&|features| features.is_filterable(),
),
},
))?
}
@ -674,6 +743,28 @@ impl<'a> Filter<'a> {
}
}
fn generate_filter_error(
rtxn: &heed::RoTxn<'_>,
index: &Index,
field_id: FieldId,
operator: &Condition<'_>,
features: &FilterableAttributesFeatures,
rule_index: usize,
) -> Error {
match index.fields_ids_map(rtxn) {
Ok(fields_ids_map) => {
let field = fields_ids_map.name(field_id).unwrap_or_default();
Error::UserError(UserError::FilterOperatorNotAllowed {
field: field.to_string(),
allowed_operators: features.allowed_filter_operators(),
operator: operator.operator().to_string(),
rule_index,
})
}
Err(e) => e.into(),
}
}
impl<'a> From<FilterCondition<'a>> for Filter<'a> {
fn from(fc: FilterCondition<'a>) -> Self {
Self { condition: fc }
@ -687,12 +778,12 @@ mod tests {
use big_s::S;
use either::Either;
use maplit::hashset;
use meili_snap::snapshot;
use roaring::RoaringBitmap;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::index::tests::TempIndex;
use crate::Filter;
use crate::{Filter, FilterableAttributesRule};
#[test]
fn empty_db() {
@ -700,7 +791,9 @@ mod tests {
//Set the filterable fields to be the channel.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("PrIcE") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"PrIcE".to_string(),
)]);
})
.unwrap();
@ -784,27 +877,32 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
));
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
12:14 _geoRadius(42, 150, 10)
"###);
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
));
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
18:20 _geoBoundingBox([42, 150], [30, 10])
"###);
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `dog` is not filterable. This index does not have configured filterable attributes."
));
snapshot!(error.to_string(), @r###"
Attribute `dog` is not filterable. This index does not have configured filterable attributes.
1:4 dog = "bernese mountain"
"###);
drop(rtxn);
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("title")]);
settings.set_filterable_fields(hashset! { S("title") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"title".to_string(),
)]);
})
.unwrap();
@ -812,39 +910,45 @@ mod tests {
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
12:16 _geoRadius(-100, 150, 10)
"###);
let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
18:20 _geoBoundingBox([42, 150], [30, 10])
"###);
let filter = Filter::from_str("name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. Available filterable attribute patterns are: `title`.
1:5 name = 12
"###);
let filter = Filter::from_str("title = \"test\" AND name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. Available filterable attribute patterns are: `title`.
20:24 title = "test" AND name = 12
"###);
let filter = Filter::from_str("title = \"test\" AND name IN [12]").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. Available filterable attribute patterns are: `title`.
20:24 title = "test" AND name IN [12]
"###);
let filter = Filter::from_str("title = \"test\" AND name != 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`."
));
snapshot!(error.to_string(), @r###"
Attribute `name` is not filterable. Available filterable attribute patterns are: `title`.
20:24 title = "test" AND name != 12
"###);
}
#[test]
@ -870,7 +974,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(S("monitor_diagonal")));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"monitor_diagonal".to_string(),
)]);
})
.unwrap();
@ -901,7 +1007,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S(
RESERVED_GEO_FIELD_NAME,
))]);
})
.unwrap();
@ -948,7 +1056,10 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
FilterableAttributesRule::Field("price".to_string()),
]);
})
.unwrap();
@ -998,7 +1109,10 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
FilterableAttributesRule::Field("price".to_string()),
]);
})
.unwrap();
@ -1108,7 +1222,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S("price") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"price".to_string(),
)]);
})
.unwrap();
index
@ -1164,7 +1280,11 @@ mod tests {
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("id"), S("one"), S("two") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field("id".to_string()),
FilterableAttributesRule::Field("one".to_string()),
FilterableAttributesRule::Field("two".to_string()),
]);
})
.unwrap();

View File

@ -10,6 +10,7 @@ use roaring::RoaringBitmap;
use tracing::error;
use crate::error::UserError;
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::search::build_dfa;
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
@ -73,25 +74,28 @@ impl<'a> SearchForFacetValues<'a> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
let (valid_fields, hidden_fields) =
index.remove_hidden_fields(rtxn, filterable_fields)?;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
if !matching_features(&self.facet, &filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_facet_searchable())
{
let matching_field_names =
filtered_matching_patterns(&filterable_attributes_rules, &|features| {
features.is_facet_searchable()
});
let (valid_patterns, hidden_fields) =
index.remove_hidden_fields(rtxn, matching_field_names)?;
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),
valid_fields,
valid_patterns,
hidden_fields,
}
.into());
}
};
let fields_ids_map = index.fields_ids_map(rtxn)?;
let fid = match fields_ids_map.id(&self.facet) {
Some(fid) => fid,
// we return an empty list of results when the attribute has been
// set as filterable but no document contains this field (yet).
None => return Ok(Vec::new()),
let Some(fid) = fields_ids_map.id(&self.facet) else {
return Ok(Vec::new());
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {

View File

@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder;
use crate::{
@ -187,13 +188,22 @@ impl<'a> Search<'a> {
}
if let Some(distinct) = &self.distinct {
let filterable_fields = ctx.index.filterable_fields(ctx.txn)?;
if !crate::is_faceted(distinct, &filterable_fields) {
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?;
let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?;
// check if the distinct field is in the filterable fields
if !matching_features(distinct, &filterable_fields)
.map_or(false, |(_, features)| features.is_filterable())
{
// if not, remove the hidden fields from the filterable fields to generate the error message
let matching_patterns =
filtered_matching_patterns(&filterable_fields, &|features| {
features.is_filterable()
});
let (valid_patterns, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, matching_patterns)?;
// and return the error
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
field: distinct.clone(),
valid_fields,
valid_patterns,
hidden_fields,
}));
}

View File

@ -57,6 +57,7 @@ impl RankingRuleGraphTrait for FidGraph {
let term = to_term;
let mut all_fields = FxHashSet::default();
let mut current_max_weight = 0;
for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
let fields = ctx.get_db_word_fids(word.interned())?;
all_fields.extend(fields);
@ -81,6 +82,9 @@ impl RankingRuleGraphTrait for FidGraph {
let weight = weights_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
if weight > current_max_weight {
current_max_weight = weight;
}
edges.push((
weight as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
@ -88,10 +92,10 @@ impl RankingRuleGraphTrait for FidGraph {
}
// always lookup the max_fid if we don't already and add an artificial condition for max scoring
let max_weight: Option<u16> = weights_map.max_weight();
let max_weight = ctx.index.max_searchable_attribute_weight(ctx.txn)?;
if let Some(max_weight) = max_weight {
if !all_fields.contains(&max_weight) {
if current_max_weight < max_weight {
edges.push((
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
conditions_interner.insert(FidCondition {

View File

@ -5,13 +5,11 @@
use std::time::Duration;
use big_s::S;
use maplit::hashset;
use meili_snap::snapshot;
use crate::index::tests::TempIndex;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::{Criterion, Filter, Search, TimeBudget};
use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -20,7 +18,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_filterable_fields(hashset! { S("id") });
s.set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_owned())]);
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
})
.unwrap();

View File

@ -19,7 +19,10 @@ use maplit::hashset;
use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
use crate::{
AscDesc, Criterion, FilterableAttributesRule, Index, Member, Search, SearchResult,
TermsMatchingStrategy,
};
fn create_index() -> TempIndex {
let index = TempIndex::new();
@ -236,7 +239,7 @@ fn test_distinct_placeholder_no_ranking_rules() {
// Set the letter as filterable and unset the distinct attribute.
index
.update_settings(|s| {
s.set_filterable_fields(hashset! { S("letter") });
s.set_filterable_fields(vec![FilterableAttributesRule::Field("letter".to_owned())]);
s.reset_distinct_field();
})
.unwrap();

View File

@ -9,7 +9,7 @@ use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::{IndexerConfig, Settings};
use crate::vector::EmbeddingConfigs;
use crate::{db_snap, Criterion, Index};
use crate::{db_snap, Criterion, FilterableAttributesRule, Index};
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
use crate::constants::RESERVED_GEO_FIELD_NAME;
@ -25,14 +25,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(criteria.to_vec());
builder.set_filterable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
S(RESERVED_GEO_FIELD_NAME),
S("opt1"),
S("opt1.opt2"),
S("tag_in")
});
builder.set_filterable_fields(vec![
FilterableAttributesRule::Field(S("tag")),
FilterableAttributesRule::Field(S("asc_desc_rank")),
FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
FilterableAttributesRule::Field(S("opt1")),
FilterableAttributesRule::Field(S("opt1.opt2")),
FilterableAttributesRule::Field(S("tag_in")),
]);
builder.set_sortable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),

View File

@ -1,5 +1,5 @@
---
source: milli/src/search/new/tests/attribute_position.rs
source: crates/milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[

View File

@ -1,5 +1,5 @@
---
source: milli/src/search/new/tests/attribute_position.rs
source: crates/milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[

View File

@ -1,5 +1,5 @@
---
source: milli/src/search/new/tests/attribute_position.rs
source: crates/milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[

View File

@ -1,5 +1,5 @@
---
source: milli/src/search/new/tests/attribute_position.rs
source: crates/milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[

View File

@ -386,7 +386,7 @@ pub fn snap_settings(index: &Index) -> String {
write_setting_to_snap!(criteria);
write_setting_to_snap!(displayed_fields);
write_setting_to_snap!(distinct_field);
write_setting_to_snap!(filterable_fields);
write_setting_to_snap!(filterable_attributes_rules);
write_setting_to_snap!(sortable_fields);
write_setting_to_snap!(synonyms);
write_setting_to_snap!(authorize_typos);

View File

@ -81,6 +81,17 @@ pub enum DelAddOperation {
DeletionAndAddition,
}
impl DelAddOperation {
/// Merge two DelAddOperation enum variants.
pub fn merge(self, other: Self) -> Self {
match (self, other) {
(Self::Deletion, Self::Deletion) => Self::Deletion,
(Self::Addition, Self::Addition) => Self::Addition,
_ => Self::DeletionAndAddition,
}
}
}
/// Creates a Kv<K, Kv<DelAdd, value>> from two Kv<K, value>
///
/// putting each deletion obkv's keys under an DelAdd::Deletion

View File

@ -6,7 +6,7 @@ use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use super::{clear_facet_levels, FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::facet::FacetType;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
@ -97,9 +97,7 @@ pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
pub fn update(mut self, wtxn: &mut RwTxn<'_>, field_ids: &[u16]) -> Result<()> {
self.update_level0(wtxn)?;
for &field_id in field_ids.iter() {
self.clear_levels(wtxn, field_id)?;
}
clear_facet_levels(wtxn, &self.db.remap_data_type(), field_ids)?;
for &field_id in field_ids.iter() {
let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?;
@ -114,14 +112,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
Ok(())
}
fn clear_levels(&self, wtxn: &mut heed::RwTxn<'_>, field_id: FieldId) -> Result<()> {
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let range = left..=right;
self.db.delete_range(wtxn, &range).map(drop)?;
Ok(())
}
fn update_level0(&mut self, wtxn: &mut RwTxn<'_>) -> Result<()> {
let delta_data = match self.delta_data.take() {
Some(x) => x,
@ -365,8 +355,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
mod tests {
use std::iter::once;
use big_s::S;
use maplit::hashset;
use roaring::RoaringBitmap;
use crate::documents::mmap_from_objects;
@ -374,7 +362,7 @@ mod tests {
use crate::heed_codec::StrRefCodec;
use crate::index::tests::TempIndex;
use crate::update::facet::test_helpers::{ordered_string, FacetIndex};
use crate::{db_snap, milli_snap};
use crate::{db_snap, milli_snap, FilterableAttributesRule};
#[test]
fn insert() {
@ -474,7 +462,8 @@ mod tests {
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("id") });
settings
.set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_string())]);
})
.unwrap();

View File

@ -89,6 +89,7 @@ use time::OffsetDateTime;
use tracing::debug;
use self::incremental::FacetsUpdateIncremental;
use super::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
use crate::facet::FacetType;
use crate::heed_codec::facet::{
@ -147,7 +148,11 @@ impl<'i> FacetsUpdate<'i> {
}
}
pub fn execute(self, wtxn: &mut heed::RwTxn<'_>) -> Result<()> {
pub fn execute(
self,
wtxn: &mut heed::RwTxn<'_>,
new_settings: &InnerIndexSettings,
) -> Result<()> {
if self.data_size == 0 {
return Ok(());
}
@ -156,8 +161,7 @@ impl<'i> FacetsUpdate<'i> {
// See self::comparison_bench::benchmark_facet_indexing
if self.data_size >= (self.database.len(wtxn)? / 500) {
let field_ids =
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let field_ids = facet_levels_field_ids(new_settings);
let bulk_update = FacetsUpdateBulk::new(
self.index,
field_ids,
@ -291,6 +295,53 @@ fn index_facet_search(
Ok(())
}
/// Clear all the levels greater than 0 for given field ids.
pub fn clear_facet_levels<'a, I>(
wtxn: &mut heed::RwTxn<'_>,
db: &heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DecodeIgnore>,
field_ids: I,
) -> Result<()>
where
I: IntoIterator<Item = &'a FieldId>,
{
for field_id in field_ids {
let field_id = *field_id;
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let range = left..=right;
db.delete_range(wtxn, &range).map(drop)?;
}
Ok(())
}
pub fn clear_facet_levels_based_on_settings_diff(
wtxn: &mut heed::RwTxn<'_>,
index: &Index,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<()> {
let new_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.new);
let old_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.old);
let field_ids_to_clear: Vec<_> = old_field_ids.difference(&new_field_ids).copied().collect();
clear_facet_levels(wtxn, &index.facet_id_string_docids.remap_types(), &field_ids_to_clear)?;
clear_facet_levels(wtxn, &index.facet_id_f64_docids.remap_types(), &field_ids_to_clear)?;
Ok(())
}
fn facet_levels_field_ids<B>(settings: &InnerIndexSettings) -> B
where
B: FromIterator<FieldId>,
{
settings
.fields_ids_map
.iter_id_metadata()
.filter(|(_, metadata)| {
metadata.require_facet_level_database(&settings.filterable_attributes_rules)
})
.map(|(id, _)| id)
.collect()
}
#[cfg(test)]
pub(crate) mod test_helpers {
use std::cell::Cell;

View File

@ -95,12 +95,7 @@ pub fn enrich_documents_batch<R: Read + Seek>(
// If the settings specifies that a _geo field must be used therefore we must check the
// validity of it in all the documents of this batch and this is when we return `Some`.
let geo_field_id = match documents_batch_index.id(RESERVED_GEO_FIELD_NAME) {
Some(geo_field_id)
if index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME)
|| index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) =>
{
Some(geo_field_id)
}
Some(geo_field_id) if index.is_geo_enabled(rtxn)? => Some(geo_field_id),
_otherwise => None,
};

View File

@ -150,9 +150,14 @@ fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
) -> bool {
let searchable_fields = &settings_diff.new.searchable_fields_ids;
for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.contains(&field_id) {
let Some(metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
// If the field id is not in the fields ids map, skip it.
// This happens for the vectors sub-fields. for example:
// "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered.
continue;
};
if metadata.is_searchable() {
let del_add = KvReaderDelAdd::from_slice(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
@ -200,8 +205,14 @@ fn tokens_from_document<'a>(
buffers.obkv_buffer.clear();
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
for (field_id, field_bytes) in obkv.iter() {
let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
// If the field id is not in the fields ids map, skip it.
// This happens for the vectors sub-fields. for example:
// "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered.
continue;
};
// if field is searchable.
if settings.searchable_fields_ids.contains(&field_id) {
if metadata.is_searchable() {
// extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
// parse json.
@ -216,7 +227,7 @@ fn tokens_from_document<'a>(
buffers.field_buffer.clear();
if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
// create an iterator of token with their positions.
let locales = settings.localized_searchable_fields_ids.locales(field_id);
let locales = metadata.locales(&settings.localized_attributes_rules);
let tokens = process_tokens(tokenizer.tokenize_with_allow_list(field, locales))
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);

View File

@ -12,12 +12,11 @@ use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::{
MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
/// Extracts the facet string and the documents ids where this facet string appear.
@ -33,13 +32,10 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
if settings_diff.settings_update_only() {
extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
} else {
let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
let facet_search = settings_diff.new.facet_search;
extract_facet_string_docids_document_update(
docid_fid_facet_string,
indexer,
localized_field_ids,
facet_search,
&settings_diff.new,
)
}
}
@ -52,8 +48,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,
localized_field_ids: &LocalizedFieldIds,
facet_search: bool,
settings: &InnerIndexSettings,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let max_memory = indexer.max_memory_by_thread();
@ -92,6 +87,14 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);
let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
unreachable!("metadata not found for field_id: {}", field_id)
};
if !metadata.is_faceted(&settings.filterable_attributes_rules) {
continue;
}
let (document_id_bytes, normalized_value_bytes) =
try_split_array_at::<_, 4>(bytes).unwrap();
let document_id = u32::from_be_bytes(document_id_bytes);
@ -99,8 +102,10 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let normalized_value = str::from_utf8(normalized_value_bytes)?;
// Facet search normalization
if facet_search {
let locales = localized_field_ids.locales(field_id);
let features =
metadata.filterable_attributes_features(&settings.filterable_attributes_rules);
if features.is_facet_searchable() && settings.facet_search {
let locales = metadata.locales(&settings.localized_attributes_rules);
let hyper_normalized_value = normalize_facet_string(normalized_value, locales);
let set = BTreeSet::from_iter(std::iter::once(normalized_value));
@ -178,8 +183,15 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);
let old_locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id);
let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
let Some(old_metadata) = settings_diff.old.fields_ids_map.metadata(field_id) else {
unreachable!("old metadata not found for field_id: {}", field_id)
};
let Some(new_metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
unreachable!("new metadata not found for field_id: {}", field_id)
};
let old_locales = old_metadata.locales(&settings_diff.old.localized_attributes_rules);
let new_locales = new_metadata.locales(&settings_diff.new.localized_attributes_rules);
let are_same_locales = old_locales == new_locales;
let reindex_facet_search =
@ -197,10 +209,15 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
// Facet search normalization
if settings_diff.new.facet_search {
let new_filterable_features = new_metadata
.filterable_attributes_features(&settings_diff.new.filterable_attributes_rules);
let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales);
let old_hyper_normalized_value;
let old_filterable_features = old_metadata
.filterable_attributes_features(&settings_diff.old.filterable_attributes_rules);
let old_hyper_normalized_value = if !settings_diff.old.facet_search
|| deladd_reader.get(DelAdd::Deletion).is_none()
|| !old_filterable_features.is_facet_searchable()
{
// if the facet search is disabled in the old settings or if no facet string is deleted,
// we don't need to normalize the facet string.
@ -215,7 +232,9 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let set = BTreeSet::from_iter(std::iter::once(normalized_value));
// if the facet string is the same, we can put the deletion and addition in the same obkv.
if old_hyper_normalized_value == Some(&new_hyper_normalized_value) {
if old_hyper_normalized_value == Some(&new_hyper_normalized_value)
&& new_filterable_features.is_facet_searchable()
{
// nothing to do if we delete and re-add the value.
if is_same_value {
continue;
@ -249,7 +268,9 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
}
// addition
if deladd_reader.get(DelAdd::Addition).is_some() {
if new_filterable_features.is_facet_searchable()
&& deladd_reader.get(DelAdd::Addition).is_some()
{
// insert new value
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
buffer.clear();

View File

@ -76,9 +76,9 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut strings_key_buffer = Vec::new();
let old_faceted_fids: BTreeSet<_> =
settings_diff.old.faceted_fields_ids.iter().copied().collect();
settings_diff.list_faceted_fields_from_fid_map(DelAdd::Deletion);
let new_faceted_fids: BTreeSet<_> =
settings_diff.new.faceted_fields_ids.iter().copied().collect();
settings_diff.list_faceted_fields_from_fid_map(DelAdd::Addition);
if !settings_diff.settings_update_only || settings_diff.reindex_facets() {
let mut cursor = obkv_documents.into_cursor()?;

View File

@ -15,8 +15,9 @@ use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::constants::RESERVED_VECTORS_FIELD_NAME;
use crate::error::FaultSource;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::index::IndexEmbeddingConfig;
use crate::prompt::{FieldsIdsMapWithMetadata, Prompt};
use crate::prompt::Prompt;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
@ -190,12 +191,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
let reindex_vectors = settings_diff.reindex_vectors();
let old_fields_ids_map = &settings_diff.old.fields_ids_map;
let old_fields_ids_map =
FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids);
let new_fields_ids_map = &settings_diff.new.fields_ids_map;
let new_fields_ids_map =
FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids);
// the vector field id may have changed
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
@ -383,7 +380,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
);
continue;
}
regenerate_prompt(obkv, prompt, &new_fields_ids_map)?
regenerate_prompt(obkv, prompt, new_fields_ids_map)?
}
},
// prompt regeneration is only triggered for existing embedders
@ -400,7 +397,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
regenerate_if_prompt_changed(
obkv,
(old_prompt, prompt),
(&old_fields_ids_map, &new_fields_ids_map),
(old_fields_ids_map, new_fields_ids_map),
)?
} else {
// we can simply ignore user provided vectors as they are not regenerated and are
@ -416,7 +413,7 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
prompt,
(add_to_user_provided, remove_from_user_provided),
(old, new),
(&old_fields_ids_map, &new_fields_ids_map),
(old_fields_ids_map, new_fields_ids_map),
document_id,
embedder_name,
embedder_is_manual,
@ -486,10 +483,7 @@ fn extract_vector_document_diff(
prompt: &Prompt,
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
(old, new): (VectorState, VectorState),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
(old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
document_id: impl Fn() -> Value,
embedder_name: &str,
embedder_is_manual: bool,
@ -611,10 +605,7 @@ fn extract_vector_document_diff(
fn regenerate_if_prompt_changed(
obkv: &obkv::KvReader<FieldId>,
(old_prompt, new_prompt): (&Prompt, &Prompt),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
(old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
) -> Result<VectorStateDelta> {
let old_prompt = old_prompt
.render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map)
@ -630,7 +621,7 @@ fn regenerate_if_prompt_changed(
fn regenerate_prompt(
obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt,
new_fields_ids_map: &FieldsIdsMapWithMetadata,
new_fields_ids_map: &FieldIdMapWithMetadata,
) -> Result<VectorStateDelta> {
let prompt = prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?;

View File

@ -26,6 +26,7 @@ use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk};
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
pub use self::helpers::*;
pub use self::transform::{Transform, TransformOutput};
use super::facet::clear_facet_levels_based_on_settings_diff;
use super::new::StdResult;
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError};
@ -215,9 +216,8 @@ where
flattened_documents,
} = output;
// update the internal facet and searchable list,
// update the searchable list,
// because they might have changed due to the nested documents flattening.
settings_diff.new.recompute_facets(self.wtxn, self.index)?;
settings_diff.new.recompute_searchables(self.wtxn, self.index)?;
let settings_diff = Arc::new(settings_diff);
@ -465,6 +465,11 @@ where
}
}
// If the settings are only being updated, we may have to clear some of the facet levels.
if settings_diff.settings_update_only() {
clear_facet_levels_based_on_settings_diff(self.wtxn, self.index, &settings_diff)?;
}
Ok(())
}).map_err(InternalError::from)??;
@ -776,7 +781,7 @@ mod tests {
use crate::search::TermsMatchingStrategy;
use crate::update::new::indexer;
use crate::update::Setting;
use crate::{all_obkv_to_json, db_snap, Filter, Search, UserError};
use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError};
#[test]
fn simple_document_replacement() {
@ -1006,7 +1011,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
}
@ -1018,7 +1025,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
@ -1234,16 +1243,17 @@ mod tests {
let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")];
settings.set_searchable_fields(searchable_fields);
let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin"));
let faceted_fields = vec![
FilterableAttributesRule::Field("title".to_string()),
FilterableAttributesRule::Field("nested.object".to_string()),
FilterableAttributesRule::Field("nested.machin".to_string()),
];
settings.set_filterable_fields(faceted_fields);
})
.unwrap();
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin")));
// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
search.query("document");
@ -1438,7 +1448,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(String::from("dog")));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"dog".to_string(),
)]);
})
.unwrap();
@ -1457,10 +1469,6 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let hidden = index.faceted_fields(&rtxn).unwrap();
assert_eq!(hidden, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain")));
for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
let mut search = crate::Search::new(&rtxn, &index);
let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
@ -1478,12 +1486,6 @@ mod tests {
db_snap!(index, facet_id_string_docids, @"");
db_snap!(index, field_id_docid_facet_strings, @"");
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!());
// update the settings to test the sortable
index
.update_settings(|settings| {
@ -1506,10 +1508,6 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain")));
let mut search = crate::Search::new(&rtxn, &index);
search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
"dog.race.bernese mountain",
@ -1717,8 +1715,6 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@ -1738,7 +1734,7 @@ mod tests {
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![7]);
};
let faceted_fields = hashset!(S("colour"));
let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())];
let index = TempIndex::new();
index.add_documents(content()).unwrap();
@ -1823,8 +1819,6 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));
let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@ -1844,7 +1838,7 @@ mod tests {
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
};
let faceted_fields = hashset!(S("colour"));
let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())];
let index = TempIndex::new();
index.add_documents(content()).unwrap();
@ -1887,8 +1881,6 @@ mod tests {
let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));
let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();
@ -1907,7 +1899,7 @@ mod tests {
assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
};
let faceted_fields = hashset!(S("tags"));
let faceted_fields = vec![FilterableAttributesRule::Field("tags".to_string())];
let index = TempIndex::new();
index.add_documents(content()).unwrap();
@ -2259,7 +2251,9 @@ mod tests {
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("title") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"title".to_string(),
)]);
})
.unwrap();
@ -3117,7 +3111,10 @@ mod tests {
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("docid"));
settings.set_filterable_fields(hashset! { S("label"), S("label2") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field("label".to_string()),
FilterableAttributesRule::Field("label2".to_string()),
]);
})
.unwrap();
wtxn.commit().unwrap();
@ -3296,7 +3293,9 @@ mod tests {
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("id"));
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
})
.unwrap();

View File

@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::collections::btree_map::Entry as BEntry;
use std::collections::hash_map::Entry as HEntry;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{Read, Seek};
@ -18,8 +18,10 @@ use super::helpers::{
ObkvsMergeAdditionsAndDeletions,
};
use super::{create_writer, IndexDocumentsMethod, IndexerConfig, KeepFirst};
use crate::attribute_patterns::PatternMatch;
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::index::{db_name, main_key};
use crate::update::del_add::{
into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
@ -31,9 +33,7 @@ use crate::update::{AvailableIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::WriteBackToDocuments;
use crate::vector::ArroyWrapper;
use crate::{
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
};
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};
pub struct TransformOutput {
pub primary_key: String,
@ -52,7 +52,7 @@ pub struct TransformOutput {
/// containing all those documents.
pub struct Transform<'a, 'i> {
pub index: &'i Index,
fields_ids_map: FieldsIdsMap,
fields_ids_map: FieldIdMapWithMetadata,
indexer_settings: &'a IndexerConfig,
pub index_documents_method: IndexDocumentsMethod,
@ -84,7 +84,7 @@ pub enum Operation {
///
/// If new fields are present in the addition, they are added to the index field ids map.
fn create_fields_mapping(
index_field_map: &mut FieldsIdsMap,
index_field_map: &mut FieldIdMapWithMetadata,
batch_field_map: &DocumentsBatchIndex,
) -> Result<HashMap<FieldId, FieldId>> {
batch_field_map
@ -141,10 +141,13 @@ impl<'a, 'i> Transform<'a, 'i> {
true,
);
let documents_ids = index.documents_ids(wtxn)?;
let fields_ids_map = index.fields_ids_map(wtxn)?;
let builder = MetadataBuilder::from_index(index, wtxn)?;
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
Ok(Transform {
index,
fields_ids_map: index.fields_ids_map(wtxn)?,
fields_ids_map,
indexer_settings,
available_documents_ids: AvailableIds::new(&documents_ids),
original_sorter,
@ -354,7 +357,7 @@ impl<'a, 'i> Transform<'a, 'i> {
documents_seen: documents_count,
});
self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?;
self.index.put_fields_ids_map(wtxn, self.fields_ids_map.as_fields_ids_map())?;
self.index.put_primary_key(wtxn, &primary_key)?;
self.documents_count += documents_count;
// Now that we have a valid sorter that contains the user id and the obkv we
@ -371,7 +374,7 @@ impl<'a, 'i> Transform<'a, 'i> {
)]
fn flatten_from_fields_ids_map(
obkv: &KvReader<FieldId>,
fields_ids_map: &mut FieldsIdsMap,
fields_ids_map: &mut FieldIdMapWithMetadata,
) -> Result<Option<Vec<u8>>> {
if obkv
.iter()
@ -657,7 +660,6 @@ impl<'a, 'i> Transform<'a, 'i> {
fn rebind_existing_document(
old_obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
modified_faceted_fields: &HashSet<String>,
mut injected_vectors: serde_json::Map<String, serde_json::Value>,
old_vectors_fid: Option<FieldId>,
original_obkv_buffer: Option<&mut Vec<u8>>,
@ -667,23 +669,26 @@ impl<'a, 'i> Transform<'a, 'i> {
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
// If only a faceted field has been added, keep only this field.
let global_facet_settings_changed = settings_diff.global_facet_settings_changed();
let facet_fids_changed = settings_diff.facet_fids_changed();
let necessary_faceted_field =
|id: FieldId| -> bool {
let necessary_faceted_field = |id: FieldId| -> Option<DelAddOperation> {
if facet_fids_changed {
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
if global_facet_settings_changed {
settings_diff.new.user_defined_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else if facet_fids_changed {
modified_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else {
false
// if the faceted fields changed, we need to keep all the field that are
// faceted in the old or new settings.
match (
settings_diff.old.match_faceted_field(field_name),
settings_diff.new.match_faceted_field(field_name),
) {
(PatternMatch::NoMatch, PatternMatch::NoMatch) => None,
(PatternMatch::NoMatch, _) => Some(DelAddOperation::Addition),
(_, PatternMatch::NoMatch) => Some(DelAddOperation::Deletion),
(_, _) => Some(DelAddOperation::DeletionAndAddition),
}
};
} else {
None
}
};
// Alway provide all fields when vectors are involved because
// we need the fields for the prompt/templating.
@ -734,12 +739,24 @@ impl<'a, 'i> Transform<'a, 'i> {
}
}
if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors {
if is_primary_key(id) || reindex_vectors {
operations.insert(id, DelAddOperation::DeletionAndAddition);
obkv_writer.insert(id, val)?;
} else if let Some(operation) = settings_diff.reindex_searchable_id(id) {
operations.insert(id, operation);
obkv_writer.insert(id, val)?;
} else {
let facet_operation = necessary_faceted_field(id);
let searchable_operation = settings_diff.reindex_searchable_id(id);
let operation = match (facet_operation, searchable_operation) {
(Some(facet_operation), Some(searchable_operation)) => {
Some(facet_operation.merge(searchable_operation))
}
(Some(operation), None) | (None, Some(operation)) => Some(operation),
(None, None) => None,
};
if let Some(operation) = operation {
operations.insert(id, operation);
obkv_writer.insert(id, val)?;
}
}
}
if !injected_vectors.is_empty() {
@ -856,7 +873,6 @@ impl<'a, 'i> Transform<'a, 'i> {
};
if original_sorter.is_some() || flattened_sorter.is_some() {
let modified_faceted_fields = settings_diff.modified_faceted_fields();
let mut original_obkv_buffer = Vec::new();
let mut flattened_obkv_buffer = Vec::new();
let mut document_sorter_key_buffer = Vec::new();
@ -897,7 +913,6 @@ impl<'a, 'i> Transform<'a, 'i> {
Self::rebind_existing_document(
old_obkv,
&settings_diff,
&modified_faceted_fields,
injected_vectors,
old_vectors_fid,
Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()),

View File

@ -365,7 +365,7 @@ pub(crate) fn write_typed_chunk_into_index(
let merger = builder.build();
let indexer = FacetsUpdate::new(index, FacetType::Number, merger, None, data_size);
indexer.execute(wtxn)?;
indexer.execute(wtxn, &settings_diff.new)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetStringDocids(_) => {
@ -401,7 +401,7 @@ pub(crate) fn write_typed_chunk_into_index(
Some(normalized_facet_id_string_merger),
data_size,
);
indexer.execute(wtxn)?;
indexer.execute(wtxn, &settings_diff.new)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetExistsDocids(_) => {

View File

@ -4,10 +4,10 @@ use heed::RoTxn;
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
};
use super::extract::perm_json_p;
use super::vector_document::{
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
};
use crate::attribute_patterns::PatternMatch;
use crate::documents::FieldIdMapper;
use crate::vector::EmbeddingConfigs;
use crate::{DocumentId, Index, Result};
@ -167,13 +167,15 @@ impl<'doc> Update<'doc> {
}
}
/// Returns whether the updated version of the document is different from the current version for the passed subset of fields.
/// Returns whether the updated version of the document is different from the current version for the subset of fields selected by `selector`.
///
/// `true` if at least one top-level-field that is a exactly a member of field or a parent of a member of field changed.
/// `true` if at least one top-level-field that is exactly a selected field or a parent of a selected field changed.
/// Otherwise `false`.
///
/// - Note: `_geo` and `_vectors` are not taken into account by this function.
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
&self,
fields: Option<&[&str]>,
selector: &mut impl FnMut(&str) -> PatternMatch,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
@ -185,7 +187,7 @@ impl<'doc> Update<'doc> {
for entry in self.only_changed_fields().iter_top_level_fields() {
let (key, updated_value) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
if selector(key) == PatternMatch::NoMatch {
continue;
}
@ -229,7 +231,7 @@ impl<'doc> Update<'doc> {
for entry in current.iter_top_level_fields() {
let (key, _) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
if selector(key) == PatternMatch::NoMatch {
continue;
}
current_selected_field_count += 1;

View File

@ -5,12 +5,13 @@ use std::ops::DerefMut as _;
use bumpalo::collections::Vec as BVec;
use bumpalo::Bump;
use hashbrown::HashMap;
use heed::RoTxn;
use serde_json::Value;
use super::super::cache::BalancedCaches;
use super::facet_document::extract_document_facets;
use super::FacetKind;
use crate::fields_ids_map::metadata::Metadata;
use crate::filterable_attributes_rules::match_faceted_field;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
@ -23,13 +24,17 @@ use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
use crate::{DocumentId, FieldId, FilterableAttributesRule, Result, MAX_FACET_VALUE_LENGTH};
pub struct FacetedExtractorData<'a, 'b> {
attributes_to_extract: &'a [&'a str],
sender: &'a FieldIdDocidFacetSender<'a, 'b>,
grenad_parameters: &'a GrenadParameters,
buckets: usize,
filterable_attributes: &'a [FilterableAttributesRule],
sortable_fields: &'a HashSet<String>,
asc_desc_fields: &'a HashSet<String>,
distinct_field: &'a Option<String>,
is_geo_enabled: bool,
}
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
@ -52,7 +57,11 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b>
let change = change?;
FacetedDocidsExtractor::extract_document_change(
context,
self.attributes_to_extract,
self.filterable_attributes,
self.sortable_fields,
self.asc_desc_fields,
self.distinct_field,
self.is_geo_enabled,
change,
self.sender,
)?
@ -64,13 +73,18 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b>
pub struct FacetedDocidsExtractor;
impl FacetedDocidsExtractor {
#[allow(clippy::too_many_arguments)]
fn extract_document_change(
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
attributes_to_extract: &[&str],
filterable_attributes: &[FilterableAttributesRule],
sortable_fields: &HashSet<String>,
asc_desc_fields: &HashSet<String>,
distinct_field: &Option<String>,
is_geo_enabled: bool,
document_change: DocumentChange,
sender: &FieldIdDocidFacetSender,
) -> Result<()> {
let index = &context.index;
let index = context.index;
let rtxn = &context.rtxn;
let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield();
let mut cached_sorter = context.data.borrow_mut_or_yield();
@ -78,11 +92,15 @@ impl FacetedDocidsExtractor {
let docid = document_change.docid();
let res = match document_change {
DocumentChange::Deletion(inner) => extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
is_geo_enabled,
&mut |fid, meta, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -91,6 +109,8 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_del,
docid,
fid,
meta,
filterable_attributes,
depth,
value,
)
@ -98,7 +118,15 @@ impl FacetedDocidsExtractor {
),
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
Some(attributes_to_extract),
&mut |field_name| {
match_faceted_field(
field_name,
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
)
},
rtxn,
index,
context.db_fields_ids_map,
@ -107,11 +135,15 @@ impl FacetedDocidsExtractor {
}
extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
is_geo_enabled,
&mut |fid, meta, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -120,6 +152,8 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_del,
docid,
fid,
meta,
filterable_attributes,
depth,
value,
)
@ -127,11 +161,15 @@ impl FacetedDocidsExtractor {
)?;
extract_document_facets(
attributes_to_extract,
inner.merged(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
is_geo_enabled,
&mut |fid, meta, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -140,6 +178,8 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_add,
docid,
fid,
meta,
filterable_attributes,
depth,
value,
)
@ -147,11 +187,15 @@ impl FacetedDocidsExtractor {
)
}
DocumentChange::Insertion(inner) => extract_document_facets(
attributes_to_extract,
inner.inserted(),
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
is_geo_enabled,
&mut |fid, meta, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@ -160,6 +204,8 @@ impl FacetedDocidsExtractor {
DelAddFacetValue::insert_add,
docid,
fid,
meta,
filterable_attributes,
depth,
value,
)
@ -180,9 +226,18 @@ impl FacetedDocidsExtractor {
facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind),
docid: DocumentId,
fid: FieldId,
meta: Metadata,
filterable_attributes: &[FilterableAttributesRule],
depth: perm_json_p::Depth,
value: &Value,
) -> Result<()> {
// if the field is not faceted, do nothing
if !meta.is_faceted(filterable_attributes) {
return Ok(());
}
let features = meta.filterable_attributes_features(filterable_attributes);
let mut buffer = BVec::new_in(doc_alloc);
// Exists
// key: fid
@ -246,7 +301,9 @@ impl FacetedDocidsExtractor {
}
// Null
// key: fid
Value::Null if depth == perm_json_p::Depth::OnBaseKey => {
Value::Null
if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_null() =>
{
buffer.clear();
buffer.push(FacetKind::Null as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
@ -254,19 +311,29 @@ impl FacetedDocidsExtractor {
}
// Empty
// key: fid
Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
Value::Array(a)
if a.is_empty()
&& depth == perm_json_p::Depth::OnBaseKey
&& features.is_filterable_empty() =>
{
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &buffer, docid)
}
Value::String(_) if depth == perm_json_p::Depth::OnBaseKey => {
Value::String(_)
if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_empty() =>
{
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
cache_fn(cached_sorter, &buffer, docid)
}
Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
Value::Object(o)
if o.is_empty()
&& depth == perm_json_p::Depth::OnBaseKey
&& features.is_filterable_empty() =>
{
buffer.clear();
buffer.push(FacetKind::Empty as u8);
buffer.extend_from_slice(&fid.to_be_bytes());
@ -276,10 +343,6 @@ impl FacetedDocidsExtractor {
_ => Ok(()),
}
}
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
index.user_defined_faceted_fields(rtxn)
}
}
struct DelAddFacetValue<'doc> {
@ -399,9 +462,11 @@ impl FacetedDocidsExtractor {
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
let attributes_to_extract: Vec<_> =
attributes_to_extract.iter().map(|s| s.as_ref()).collect();
let filterable_attributes = index.filterable_attributes_rules(&rtxn)?;
let sortable_fields = index.sortable_fields(&rtxn)?;
let asc_desc_fields = index.asc_desc_fields(&rtxn)?;
let distinct_field = index.distinct_field(&rtxn)?.map(|s| s.to_string());
let is_geo_enabled = index.is_geo_enabled(&rtxn)?;
let datastore = ThreadLocal::new();
{
@ -410,10 +475,14 @@ impl FacetedDocidsExtractor {
let _entered = span.enter();
let extractor = FacetedExtractorData {
attributes_to_extract: &attributes_to_extract,
grenad_parameters: indexing_context.grenad_parameters,
buckets: rayon::current_num_threads(),
sender,
filterable_attributes: &filterable_attributes,
sortable_fields: &sortable_fields,
asc_desc_fields: &asc_desc_fields,
distinct_field: &distinct_field,
is_geo_enabled,
};
extract(
document_changes,

View File

@ -1,46 +1,80 @@
use std::collections::HashSet;
use serde_json::Value;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::attribute_patterns::PatternMatch;
use crate::fields_ids_map::metadata::Metadata;
use crate::update::new::document::Document;
use crate::update::new::extract::geo::extract_geo_coordinates;
use crate::update::new::extract::perm_json_p;
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
use crate::{
FieldId, FilterableAttributesRule, GlobalFieldsIdsMap, InternalError, Result, UserError,
};
use crate::filterable_attributes_rules::match_faceted_field;
#[allow(clippy::too_many_arguments)]
pub fn extract_document_facets<'doc>(
attributes_to_extract: &[&str],
document: impl Document<'doc>,
external_document_id: &str,
field_id_map: &mut GlobalFieldsIdsMap,
facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>,
filterable_attributes: &[FilterableAttributesRule],
sortable_fields: &HashSet<String>,
asc_desc_fields: &HashSet<String>,
distinct_field: &Option<String>,
is_geo_enabled: bool,
facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>,
) -> Result<()> {
// return the match result for the given field name.
let match_field = |field_name: &str| -> PatternMatch {
match_faceted_field(
field_name,
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
)
};
// extract the field if it is faceted (facet searchable, filterable, sortable)
let mut extract_field = |name: &str, depth: perm_json_p::Depth, value: &Value| -> Result<()> {
match field_id_map.id_with_metadata_or_insert(name) {
Some((field_id, meta)) => {
facet_fn(field_id, meta, depth, value)?;
Ok(())
}
None => Err(UserError::AttributeLimitReached.into()),
}
};
for res in document.iter_top_level_fields() {
let (field_name, value) = res?;
let selection = match_field(field_name);
let mut tokenize_field =
|name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map
.id_or_insert(name)
{
Some(field_id) => facet_fn(field_id, depth, value),
None => Err(UserError::AttributeLimitReached.into()),
};
// extract the field if it matches a pattern and if it is faceted (facet searchable, filterable, sortable)
let mut match_and_extract = |name: &str, depth: perm_json_p::Depth, value: &Value| {
let selection = match_field(name);
if selection == PatternMatch::Match {
extract_field(name, depth, value)?;
}
// if the current field is searchable or contains a searchable attribute
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
if selection != perm_json_p::Selection::Skip {
Ok(selection)
};
if selection != PatternMatch::NoMatch {
// parse json.
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
Value::Object(object) => {
perm_json_p::seek_leaf_values_in_object(
&object,
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
perm_json_p::Depth::OnBaseKey,
&mut tokenize_field,
&mut match_and_extract,
)?;
if selection == perm_json_p::Selection::Select {
tokenize_field(
if selection == PatternMatch::Match {
extract_field(
field_name,
perm_json_p::Depth::OnBaseKey,
&Value::Object(object),
@ -50,36 +84,34 @@ pub fn extract_document_facets<'doc>(
Value::Array(array) => {
perm_json_p::seek_leaf_values_in_array(
&array,
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
perm_json_p::Depth::OnBaseKey,
&mut tokenize_field,
&mut match_and_extract,
)?;
if selection == perm_json_p::Selection::Select {
tokenize_field(
if selection == PatternMatch::Match {
extract_field(
field_name,
perm_json_p::Depth::OnBaseKey,
&Value::Array(array),
)?;
}
}
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
value => extract_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
}
}
}
if attributes_to_extract.contains(&RESERVED_GEO_FIELD_NAME) {
if is_geo_enabled {
if let Some(geo_value) = document.geo_field()? {
if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
let (lat_fid, lng_fid) = field_id_map
.id_or_insert("_geo.lat")
.zip(field_id_map.id_or_insert("_geo.lng"))
let ((lat_fid, lat_meta), (lng_fid, lng_meta)) = field_id_map
.id_with_metadata_or_insert("_geo.lat")
.zip(field_id_map.id_with_metadata_or_insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?;
facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?;
facet_fn(lat_fid, lat_meta, perm_json_p::Depth::OnBaseKey, &lat.into())?;
facet_fn(lng_fid, lng_meta, perm_json_p::Depth::OnBaseKey, &lng.into())?;
}
}
}

View File

@ -9,7 +9,6 @@ use heed::RoTxn;
use serde_json::value::RawValue;
use serde_json::Value;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::GeoError;
use crate::update::new::document::Document;
use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor};
@ -29,9 +28,7 @@ impl GeoExtractor {
index: &Index,
grenad_parameters: GrenadParameters,
) -> Result<Option<Self>> {
let is_sortable = index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
let is_filterable = index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
if is_sortable || is_filterable {
if index.is_geo_enabled(rtxn)? {
Ok(Some(GeoExtractor { grenad_parameters }))
} else {
Ok(None)

View File

@ -5,7 +5,6 @@ mod geo;
mod searchable;
mod vectors;
use bumpalo::Bump;
pub use cache::{
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
};
@ -15,27 +14,11 @@ pub use geo::*;
pub use searchable::*;
pub use vectors::EmbeddingExtractor;
use super::indexer::document_changes::{DocumentChanges, IndexingContext};
use super::steps::IndexingStep;
use super::thread_local::{FullySend, ThreadLocal};
use crate::Result;
pub trait DocidsExtractor {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync;
}
/// TODO move in permissive json pointer
pub mod perm_json_p {
use serde_json::{Map, Value};
use crate::Result;
use crate::{attribute_patterns::PatternMatch, Result};
const SPLIT_SYMBOL: char = '.';
/// Returns `true` if the `selector` match the `key`.
@ -68,11 +51,9 @@ pub mod perm_json_p {
pub fn seek_leaf_values_in_object(
value: &Map<String, Value>,
selectors: Option<&[&str]>,
skip_selectors: &[&str],
base_key: &str,
base_depth: Depth,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
) -> Result<()> {
if value.is_empty() {
seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?;
@ -85,40 +66,16 @@ pub mod perm_json_p {
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
};
// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
// so we check the contained_in on both side
let selection = select_field(&base_key, selectors, skip_selectors);
if selection != Selection::Skip {
let selection = seeker(&base_key, Depth::OnBaseKey, value)?;
if selection != PatternMatch::NoMatch {
match value {
Value::Object(object) => {
if selection == Selection::Select {
seeker(&base_key, Depth::OnBaseKey, value)?;
}
seek_leaf_values_in_object(
object,
selectors,
skip_selectors,
&base_key,
Depth::OnBaseKey,
seeker,
)
seek_leaf_values_in_object(object, &base_key, Depth::OnBaseKey, seeker)
}
Value::Array(array) => {
if selection == Selection::Select {
seeker(&base_key, Depth::OnBaseKey, value)?;
}
seek_leaf_values_in_array(
array,
selectors,
skip_selectors,
&base_key,
Depth::OnBaseKey,
seeker,
)
seek_leaf_values_in_array(array, &base_key, Depth::OnBaseKey, seeker)
}
value => seeker(&base_key, Depth::OnBaseKey, value),
_ => Ok(()),
}?;
}
}
@ -128,11 +85,9 @@ pub mod perm_json_p {
pub fn seek_leaf_values_in_array(
values: &[Value],
selectors: Option<&[&str]>,
skip_selectors: &[&str],
base_key: &str,
base_depth: Depth,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
) -> Result<()> {
if values.is_empty() {
seeker(base_key, base_depth, &Value::Array(vec![]))?;
@ -140,61 +95,16 @@ pub mod perm_json_p {
for value in values {
match value {
Value::Object(object) => seek_leaf_values_in_object(
object,
selectors,
skip_selectors,
base_key,
Depth::InsideArray,
seeker,
),
Value::Array(array) => seek_leaf_values_in_array(
array,
selectors,
skip_selectors,
base_key,
Depth::InsideArray,
seeker,
),
value => seeker(base_key, Depth::InsideArray, value),
Value::Object(object) => {
seek_leaf_values_in_object(object, base_key, Depth::InsideArray, seeker)
}
Value::Array(array) => {
seek_leaf_values_in_array(array, base_key, Depth::InsideArray, seeker)
}
value => seeker(base_key, Depth::InsideArray, value).map(|_| ()),
}?;
}
Ok(())
}
pub fn select_field(
field_name: &str,
selectors: Option<&[&str]>,
skip_selectors: &[&str],
) -> Selection {
if skip_selectors.iter().any(|skip_selector| {
contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector)
}) {
Selection::Skip
} else if let Some(selectors) = selectors {
let mut selection = Selection::Skip;
for selector in selectors {
if contained_in(field_name, selector) {
selection = Selection::Select;
break;
} else if contained_in(selector, field_name) {
selection = Selection::Parent;
}
}
selection
} else {
Selection::Select
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Selection {
/// The field is a parent of the of a nested field that must be selected
Parent,
/// The field must be selected
Select,
/// The field must be skipped
Skip,
}
}

View File

@ -5,8 +5,8 @@ use std::ops::DerefMut as _;
use bumpalo::collections::vec::Vec as BumpVec;
use bumpalo::Bump;
use heed::RoTxn;
use super::match_searchable_field;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::extract::perm_json_p::contained_in;
@ -17,8 +17,7 @@ use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{bucketed_position, DocumentId, FieldId, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE};
const MAX_COUNTED_WORDS: usize = 30;
@ -207,9 +206,10 @@ impl<'extractor> WordDocidsCaches<'extractor> {
}
pub struct WordDocidsExtractorData<'a> {
tokenizer: &'a DocumentTokenizer<'a>,
grenad_parameters: &'a GrenadParameters,
tokenizer: DocumentTokenizer<'a>,
max_memory_by_thread: Option<usize>,
buckets: usize,
searchable_attributes: Option<Vec<&'a str>>,
}
impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
@ -218,7 +218,7 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
self.buckets,
self.grenad_parameters.max_memory_by_thread(),
self.max_memory_by_thread,
extractor_alloc,
))))
}
@ -230,7 +230,12 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
) -> Result<()> {
for change in changes {
let change = change?;
WordDocidsExtractors::extract_document_change(context, self.tokenizer, change)?;
WordDocidsExtractors::extract_document_change(
context,
&self.tokenizer,
self.searchable_attributes.as_deref(),
change,
)?;
}
Ok(())
}
@ -248,52 +253,42 @@ impl WordDocidsExtractors {
where
MSP: Fn() -> bool + Sync,
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;
let stop_words = index.stop_words(&rtxn)?;
let allowed_separators = index.allowed_separators(&rtxn)?;
// Warning: this is duplicated code from extract_word_pair_proximity_docids.rs
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = index.dictionary(&rtxn)?;
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let builder = tokenizer_builder(
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.into_tokenizer();
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
let attributes_to_skip = Self::attributes_to_skip(&rtxn, index)?;
let tokenizer = builder.build();
let localized_attributes_rules =
index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
attribute_to_extract: attributes_to_extract.as_deref(),
attribute_to_skip: attributes_to_skip.as_slice(),
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data = WordDocidsExtractorData {
tokenizer: document_tokenizer,
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
buckets: rayon::current_num_threads(),
searchable_attributes: indexing_context.index.user_defined_searchable_fields(&rtxn)?,
};
let datastore = ThreadLocal::new();
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
let extractor = WordDocidsExtractorData {
tokenizer: &document_tokenizer,
grenad_parameters: indexing_context.grenad_parameters,
buckets: rayon::current_num_threads(),
};
extract(
document_changes,
&extractor,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
@ -312,6 +307,7 @@ impl WordDocidsExtractors {
fn extract_document_change(
context: &DocumentChangeContext<RefCell<Option<WordDocidsBalancedCaches>>>,
document_tokenizer: &DocumentTokenizer,
searchable_attributes: Option<&[&str]>,
document_change: DocumentChange,
) -> Result<()> {
let index = &context.index;
@ -345,7 +341,9 @@ impl WordDocidsExtractors {
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
&mut |field_name: &str| {
match_searchable_field(field_name, searchable_attributes)
},
&context.rtxn,
context.index,
context.db_fields_ids_map,
@ -408,15 +406,4 @@ impl WordDocidsExtractors {
let mut buffer = BumpVec::with_capacity_in(buffer_size, &context.doc_alloc);
cached_sorter.flush_fid_word_count(&mut buffer)
}
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(Vec::new())
}
}

View File

@ -2,30 +2,114 @@ use std::cell::RefCell;
use std::collections::VecDeque;
use std::rc::Rc;
use heed::RoTxn;
use bumpalo::Bump;
use super::tokenize_document::DocumentTokenizer;
use super::SearchableExtractor;
use super::match_searchable_field;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::proximity::{index_proximity, MAX_DISTANCE};
use crate::update::new::document::Document;
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::indexer::document_changes::DocumentChangeContext;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE};
pub struct WordPairProximityDocidsExtractorData<'a> {
tokenizer: DocumentTokenizer<'a>,
searchable_attributes: Option<Vec<&'a str>>,
max_memory_by_thread: Option<usize>,
buckets: usize,
}
impl<'a, 'extractor> Extractor<'extractor> for WordPairProximityDocidsExtractorData<'a> {
type Data = RefCell<BalancedCaches<'extractor>>;
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(BalancedCaches::new_in(
self.buckets,
self.max_memory_by_thread,
extractor_alloc,
)))
}
fn process<'doc>(
&self,
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
context: &DocumentChangeContext<Self::Data>,
) -> Result<()> {
for change in changes {
let change = change?;
WordPairProximityDocidsExtractor::extract_document_change(
context,
&self.tokenizer,
self.searchable_attributes.as_deref(),
change,
)?;
}
Ok(())
}
}
pub struct WordPairProximityDocidsExtractor;
impl SearchableExtractor for WordPairProximityDocidsExtractor {
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
impl WordPairProximityDocidsExtractor {
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
{
// Warning: this is duplicated code from extract_word_docids.rs
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data = WordPairProximityDocidsExtractorData {
tokenizer: document_tokenizer,
searchable_attributes: indexing_context.index.user_defined_searchable_fields(&rtxn)?,
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
buckets: rayon::current_num_threads(),
};
let datastore = ThreadLocal::new();
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
extract(
document_changes,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}
fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(Vec::new())
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
}
// This method is reimplemented to count the number of words in the document in each field
@ -34,6 +118,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
fn extract_document_change(
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
document_tokenizer: &DocumentTokenizer,
searchable_attributes: Option<&[&str]>,
document_change: DocumentChange,
) -> Result<()> {
let doc_alloc = &context.doc_alloc;
@ -71,7 +156,9 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
&mut |field_name: &str| {
match_searchable_field(field_name, searchable_attributes)
},
rtxn,
index,
context.db_fields_ids_map,

View File

@ -2,145 +2,28 @@ mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod tokenize_document;
use std::cell::RefCell;
use std::marker::PhantomData;
use bumpalo::Bump;
pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
use heed::RoTxn;
use tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::cache::BalancedCaches;
use super::DocidsExtractor;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
pub struct SearchableExtractorData<'a, EX: SearchableExtractor> {
tokenizer: &'a DocumentTokenizer<'a>,
grenad_parameters: &'a GrenadParameters,
buckets: usize,
_ex: PhantomData<EX>,
}
pub fn match_searchable_field(
field_name: &str,
searchable_fields: Option<&[&str]>,
) -> PatternMatch {
let Some(searchable_fields) = searchable_fields else {
// If no searchable fields are provided, consider all fields as searchable
return PatternMatch::Match;
};
impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
for SearchableExtractorData<'a, EX>
{
type Data = RefCell<BalancedCaches<'extractor>>;
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(BalancedCaches::new_in(
self.buckets,
self.grenad_parameters.max_memory_by_thread(),
extractor_alloc,
)))
}
fn process<'doc>(
&self,
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
context: &DocumentChangeContext<Self::Data>,
) -> Result<()> {
for change in changes {
let change = change?;
EX::extract_document_change(context, self.tokenizer, change)?;
let mut selection = PatternMatch::NoMatch;
for pattern in searchable_fields {
match match_field_legacy(pattern, field_name) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => selection = PatternMatch::Parent,
PatternMatch::NoMatch => (),
}
Ok(())
}
}
pub trait SearchableExtractor: Sized + Sync {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
{
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?;
let attributes_to_skip = Self::attributes_to_skip(&rtxn, indexing_context.index)?;
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
attribute_to_extract: attributes_to_extract.as_deref(),
attribute_to_skip: attributes_to_skip.as_slice(),
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data: SearchableExtractorData<Self> = SearchableExtractorData {
tokenizer: &document_tokenizer,
grenad_parameters: indexing_context.grenad_parameters,
buckets: rayon::current_num_threads(),
_ex: PhantomData,
};
let datastore = ThreadLocal::new();
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
extract(
document_changes,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
}
fn extract_document_change(
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
document_tokenizer: &DocumentTokenizer,
document_change: DocumentChange,
) -> Result<()>;
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index)
-> Result<Option<Vec<&'a str>>>;
fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
}
impl<T: SearchableExtractor> DocidsExtractor for T {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
{
Self::run_extraction(document_changes, indexing_context, extractor_allocs, step)
}
selection
}

View File

@ -3,9 +3,10 @@ use std::collections::HashMap;
use charabia::{SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
use serde_json::Value;
use crate::attribute_patterns::PatternMatch;
use crate::update::new::document::Document;
use crate::update::new::extract::perm_json_p::{
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth, Selection,
seek_leaf_values_in_array, seek_leaf_values_in_object, Depth,
};
use crate::{
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
@ -17,8 +18,6 @@ const MAX_DISTANCE: u32 = 8;
pub struct DocumentTokenizer<'a> {
pub tokenizer: &'a Tokenizer<'a>,
pub attribute_to_extract: Option<&'a [&'a str]>,
pub attribute_to_skip: &'a [&'a str],
pub localized_attributes_rules: &'a [LocalizedAttributesRule],
pub max_positions_per_attributes: u32,
}
@ -31,87 +30,94 @@ impl<'a> DocumentTokenizer<'a> {
token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>,
) -> Result<()> {
let mut field_position = HashMap::new();
let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
let Some((field_id, meta)) = field_id_map.id_with_metadata_or_insert(field_name) else {
return Err(UserError::AttributeLimitReached.into());
};
if meta.is_searchable() {
self.tokenize_field(field_id, field_name, value, token_fn, &mut field_position)?;
}
// todo: should be a match on the field_name using `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
Ok(PatternMatch::Match)
};
for entry in document.iter_top_level_fields() {
let (field_name, value) = entry?;
let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
let Some(field_id) = field_id_map.id_or_insert(field_name) else {
return Err(UserError::AttributeLimitReached.into());
};
if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
!= Selection::Select
{
return Ok(());
}
let position = field_position
.entry(field_id)
.and_modify(|counter| *counter += MAX_DISTANCE)
.or_insert(0);
if *position >= self.max_positions_per_attributes {
return Ok(());
}
let text;
let tokens = match value {
Value::Number(n) => {
text = n.to_string();
self.tokenizer.tokenize(text.as_str())
}
Value::Bool(b) => {
text = b.to_string();
self.tokenizer.tokenize(text.as_str())
}
Value::String(text) => {
let locales = self
.localized_attributes_rules
.iter()
.find(|rule| rule.match_str(field_name))
.map(|rule| rule.locales());
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
}
_ => return Ok(()),
};
// create an iterator of token with their positions.
let tokens = process_tokens(*position, tokens)
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
for (index, token) in tokens {
// keep a word only if it is not empty and fit in a LMDB key.
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
*position = index;
if let Ok(position) = (*position).try_into() {
token_fn(field_name, field_id, position, token)?;
}
}
}
Ok(())
};
// parse json.
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
Value::Object(object) => seek_leaf_values_in_object(
&object,
None,
&[],
field_name,
Depth::OnBaseKey,
&mut tokenize_field,
)?,
Value::Array(array) => seek_leaf_values_in_array(
&array,
None,
&[],
field_name,
Depth::OnBaseKey,
&mut tokenize_field,
)?,
value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
value => {
tokenize_field(field_name, Depth::OnBaseKey, &value)?;
}
}
}
Ok(())
}
fn tokenize_field(
&self,
field_id: FieldId,
field_name: &str,
value: &Value,
token_fn: &mut impl FnMut(&str, u16, u16, &str) -> std::result::Result<(), crate::Error>,
field_position: &mut HashMap<u16, u32>,
) -> Result<()> {
let position = field_position
.entry(field_id)
.and_modify(|counter| *counter += MAX_DISTANCE)
.or_insert(0);
if *position >= self.max_positions_per_attributes {
return Ok(());
}
let text;
let tokens = match value {
Value::Number(n) => {
text = n.to_string();
self.tokenizer.tokenize(text.as_str())
}
Value::Bool(b) => {
text = b.to_string();
self.tokenizer.tokenize(text.as_str())
}
Value::String(text) => {
let locales = self
.localized_attributes_rules
.iter()
.find(|rule| rule.match_str(field_name) == PatternMatch::Match)
.map(|rule| rule.locales());
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
}
_ => return Ok(()),
};
// create an iterator of token with their positions.
let tokens = process_tokens(*position, tokens)
.take_while(|(p, _)| *p < self.max_positions_per_attributes);
for (index, token) in tokens {
// keep a word only if it is not empty and fit in a LMDB key.
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
*position = index;
if let Ok(position) = (*position).try_into() {
token_fn(field_name, field_id, position, token)?;
}
}
}
@ -215,15 +221,20 @@ mod test {
let mut tb = TokenizerBuilder::default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tb.build(),
attribute_to_extract: None,
attribute_to_skip: &["not-me", "me-nether.nope"],
localized_attributes_rules: &[],
max_positions_per_attributes: 1000,
};
let fields_ids_map = FieldIdMapWithMetadata::new(
fields_ids_map,
MetadataBuilder::new(Default::default(), Default::default(), Default::default(), None),
MetadataBuilder::new(
Default::default(),
Default::default(),
Default::default(),
None,
None,
Default::default(),
),
);
let fields_ids_map_lock = std::sync::RwLock::new(fields_ids_map);
@ -265,6 +276,10 @@ mod test {
2,
16,
]: "catto",
[
3,
0,
]: "unsearchable",
[
5,
0,
@ -277,6 +292,10 @@ mod test {
8,
0,
]: "23",
[
9,
0,
]: "unsearchable",
}
"###);
}

View File

@ -9,12 +9,14 @@ use heed::{BytesDecode, BytesEncode, RoTxn, RwTxn};
use super::fst_merger_builder::FstMergerBuilder;
use super::KvReaderDelAdd;
use crate::attribute_patterns::PatternMatch;
use crate::heed_codec::facet::FacetGroupKey;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::{create_sorter, MergeDeladdBtreesetString};
use crate::{
BEU16StrCodec, FieldId, GlobalFieldsIdsMap, Index, LocalizedAttributesRule, Result,
MAX_FACET_VALUE_LENGTH,
BEU16StrCodec, FieldId, FieldIdMapMissingEntry, FilterableAttributesFeatures,
FilterableAttributesRule, GlobalFieldsIdsMap, Index, InternalError, LocalizedAttributesRule,
Result, MAX_FACET_VALUE_LENGTH,
};
pub struct FacetSearchBuilder<'indexer> {
@ -22,6 +24,7 @@ pub struct FacetSearchBuilder<'indexer> {
normalized_facet_string_docids_sorter: Sorter<MergeDeladdBtreesetString>,
global_fields_ids_map: GlobalFieldsIdsMap<'indexer>,
localized_attributes_rules: Vec<LocalizedAttributesRule>,
filterable_attributes_rules: Vec<FilterableAttributesRule>,
// Buffered data below
buffer: Vec<u8>,
localized_field_ids: HashMap<FieldId, Option<Vec<Language>>>,
@ -31,6 +34,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
pub fn new(
global_fields_ids_map: GlobalFieldsIdsMap<'indexer>,
localized_attributes_rules: Vec<LocalizedAttributesRule>,
filterable_attributes_rules: Vec<FilterableAttributesRule>,
) -> Self {
let registered_facets = HashMap::new();
let normalized_facet_string_docids_sorter = create_sorter(
@ -49,6 +53,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
buffer: Vec::new(),
global_fields_ids_map,
localized_attributes_rules,
filterable_attributes_rules,
localized_field_ids: HashMap::new(),
}
}
@ -60,6 +65,13 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
) -> Result<()> {
let FacetGroupKey { field_id, level: _level, left_bound } = facet_key;
let filterable_attributes_features = self.filterable_attributes_features(field_id)?;
// if facet search is disabled, we don't need to register the facet
if !filterable_attributes_features.is_facet_searchable() {
return Ok(());
};
if deladd == DelAdd::Addition {
self.registered_facets.entry(field_id).and_modify(|count| *count += 1).or_insert(1);
}
@ -83,6 +95,24 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
Ok(())
}
fn filterable_attributes_features(
&mut self,
field_id: u16,
) -> Result<FilterableAttributesFeatures> {
let Some(filterable_attributes_features) =
self.global_fields_ids_map.metadata(field_id).map(|metadata| {
metadata.filterable_attributes_features(&self.filterable_attributes_rules)
})
else {
return Err(InternalError::FieldIdMapMissingEntry(FieldIdMapMissingEntry::FieldId {
field_id,
process: "facet_search_builder::register_from_key",
})
.into());
};
Ok(filterable_attributes_features)
}
fn locales(&mut self, field_id: FieldId) -> Option<&[Language]> {
if let Entry::Vacant(e) = self.localized_field_ids.entry(field_id) {
let Some(field_name) = self.global_fields_ids_map.name(field_id) else {
@ -92,7 +122,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
let locales = self
.localized_attributes_rules
.iter()
.find(|rule| rule.match_str(field_name))
.find(|rule| rule.match_str(field_name) == PatternMatch::Match)
.map(|rule| rule.locales.clone());
e.insert(locales);

View File

@ -199,7 +199,7 @@ where
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();
<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
WordPairProximityDocidsExtractor::run_extraction(
document_changes,
indexing_context,
extractor_allocs,

View File

@ -25,7 +25,7 @@ use crate::{GlobalFieldsIdsMap, Index, Result};
pub(super) fn post_process<MSP>(
indexing_context: IndexingContext<MSP>,
wtxn: &mut RwTxn<'_>,
global_fields_ids_map: GlobalFieldsIdsMap<'_>,
mut global_fields_ids_map: GlobalFieldsIdsMap<'_>,
facet_field_ids_delta: FacetFieldIdsDelta,
) -> Result<()>
where
@ -33,10 +33,8 @@ where
{
let index = indexing_context.index;
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
if index.facet_search(wtxn)? {
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
}
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
compute_facet_level_database(index, wtxn, facet_field_ids_delta, &mut global_fields_ids_map)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?;
@ -116,10 +114,18 @@ fn compute_facet_search_database(
global_fields_ids_map: GlobalFieldsIdsMap,
) -> Result<()> {
let rtxn = index.read_txn()?;
// if the facet search is not enabled, we can skip the rest of the function
if !index.facet_search(wtxn)? {
return Ok(());
}
let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?;
let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?;
let mut facet_search_builder = FacetSearchBuilder::new(
global_fields_ids_map,
localized_attributes_rules.unwrap_or_default(),
filterable_attributes_rules,
);
let previous_facet_id_string_docids = index
@ -164,8 +170,19 @@ fn compute_facet_level_database(
index: &Index,
wtxn: &mut RwTxn,
mut facet_field_ids_delta: FacetFieldIdsDelta,
global_fields_ids_map: &mut GlobalFieldsIdsMap,
) -> Result<()> {
let rtxn = index.read_txn()?;
let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?;
for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() {
// skip field ids that should not be facet leveled
let Some(metadata) = global_fields_ids_map.metadata(fid) else {
continue;
};
if !metadata.require_facet_level_database(&filterable_attributes_rules) {
continue;
}
let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string");
let _entered = span.enter();
match delta {

View File

@ -137,7 +137,6 @@ pub(super) fn update_index(
index.put_primary_key(wtxn, new_primary_key.name())?;
}
let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?;
inner_index_settings.recompute_facets(wtxn, index)?;
inner_index_settings.recompute_searchables(wtxn, index)?;
index.put_field_distribution(wtxn, &field_distribution)?;
index.put_documents_ids(wtxn, &document_ids)?;

View File

@ -6,17 +6,20 @@ use std::sync::Arc;
use charabia::{Normalize, Tokenizer, TokenizerBuilder};
use deserr::{DeserializeError, Deserr};
use itertools::{EitherOrBoth, Itertools};
use itertools::{merge_join_by, EitherOrBoth, Itertools};
use roaring::RoaringBitmap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use time::OffsetDateTime;
use super::del_add::DelAddOperation;
use super::del_add::{DelAdd, DelAddOperation};
use super::index_documents::{IndexDocumentsConfig, Transform};
use super::IndexerConfig;
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::attribute_patterns::PatternMatch;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::criterion::Criterion;
use crate::error::UserError;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::filterable_attributes_rules::match_faceted_field;
use crate::index::{
IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO,
DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
@ -31,7 +34,7 @@ use crate::vector::settings::{
SubEmbeddingSettings, WriteBackToDocuments,
};
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result};
use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result};
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
pub enum Setting<T> {
@ -155,7 +158,7 @@ pub struct Settings<'a, 't, 'i> {
searchable_fields: Setting<Vec<String>>,
displayed_fields: Setting<Vec<String>>,
filterable_fields: Setting<HashSet<String>>,
filterable_fields: Setting<Vec<FilterableAttributesRule>>,
sortable_fields: Setting<HashSet<String>>,
criteria: Setting<Vec<Criterion>>,
stop_words: Setting<BTreeSet<String>>,
@ -241,8 +244,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.filterable_fields = Setting::Reset;
}
pub fn set_filterable_fields(&mut self, names: HashSet<String>) {
self.filterable_fields = Setting::Set(names);
pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesRule>) {
self.filterable_fields = Setting::Set(rules);
}
pub fn set_sortable_fields(&mut self, names: HashSet<String>) {
@ -516,7 +519,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
}
/// Updates the index's searchable attributes.
fn update_searchable(&mut self) -> Result<bool> {
fn update_user_defined_searchable_attributes(&mut self) -> Result<bool> {
match self.searchable_fields {
Setting::Set(ref fields) => {
// Check to see if the searchable fields changed before doing anything else
@ -529,26 +532,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
return Ok(false);
}
// Since we're updating the settings we can only add new fields at the end of the field id map
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
// fields are deduplicated, only the first occurrence is taken into account
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
// Add all the searchable attributes to the field map, and then add the
// remaining fields from the old field map to the new one
for name in names.iter() {
// The fields ids map won't change the field id of already present elements thus only the
// new fields will be inserted.
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
}
self.index.put_all_searchable_fields_from_fields_ids_map(
self.wtxn,
&names,
&fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME),
&fields_ids_map,
)?;
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
self.index.put_user_defined_searchable_fields(self.wtxn, &names)?;
Ok(true)
}
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
@ -760,14 +747,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
fn update_filterable(&mut self) -> Result<()> {
match self.filterable_fields {
Setting::Set(ref fields) => {
let mut new_facets = HashSet::new();
for name in fields {
new_facets.insert(name.clone());
}
self.index.put_filterable_fields(self.wtxn, &new_facets)?;
self.index.put_filterable_attributes_rules(self.wtxn, fields)?;
}
Setting::Reset => {
self.index.delete_filterable_fields(self.wtxn)?;
self.index.delete_filterable_attributes_rules(self.wtxn)?;
}
Setting::NotSet => (),
}
@ -1257,7 +1240,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.update_separator_tokens()?;
self.update_dictionary()?;
self.update_synonyms()?;
self.update_searchable()?;
self.update_user_defined_searchable_attributes()?;
self.update_exact_attributes()?;
self.update_proximity_precision()?;
self.update_prefix_search()?;
@ -1267,7 +1250,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
let embedding_config_updates = self.update_embedding_configs()?;
let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
new_inner_settings.recompute_facets(self.wtxn, self.index)?;
new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
let primary_key_id = self
.index
@ -1319,8 +1302,8 @@ impl InnerIndexSettingsDiff {
settings_update_only: bool,
) -> Self {
let only_additional_fields = match (
&old_settings.user_defined_searchable_fields,
&new_settings.user_defined_searchable_fields,
&old_settings.user_defined_searchable_attributes,
&new_settings.user_defined_searchable_attributes,
) {
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
(Some(old), Some(new)) => {
@ -1342,14 +1325,14 @@ impl InnerIndexSettingsDiff {
|| old_settings.dictionary != new_settings.dictionary
|| old_settings.proximity_precision != new_settings.proximity_precision
|| old_settings.prefix_search != new_settings.prefix_search
|| old_settings.localized_searchable_fields_ids
!= new_settings.localized_searchable_fields_ids
|| old_settings.localized_attributes_rules
!= new_settings.localized_attributes_rules
};
let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
!= new_settings.user_defined_searchable_fields;
let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes
!= new_settings.user_defined_searchable_attributes;
// if the user-defined searchables changed, then we need to reindex prompts.
if cache_user_defined_searchables {
@ -1432,30 +1415,70 @@ impl InnerIndexSettingsDiff {
}
}
/// List the faceted fields from the inner fid map.
/// This is used to list the faceted fields when we are reindexing,
/// but it can't be used in document addition because the field id map must be exhaustive.
pub fn list_faceted_fields_from_fid_map(&self, del_add: DelAdd) -> BTreeSet<FieldId> {
let settings = match del_add {
DelAdd::Deletion => &self.old,
DelAdd::Addition => &self.new,
};
settings
.fields_ids_map
.iter_id_metadata()
.filter(|(_, metadata)| metadata.is_faceted(&settings.filterable_attributes_rules))
.map(|(id, _)| id)
.collect()
}
pub fn facet_fids_changed(&self) -> bool {
let existing_fields = &self.new.existing_fields;
if existing_fields.iter().any(|field| field.contains('.')) {
return true;
for eob in merge_join_by(
self.old.fields_ids_map.iter().filter(|(_, _, metadata)| {
metadata.is_faceted(&self.old.filterable_attributes_rules)
}),
self.new.fields_ids_map.iter().filter(|(_, _, metadata)| {
metadata.is_faceted(&self.new.filterable_attributes_rules)
}),
|(old_fid, _, _), (new_fid, _, _)| old_fid.cmp(new_fid),
) {
match eob {
// If there is a difference, we need to reindex facet databases.
EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => return true,
// If the field is faceted in both old and new settings, we check the facet-searchable and facet level database.
EitherOrBoth::Both((_, _, old_metadata), (_, _, new_metadata)) => {
// Check if the field is facet-searchable in the old and new settings.
// If there is a difference, we need to reindex facet-search database.
let old_filterable_features = old_metadata
.filterable_attributes_features(&self.old.filterable_attributes_rules);
let new_filterable_features = new_metadata
.filterable_attributes_features(&self.new.filterable_attributes_rules);
let is_old_facet_searchable =
old_filterable_features.is_facet_searchable() && self.old.facet_search;
let is_new_facet_searchable =
new_filterable_features.is_facet_searchable() && self.new.facet_search;
if is_old_facet_searchable != is_new_facet_searchable {
return true;
}
// Check if the field needs a facet level database in the old and new settings.
// If there is a difference, we need to reindex facet level databases.
let old_facet_level_database = old_metadata
.require_facet_level_database(&self.old.filterable_attributes_rules);
let new_facet_level_database = new_metadata
.require_facet_level_database(&self.new.filterable_attributes_rules);
if old_facet_level_database != new_facet_level_database {
return true;
}
}
}
}
let old_faceted_fields = &self.old.user_defined_faceted_fields;
if old_faceted_fields.iter().any(|field| field.contains('.')) {
return true;
}
// If there is new faceted fields we indicate that we must reindex as we must
// index new fields as facets. It means that the distinct attribute,
// an Asc/Desc criterion or a filtered attribute as be added or removed.
let new_faceted_fields = &self.new.user_defined_faceted_fields;
if new_faceted_fields.iter().any(|field| field.contains('.')) {
return true;
}
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
false
}
pub fn global_facet_settings_changed(&self) -> bool {
self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
self.old.localized_attributes_rules != self.new.localized_attributes_rules
|| self.old.facet_search != self.new.facet_search
}
@ -1475,10 +1498,6 @@ impl InnerIndexSettingsDiff {
self.old.geo_fields_ids != self.new.geo_fields_ids
|| (!self.settings_update_only && self.new.geo_fields_ids.is_some())
}
pub fn modified_faceted_fields(&self) -> HashSet<String> {
&self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
}
}
#[derive(Clone)]
@ -1486,20 +1505,17 @@ pub(crate) struct InnerIndexSettings {
pub stop_words: Option<fst::Set<Vec<u8>>>,
pub allowed_separators: Option<BTreeSet<String>>,
pub dictionary: Option<BTreeSet<String>>,
pub fields_ids_map: FieldsIdsMap,
pub user_defined_faceted_fields: HashSet<String>,
pub user_defined_searchable_fields: Option<Vec<String>>,
pub faceted_fields_ids: HashSet<FieldId>,
pub searchable_fields_ids: Vec<FieldId>,
pub fields_ids_map: FieldIdMapWithMetadata,
pub localized_attributes_rules: Vec<LocalizedAttributesRule>,
pub filterable_attributes_rules: Vec<FilterableAttributesRule>,
pub asc_desc_fields: HashSet<String>,
pub distinct_field: Option<String>,
pub user_defined_searchable_attributes: Option<Vec<String>>,
pub sortable_fields: HashSet<String>,
pub exact_attributes: HashSet<FieldId>,
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
pub existing_fields: HashSet<String>,
pub geo_fields_ids: Option<(FieldId, FieldId)>,
pub non_searchable_fields_ids: Vec<FieldId>,
pub non_faceted_fields_ids: Vec<FieldId>,
pub localized_searchable_fields_ids: LocalizedFieldIds,
pub localized_faceted_fields_ids: LocalizedFieldIds,
pub prefix_search: PrefixSearch,
pub facet_search: bool,
}
@ -1515,12 +1531,6 @@ impl InnerIndexSettings {
let allowed_separators = index.allowed_separators(rtxn)?;
let dictionary = index.dictionary(rtxn)?;
let mut fields_ids_map = index.fields_ids_map(rtxn)?;
let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
let user_defined_searchable_fields =
user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
let user_defined_faceted_fields = index.user_defined_faceted_fields(rtxn)?;
let mut searchable_fields_ids = index.searchable_fields_ids(rtxn)?;
let mut faceted_fields_ids = index.faceted_fields_ids(rtxn)?;
let exact_attributes = index.exact_attributes_ids(rtxn)?;
let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default();
let embedding_configs = match embedding_configs {
@ -1529,87 +1539,57 @@ impl InnerIndexSettings {
};
let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default();
let facet_search = index.facet_search(rtxn)?;
let existing_fields: HashSet<_> = index
.field_distribution(rtxn)?
.into_iter()
.filter_map(|(field, count)| (count != 0).then_some(field))
.collect();
// index.fields_ids_map($a)? ==>> fields_ids_map
let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) {
Some(gfid) => {
let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
Some(_) if index.is_geo_enabled(rtxn)? => {
// if `_geo` is faceted then we get the `lat` and `lng`
if is_sortable || is_filterable {
let field_ids = fields_ids_map
.insert("_geo.lat")
.zip(fields_ids_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
} else {
None
}
let field_ids = fields_ids_map
.insert("_geo.lat")
.zip(fields_ids_map.insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;
Some(field_ids)
}
None => None,
_ => None,
};
let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
let localized_searchable_fields_ids = LocalizedFieldIds::new(
&localized_attributes_rules,
&fields_ids_map,
searchable_fields_ids.iter().cloned(),
);
let localized_faceted_fields_ids = LocalizedFieldIds::new(
&localized_attributes_rules,
&fields_ids_map,
faceted_fields_ids.iter().cloned(),
);
let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME);
searchable_fields_ids.retain(|id| !vectors_fids.contains(id));
faceted_fields_ids.retain(|id| !vectors_fids.contains(id));
let localized_attributes_rules =
index.localized_attributes_rules(rtxn)?.unwrap_or_default();
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
let sortable_fields = index.sortable_fields(rtxn)?;
let asc_desc_fields = index.asc_desc_fields(rtxn)?;
let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
let user_defined_searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
let builder = MetadataBuilder::from_index(index, rtxn)?;
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
Ok(Self {
stop_words,
allowed_separators,
dictionary,
fields_ids_map,
user_defined_faceted_fields,
user_defined_searchable_fields,
faceted_fields_ids,
searchable_fields_ids,
localized_attributes_rules,
filterable_attributes_rules,
asc_desc_fields,
distinct_field,
user_defined_searchable_attributes,
sortable_fields,
exact_attributes,
proximity_precision,
embedding_configs,
existing_fields,
geo_fields_ids,
non_searchable_fields_ids: vectors_fids.clone(),
non_faceted_fields_ids: vectors_fids.clone(),
localized_searchable_fields_ids,
localized_faceted_fields_ids,
prefix_search,
facet_search,
})
}
// find and insert the new field ids
pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn<'_>, index: &Index) -> Result<()> {
let new_facets = self
.fields_ids_map
.iter()
.filter(|(fid, _field)| !self.non_faceted_fields_ids.contains(fid))
.filter(|(_fid, field)| crate::is_faceted(field, &self.user_defined_faceted_fields))
.map(|(_fid, field)| field.to_string())
.collect();
index.put_faceted_fields(wtxn, &new_facets)?;
self.faceted_fields_ids = index.faceted_fields_ids(wtxn)?;
let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
self.localized_faceted_fields_ids = LocalizedFieldIds::new(
&localized_attributes_rules,
&self.fields_ids_map,
self.faceted_fields_ids.iter().cloned(),
);
Ok(())
pub fn match_faceted_field(&self, field: &str) -> PatternMatch {
match_faceted_field(
field,
&self.filterable_attributes_rules,
&self.sortable_fields,
&self.asc_desc_fields,
&self.distinct_field,
)
}
// find and insert the new field ids
@ -1619,7 +1599,7 @@ impl InnerIndexSettings {
index: &Index,
) -> Result<()> {
let searchable_fields = self
.user_defined_searchable_fields
.user_defined_searchable_attributes
.as_ref()
.map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());
@ -1628,17 +1608,9 @@ impl InnerIndexSettings {
index.put_all_searchable_fields_from_fields_ids_map(
wtxn,
&searchable_fields,
&self.non_searchable_fields_ids,
&self.fields_ids_map,
)?;
}
self.searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
self.localized_searchable_fields_ids = LocalizedFieldIds::new(
&localized_attributes_rules,
&self.fields_ids_map,
self.searchable_fields_ids.iter().cloned(),
);
Ok(())
}

View File

@ -1,6 +1,6 @@
use big_s::S;
use heed::types::Bytes;
use maplit::{btreemap, btreeset, hashset};
use maplit::{btreemap, btreeset};
use meili_snap::snapshot;
use super::*;
@ -210,7 +210,7 @@ fn set_filterable_fields() {
// Set the filterable fields to be the age.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("age") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("age"))]);
})
.unwrap();
@ -225,8 +225,6 @@ fn set_filterable_fields() {
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashset! { S("age") });
// Only count the field_id 0 and level 0 facet values.
// TODO we must support typed CSVs for numbers to be understood.
let fidmap = index.fields_ids_map(&rtxn).unwrap();
@ -268,15 +266,13 @@ fn set_filterable_fields() {
// Set the filterable fields to be the age and the name.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("age"), S("name") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field(S("age")),
FilterableAttributesRule::Field(S("name")),
]);
})
.unwrap();
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashset! { S("age"), S("name") });
let rtxn = index.read_txn().unwrap();
// Only count the field_id 2 and level 0 facet values.
let count = index
@ -300,15 +296,10 @@ fn set_filterable_fields() {
// Remove the age from the filterable fields.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("name") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("name"))]);
})
.unwrap();
// Check that the displayed fields are correctly set.
let rtxn = index.read_txn().unwrap();
let fields_ids = index.filterable_fields(&rtxn).unwrap();
assert_eq!(fields_ids, hashset! { S("name") });
let rtxn = index.read_txn().unwrap();
// Only count the field_id 2 and level 0 facet values.
let count = index
@ -637,7 +628,10 @@ fn setting_searchable_recomputes_other_settings() {
index
.update_settings(|settings| {
settings.set_displayed_fields(vec!["hello".to_string()]);
settings.set_filterable_fields(hashset! { S("age"), S("toto") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field(S("age")),
FilterableAttributesRule::Field(S("toto")),
]);
settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
})
.unwrap();
@ -754,7 +748,7 @@ fn setting_impact_relevancy() {
// Set the genres setting
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("genres") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("genres"))]);
})
.unwrap();

View File

@ -1,13 +1,12 @@
use big_s::S;
use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::hashset;
use milli::documents::mmap_from_objects;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{FacetDistribution, Index, Object, OrderBy};
use milli::{FacetDistribution, FilterableAttributesRule, Index, Object, OrderBy};
use serde_json::{from_value, json};
#[test]
@ -21,10 +20,10 @@ fn test_facet_distribution_with_no_facet_values() {
let config = IndexerConfig::default();
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_filterable_fields(hashset! {
S("genres"),
S("tags"),
});
builder.set_filterable_fields(vec![
FilterableAttributesRule::Field(S("genres")),
FilterableAttributesRule::Field(S("tags")),
]);
builder.execute(|_| (), || false).unwrap();
wtxn.commit().unwrap();

View File

@ -11,7 +11,9 @@ use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{AscDesc, Criterion, DocumentId, Index, Member, TermsMatchingStrategy};
use milli::{
AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member, TermsMatchingStrategy,
};
use serde::{Deserialize, Deserializer};
use slice_group_by::GroupBy;
@ -42,14 +44,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut builder = Settings::new(&mut wtxn, &index, &config);
builder.set_criteria(criteria.to_vec());
builder.set_filterable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
S("_geo"),
S("opt1"),
S("opt1.opt2"),
S("tag_in")
});
builder.set_filterable_fields(vec![
FilterableAttributesRule::Field(S("tag")),
FilterableAttributesRule::Field(S("asc_desc_rank")),
FilterableAttributesRule::Field(S("_geo")),
FilterableAttributesRule::Field(S("opt1")),
FilterableAttributesRule::Field(S("opt1.opt2")),
FilterableAttributesRule::Field(S("tag_in")),
]);
builder.set_sortable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),