Mirror of https://github.com/meilisearch/MeiliSearch (synced 2025-03-18 13:48:21 +01:00)
Merge #5254
5254: Granular Filterable attribute settings r=ManyTheFish a=ManyTheFish

# Related

**Issue:** https://github.com/meilisearch/meilisearch/issues/5163
**PRD:** https://meilisearch.notion.site/API-usage-Settings-to-opt-out-indexing-features-filterableAttributes-1764b06b651f80aba8bdf359b2df3ca8

# Summary

Change the `filterableAttributes` setting so users can choose which facet features to activate. Deactivating a feature skips the corresponding database computation during indexing, saving time and disk space.

# Example

`PATCH /indexes/:index_uid/settings`

```json
{
  "filterableAttributes": [
    {
      "patterns": ["cattos", "doggos.age"],
      "features": {
        "facetSearch": false,
        "filter": { "equality": true, "comparison": false }
      }
    }
  ]
}
```

# Impact on the codebase

- Settings API:
  - `/settings`
  - `/settings/filterable-attributes`
  - OpenAPI
  - may impact the LocalizedAttributesRules due to the AttributePatterns factorization
- Database:
  - Filterable attributes format changed
  - Faceted field_ids are no longer stored in the database
  - FieldIdsMap no longer contains nonexistent fields
- Search:
  - Search using filters
  - Facet search
  - `Attributes` ranking rule
  - Distinct attribute
  - Facet distribution
- Settings reindexing:
  - searchable
  - facet
  - vector
  - geo
- Document indexing:
  - searchable
  - facet
  - vector
  - geo
- Dump import

# Note for the reviewers

The changes are large and have been split into separate commits, each with a dedicated explanation; I suggest reviewing the commits one by one.

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in: commit a2a86ef4e2
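Before the diff itself, one orienting note: the mechanical change repeated across the hunks below is that filterable attributes become `FilterableAttributesRule` values instead of plain strings. A minimal sketch of the new shape, assuming a crate that depends on milli (`to_rules` is a hypothetical helper; `FilterableAttributesRule::Field` is the constructor used throughout this diff):

```rust
use milli::FilterableAttributesRule;

// Hypothetical helper: wrap plain field names in the new rule type.
// The resulting Vec is what `set_filterable_fields` now expects
// instead of the old `BTreeSet<String>`.
fn to_rules(fields: &[&str]) -> Vec<FilterableAttributesRule> {
    fields.iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect()
}

fn main() {
    let rules = to_rules(&["title", "genre"]);
    assert_eq!(rules.len(), 2);
}
```

Pattern-based rules with per-feature toggles (the `patterns`/`features` object in the example above) use the `FilterableAttributesRule::Pattern(_)` variant instead, as the analytics hunk below shows.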
@@ -12,7 +12,7 @@ use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
-use milli::Index;
+use milli::{FilterableAttributesRule, Index};
 use rand::seq::SliceRandom;
 use rand_chacha::rand_core::SeedableRng;
 use roaring::RoaringBitmap;
@@ -57,7 +57,8 @@ fn setup_settings<'t>(
     let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
     builder.set_searchable_fields(searchable_fields);
 
-    let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
+    let filterable_fields =
+        filterable_fields.iter().map(|s| FilterableAttributesRule::Field(s.to_string())).collect();
     builder.set_filterable_fields(filterable_fields);
 
     let sortable_fields = sortable_fields.iter().map(|s| s.to_string()).collect();
@@ -2,7 +2,7 @@ mod datasets_paths;
 mod utils;
 
 use criterion::{criterion_group, criterion_main};
-use milli::update::Settings;
+use milli::{update::Settings, FilterableAttributesRule};
 use utils::Conf;
 
 #[cfg(not(windows))]
@@ -21,8 +21,10 @@ fn base_conf(builder: &mut Settings) {
         ["name", "alternatenames", "elevation"].iter().map(|s| s.to_string()).collect();
     builder.set_searchable_fields(searchable_fields);
 
-    let filterable_fields =
-        ["_geo", "population", "elevation"].iter().map(|s| s.to_string()).collect();
+    let filterable_fields = ["_geo", "population", "elevation"]
+        .iter()
+        .map(|s| FilterableAttributesRule::Field(s.to_string()))
+        .collect();
     builder.set_filterable_fields(filterable_fields);
 
     let sortable_fields =
@@ -2,7 +2,7 @@ mod datasets_paths;
 mod utils;
 
 use criterion::{criterion_group, criterion_main};
-use milli::update::Settings;
+use milli::{update::Settings, FilterableAttributesRule};
 use utils::Conf;
 
 #[cfg(not(windows))]
@@ -22,7 +22,7 @@ fn base_conf(builder: &mut Settings) {
 
     let faceted_fields = ["released-timestamp", "duration-float", "genre", "country", "artist"]
         .iter()
-        .map(|s| s.to_string())
+        .map(|s| FilterableAttributesRule::Field(s.to_string()))
         .collect();
     builder.set_filterable_fields(faceted_fields);
 }
@@ -233,8 +233,8 @@ pub(crate) mod test {
     use meilisearch_types::features::{Network, Remote, RuntimeTogglableFeatures};
     use meilisearch_types::index_uid_pattern::IndexUidPattern;
     use meilisearch_types::keys::{Action, Key};
-    use meilisearch_types::milli;
     use meilisearch_types::milli::update::Setting;
+    use meilisearch_types::milli::{self, FilterableAttributesRule};
     use meilisearch_types::settings::{Checked, FacetingSettings, Settings};
     use meilisearch_types::task_view::DetailsView;
     use meilisearch_types::tasks::{Details, Kind, Status};
@@ -279,7 +279,10 @@ pub(crate) mod test {
         let settings = Settings {
             displayed_attributes: Setting::Set(vec![S("race"), S("name")]).into(),
             searchable_attributes: Setting::Set(vec![S("name"), S("race")]).into(),
-            filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
+            filterable_attributes: Setting::Set(vec![
+                FilterableAttributesRule::Field(S("race")),
+                FilterableAttributesRule::Field(S("age")),
+            ]),
             sortable_attributes: Setting::Set(btreeset! { S("age") }),
             ranking_rules: Setting::NotSet,
             stop_words: Setting::NotSet,
@@ -322,7 +322,16 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
         v6::Settings {
             displayed_attributes: v6::Setting::from(settings.displayed_attributes).into(),
             searchable_attributes: v6::Setting::from(settings.searchable_attributes).into(),
-            filterable_attributes: settings.filterable_attributes.into(),
+            filterable_attributes: match settings.filterable_attributes {
+                v5::settings::Setting::Set(filterable_attributes) => v6::Setting::Set(
+                    filterable_attributes
+                        .into_iter()
+                        .map(v6::FilterableAttributesRule::Field)
+                        .collect(),
+                ),
+                v5::settings::Setting::Reset => v6::Setting::Reset,
+                v5::settings::Setting::NotSet => v6::Setting::NotSet,
+            },
             sortable_attributes: settings.sortable_attributes.into(),
             ranking_rules: {
                 match settings.ranking_rules {
@@ -46,6 +46,8 @@ pub type ResponseError = meilisearch_types::error::ResponseError;
 pub type Code = meilisearch_types::error::Code;
 pub type RankingRuleView = meilisearch_types::settings::RankingRuleView;
 
+pub type FilterableAttributesRule = meilisearch_types::milli::FilterableAttributesRule;
+
 pub struct V6Reader {
     dump: TempDir,
     instance_uid: Option<Uuid>,
@@ -30,6 +30,25 @@ pub enum Condition<'a> {
     StartsWith { keyword: Token<'a>, word: Token<'a> },
 }
 
+impl Condition<'_> {
+    pub fn operator(&self) -> &str {
+        match self {
+            Condition::GreaterThan(_) => ">",
+            Condition::GreaterThanOrEqual(_) => ">=",
+            Condition::Equal(_) => "=",
+            Condition::NotEqual(_) => "!=",
+            Condition::Null => "IS NULL",
+            Condition::Empty => "IS EMPTY",
+            Condition::Exists => "EXISTS",
+            Condition::LowerThan(_) => "<",
+            Condition::LowerThanOrEqual(_) => "<=",
+            Condition::Between { .. } => "TO",
+            Condition::Contains { .. } => "CONTAINS",
+            Condition::StartsWith { .. } => "STARTS WITH",
+        }
+    }
+}
+
 /// condition = value ("==" | ">" ...) value
 pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
     let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
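The new `Condition::operator()` helper above maps each parsed condition back to its operator keyword; a plausible use (not shown in this excerpt) is rendering the new `FilterOperatorNotAllowed` error, which a later hunk maps to `Code::InvalidSearchFilter`, when a filter uses an operator whose feature is disabled for that field. A reduced, self-contained sketch of the pattern; this toy enum drops the `Token` payloads of the real `Condition<'a>`:

```rust
// Toy stand-in for filter-parser's Condition<'a>; payloads omitted.
#[allow(dead_code)]
enum Condition {
    Equal,
    GreaterThan,
    Between,
}

impl Condition {
    // Same shape as the helper added above: each variant maps back to
    // the operator keyword it was parsed from.
    fn operator(&self) -> &str {
        match self {
            Condition::Equal => "=",
            Condition::GreaterThan => ">",
            Condition::Between => "TO",
        }
    }
}

fn main() {
    // Hypothetical error rendering for a field with `comparison: false`.
    let cond = Condition::GreaterThan;
    println!("Filter operator `{}` is not allowed for this field", cond.operator());
}
```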
@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_failure.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_failure.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
@@ -1,17 +1,16 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_failure.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
 2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_document_ids: 1, deleted_documents: Some(1) }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
 3 {uid: 3, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Invalid type for filter subexpression: expected: String, Array, found: true.", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: true, deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
-4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attributes are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
+4 {uid: 4, batch_uid: 2, status: failed, error: ResponseError { code: 200, message: "Index `doggos`: Attribute `id` is not filterable. Available filterable attribute patterns are: `catto`.\n1:3 id = 2", error_code: "invalid_document_filter", error_type: "invalid_request", error_link: "https://docs.meilisearch.com/errors#invalid_document_filter" }, details: { original_filter: "id = 2", deleted_documents: Some(0) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("id = 2") }}
 5 {uid: 5, batch_uid: 2, status: succeeded, details: { original_filter: "catto EXISTS", deleted_documents: Some(1) }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: String("catto EXISTS") }}
 ----------------------------------------------------------------------
 ### Status:
@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_failure.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, batch_uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 3, indexed_documents: Some(3) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_document_ids: 1, deleted_documents: None }, kind: DocumentDeletion { index_uid: "doggos", documents_ids: ["1"] }}
 3 {uid: 3, status: enqueued, details: { original_filter: true, deleted_documents: None }, kind: DocumentDeletionByFilter { index_uid: "doggos", filter_expr: Bool(true) }}
@@ -1,13 +1,12 @@
 ---
 source: crates/index-scheduler/src/scheduler/test_failure.rs
-snapshot_kind: text
 ---
 ### Autobatching Enabled = true
 ### Processing batch None:
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set({"catto"}), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: Set([Field("catto")]), sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: NotSet, search_cutoff_ms: NotSet, localized_attributes: NotSet, facet_search: NotSet, prefix_search: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 3, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 3, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
@@ -1,11 +1,11 @@
 use std::time::Instant;
 
 use big_s::S;
-use maplit::btreeset;
 use meili_snap::snapshot;
 use meilisearch_types::milli::obkv_to_json;
 use meilisearch_types::milli::update::IndexDocumentsMethod::*;
 use meilisearch_types::milli::update::Setting;
+use meilisearch_types::milli::FilterableAttributesRule;
 use meilisearch_types::tasks::{Kind, KindWithContent};
 
 use crate::insta_snapshot::snapshot_index_scheduler;
@@ -127,7 +127,8 @@ fn fail_in_process_batch_for_document_deletion() {
 
     use meilisearch_types::settings::{Settings, Unchecked};
     let mut new_settings: Box<Settings<Unchecked>> = Box::default();
-    new_settings.filterable_attributes = Setting::Set(btreeset!(S("catto")));
+    new_settings.filterable_attributes =
+        Setting::Set(vec![FilterableAttributesRule::Field(S("catto"))]);
 
     index_scheduler
         .register(
@@ -414,6 +414,7 @@ impl ErrorCode for milli::Error {
                 UserError::AttributeLimitReached => Code::MaxFieldsLimitExceeded,
                 UserError::InvalidFilter(_) => Code::InvalidSearchFilter,
                 UserError::InvalidFilterExpression(..) => Code::InvalidSearchFilter,
+                UserError::FilterOperatorNotAllowed { .. } => Code::InvalidSearchFilter,
                 UserError::MissingDocumentId { .. } => Code::MissingDocumentId,
                 UserError::InvalidDocumentId { .. } | UserError::TooManyDocumentIds { .. } => {
                     Code::InvalidDocumentId
@@ -1,5 +1,5 @@
 use deserr::Deserr;
-use milli::LocalizedAttributesRule;
+use milli::{AttributePatterns, LocalizedAttributesRule};
 use serde::{Deserialize, Serialize};
 use utoipa::ToSchema;
 
@@ -7,7 +7,7 @@ use utoipa::ToSchema;
 #[deserr(rename_all = camelCase)]
 #[serde(rename_all = "camelCase")]
 pub struct LocalizedAttributesRuleView {
-    pub attribute_patterns: Vec<String>,
+    pub attribute_patterns: AttributePatterns,
     pub locales: Vec<Locale>,
 }
 
@@ -11,7 +11,7 @@ use fst::IntoStreamer;
 use milli::index::{IndexEmbeddingConfig, PrefixSearch};
 use milli::proximity::ProximityPrecision;
 use milli::update::Setting;
-use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
+use milli::{Criterion, CriterionError, FilterableAttributesRule, Index, DEFAULT_VALUES_PER_FACET};
 use serde::{Deserialize, Serialize, Serializer};
 use utoipa::ToSchema;
 
@@ -202,8 +202,8 @@ pub struct Settings<T> {
     /// Attributes to use for faceting and filtering. See [Filtering and Faceted Search](https://www.meilisearch.com/docs/learn/filtering_and_sorting/search_with_facet_filters).
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
-    #[schema(value_type = Option<Vec<String>>, example = json!(["release_date", "genre"]))]
-    pub filterable_attributes: Setting<BTreeSet<String>>,
+    #[schema(value_type = Option<Vec<FilterableAttributesRule>>, example = json!(["release_date", "genre"]))]
+    pub filterable_attributes: Setting<Vec<FilterableAttributesRule>>,
     /// Attributes to use when sorting search results.
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsSortableAttributes>)]
@@ -791,7 +791,7 @@ pub fn settings(
         .user_defined_searchable_fields(rtxn)?
         .map(|fields| fields.into_iter().map(String::from).collect());
 
-    let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
+    let filterable_attributes = index.filterable_attributes_rules(rtxn)?.into_iter().collect();
 
     let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
 
@@ -291,7 +291,7 @@ make_setting_routes!(
     {
         route: "/filterable-attributes",
         update_verb: put,
-        value_type: std::collections::BTreeSet<String>,
+        value_type: Vec<meilisearch_types::milli::FilterableAttributesRule>,
        err_type: meilisearch_types::deserr::DeserrJsonError<
             meilisearch_types::error::deserr_codes::InvalidSettingsFilterableAttributes,
         >,
@@ -8,6 +8,7 @@ use std::collections::{BTreeMap, BTreeSet, HashSet};
 use meilisearch_types::facet_values_sort::FacetValuesSort;
 use meilisearch_types::locales::{Locale, LocalizedAttributesRuleView};
 use meilisearch_types::milli::update::Setting;
+use meilisearch_types::milli::FilterableAttributesRule;
 use meilisearch_types::settings::{
     FacetingSettings, PaginationSettings, PrefixSearchSettings, ProximityPrecisionView,
     RankingRuleView, SettingEmbeddingSettings, TypoSettings,
@@ -89,6 +90,10 @@ impl Aggregate for SettingsAnalytics {
             filterable_attributes: FilterableAttributesAnalytics {
                 total: new.filterable_attributes.total.or(self.filterable_attributes.total),
                 has_geo: new.filterable_attributes.has_geo.or(self.filterable_attributes.has_geo),
+                has_patterns: new
+                    .filterable_attributes
+                    .has_patterns
+                    .or(self.filterable_attributes.has_patterns),
             },
             distinct_attribute: DistinctAttributeAnalytics {
                 set: self.distinct_attribute.set | new.distinct_attribute.set,
@@ -328,13 +333,19 @@ impl SortableAttributesAnalytics {
 pub struct FilterableAttributesAnalytics {
     pub total: Option<usize>,
     pub has_geo: Option<bool>,
+    pub has_patterns: Option<bool>,
 }
 
 impl FilterableAttributesAnalytics {
-    pub fn new(setting: Option<&BTreeSet<String>>) -> Self {
+    pub fn new(setting: Option<&Vec<FilterableAttributesRule>>) -> Self {
         Self {
             total: setting.as_ref().map(|filter| filter.len()),
-            has_geo: setting.as_ref().map(|filter| filter.contains("_geo")),
+            has_geo: setting
+                .as_ref()
+                .map(|filter| filter.iter().any(FilterableAttributesRule::has_geo)),
+            has_patterns: setting.as_ref().map(|filter| {
+                filter.iter().any(|rule| matches!(rule, FilterableAttributesRule::Pattern(_)))
+            }),
         }
     }
 
@@ -9,6 +9,10 @@ use meilisearch_types::batches::BatchStats;
 use meilisearch_types::error::{Code, ErrorType, ResponseError};
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::keys::CreateApiKey;
+use meilisearch_types::milli::{
+    AttributePatterns, FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns,
+    FilterableAttributesRule,
+};
 use meilisearch_types::settings::{
     Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
     Unchecked,
@@ -88,7 +92,7 @@ pub mod tasks;
         url = "/",
         description = "Local server",
     )),
-    components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote))
+    components(schemas(PaginationView<KeyView>, PaginationView<IndexView>, IndexView, DocumentDeletionByFilter, AllBatches, BatchStats, ProgressStepView, ProgressView, BatchView, RuntimeTogglableFeatures, SwapIndexesPayload, DocumentEditionByFunction, MergeFacets, FederationOptions, SearchQueryWithIndex, Federation, FederatedSearch, FederatedSearchResult, SearchResults, SearchResultWithIndex, SimilarQuery, SimilarResult, PaginationView<serde_json::Value>, BrowseQuery, UpdateIndexRequest, IndexUid, IndexCreateRequest, KeyView, Action, CreateApiKey, UpdateStderrLogs, LogMode, GetLogs, IndexStats, Stats, HealthStatus, HealthResponse, VersionResponse, Code, ErrorType, AllTasks, TaskView, Status, DetailsView, ResponseError, Settings<Unchecked>, Settings<Checked>, TypoSettings, MinWordSizeTyposSetting, FacetingSettings, PaginationSettings, SummarizedTaskView, Kind, Network, Remote, FilterableAttributesRule, FilterableAttributesPatterns, AttributePatterns, FilterableAttributesFeatures, FilterFeatures))
 )]
 pub struct MeilisearchApi;
 
@@ -20,7 +20,7 @@ use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
 use meilisearch_types::milli::vector::Embedder;
 use meilisearch_types::milli::{
-    FacetValueHit, InternalError, OrderBy, SearchForFacetValues, TimeBudget,
+    FacetValueHit, InternalError, OrderBy, PatternMatch, SearchForFacetValues, TimeBudget,
 };
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
@@ -1538,8 +1538,9 @@ pub fn perform_facet_search(
     // If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale.
     // If the user does not provide locales, we use the locales of the facet string.
     let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
-    let localized_attributes_locales =
-        localized_attributes.into_iter().find(|attr| attr.match_str(&facet_name));
+    let localized_attributes_locales = localized_attributes
+        .into_iter()
+        .find(|attr| attr.match_str(&facet_name) == PatternMatch::Match);
     let locales = localized_attributes_locales.map(|attr| {
         attr.locales
             .into_iter()
@@ -1885,7 +1886,7 @@ fn format_fields(
     let locales = locales.or_else(|| {
         localized_attributes
             .iter()
-            .find(|rule| rule.match_str(key))
+            .find(|rule| rule.match_str(key) == PatternMatch::Match)
             .map(LocalizedAttributesRule::locales)
     });
 
@@ -125,6 +125,12 @@ impl Server<Owned> {
         self.service.post("/indexes", body).await
     }
 
+    pub async fn delete_index(&self, uid: impl AsRef<str>) -> (Value, StatusCode) {
+        let url = format!("/indexes/{}", urlencoding::encode(uid.as_ref()));
+        let (value, code) = self.service.delete(url).await;
+        (value, code)
+    }
+
     pub fn index_with_encoder(&self, uid: impl AsRef<str>, encoder: Encoder) -> Index<'_> {
         Index {
             uid: uid.as_ref().to_string(),
@@ -636,7 +636,7 @@ async fn delete_document_by_filter() {
           "originalFilter": "\"catto = jorts\""
         },
         "error": {
-          "message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attributes are: `id`, `title`.\n1:6 catto = jorts",
+          "message": "Index `SHARED_DOCUMENTS`: Attribute `catto` is not filterable. Available filterable attribute patterns are: `id`, `title`.\n1:6 catto = jorts",
           "code": "invalid_document_filter",
           "type": "invalid_request",
           "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@@ -738,7 +738,7 @@ async fn fetch_document_by_filter() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(response, @r###"
     {
-      "message": "Attribute `doggo` is not filterable. Available filterable attributes are: `color`.\n1:6 doggo = bernese",
+      "message": "Attribute `doggo` is not filterable. Available filterable attribute patterns are: `color`.\n1:6 doggo = bernese",
       "code": "invalid_document_filter",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@@ -1,8 +1,10 @@
 use meili_snap::*;
 
-use crate::common::{shared_does_not_exists_index, Server};
+use crate::common::{shared_does_not_exists_index, Server, DOCUMENTS, NESTED_DOCUMENTS};
 use crate::json;
 
+use super::test_settings_documents_indexing_swapping_and_search;
+
 #[actix_rt::test]
 async fn search_unexisting_index() {
     let index = shared_does_not_exists_index().await;
@@ -430,7 +432,7 @@ async fn search_non_filterable_facets() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute is `title`.",
+      "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute pattern is `title`.",
       "code": "invalid_search_facets",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@@ -441,7 +443,7 @@ async fn search_non_filterable_facets() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute is `title`.",
+      "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute pattern is `title`.",
       "code": "invalid_search_facets",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@@ -461,7 +463,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attributes are `genres, title`.",
+      "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute patterns are `genres, title`.",
       "code": "invalid_search_facets",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@@ -472,7 +474,7 @@ async fn search_non_filterable_facets_multiple_filterable() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attributes are `genres, title`.",
+      "message": "Invalid facet distribution, attribute `doggo` is not filterable. The available filterable attribute patterns are `genres, title`.",
       "code": "invalid_search_facets",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@@ -522,7 +524,7 @@ async fn search_non_filterable_facets_multiple_facets() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attributes are `genres, title`.",
+      "message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attribute patterns are `genres, title`.",
       "code": "invalid_search_facets",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@@ -533,7 +535,7 @@ async fn search_non_filterable_facets_multiple_facets() {
     snapshot!(code, @"400 Bad Request");
     snapshot!(json_string!(response), @r###"
     {
-      "message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attributes are `genres, title`.",
+      "message": "Invalid facet distribution, attributes `doggo, neko` are not filterable. The available filterable attribute patterns are `genres, title`.",
       "code": "invalid_search_facets",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_facets"
@@ -636,14 +638,11 @@ async fn search_bad_matching_strategy() {
 
 #[actix_rt::test]
 async fn filter_invalid_syntax_object() {
-    let server = Server::new_shared();
-    let index = server.unique_index();
-
-    let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
-    index.wait_task(task.uid()).await.succeeded();
-
-    index
-        .search(json!({"filter": "title & Glass"}), |response, code| {
+    test_settings_documents_indexing_swapping_and_search(
+        &DOCUMENTS,
+        &json!({"filterableAttributes": ["title"]}),
+        &json!({"filter": "title & Glass"}),
+        |response, code| {
             snapshot!(response, @r###"
             {
               "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
@@ -653,20 +652,18 @@ async fn filter_invalid_syntax_object() {
             }
             "###);
             snapshot!(code, @"400 Bad Request");
-        })
-        .await;
+        },
+    )
+    .await;
 }
 
 #[actix_rt::test]
 async fn filter_invalid_syntax_array() {
-    let server = Server::new_shared();
-    let index = server.unique_index();
-
-    let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
-    index.wait_task(task.uid()).await.succeeded();
-
-    index
-        .search(json!({"filter": ["title & Glass"]}), |response, code| {
+    test_settings_documents_indexing_swapping_and_search(
+        &DOCUMENTS,
+        &json!({"filterableAttributes": ["title"]}),
+        &json!({"filter": ["title & Glass"]}),
+        |response, code| {
             snapshot!(response, @r###"
            {
               "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `STARTS WITH`, `NOT STARTS WITH`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
@ -676,206 +673,327 @@ async fn filter_invalid_syntax_array() {
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
})
|
||||
.await;
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn filter_invalid_syntax_string() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
});
|
||||
index
|
||||
.search(json!({"filter": "title = Glass XOR title = Glass"}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.await;
|
||||
test_settings_documents_indexing_swapping_and_search(
|
||||
&DOCUMENTS,
|
||||
&json!({"filterableAttributes": ["title"]}),
|
||||
&json!({"filter": "title = Glass XOR title = Glass"}),
|
||||
|response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Found unexpected characters at the end of the filter: `XOR title = Glass`. You probably forgot an `OR` or an `AND` rule.\n15:32 title = Glass XOR title = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn filter_invalid_attribute_array() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
});
|
||||
index
|
||||
.search(json!({"filter": ["many = Glass"]}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.await;
|
||||
test_settings_documents_indexing_swapping_and_search(
|
||||
&DOCUMENTS,
|
||||
&json!({"filterableAttributes": ["title"]}),
|
||||
&json!({"filter": ["many = Glass"]}),
|
||||
|response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Index `test`: Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn filter_invalid_attribute_string() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": format!("Index `{}`: Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass", index.uid),
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
});
|
||||
index
|
||||
.search(json!({"filter": "many = Glass"}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.await;
|
||||
test_settings_documents_indexing_swapping_and_search(
|
||||
&DOCUMENTS,
|
||||
&json!({"filterableAttributes": ["title"]}),
|
||||
&json!({"filter": "many = Glass"}),
|
||||
|response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "Index `test`: Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn filter_reserved_geo_attribute_array() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
});
|
||||
index
|
||||
.search(json!({"filter": ["_geo = Glass"]}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.await;
|
||||
test_settings_documents_indexing_swapping_and_search(
|
||||
&DOCUMENTS,
|
||||
&json!({"filterableAttributes": ["title"]}),
|
||||
&json!({"filter": ["_geo = Glass"]}),
|
||||
|response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
|
||||
async fn filter_reserved_geo_attribute_string() {
|
||||
let server = Server::new_shared();
|
||||
let index = server.unique_index();
|
||||
|
||||
let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
|
||||
index.wait_task(task.uid()).await.succeeded();
|
||||
|
||||
let expected_response = json!({
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
});
|
||||
index
|
||||
.search(json!({"filter": "_geo = Glass"}), |response, code| {
|
||||
assert_eq!(response, expected_response);
|
||||
assert_eq!(code, 400);
|
||||
})
|
||||
.await;
|
||||
test_settings_documents_indexing_swapping_and_search(
|
||||
&DOCUMENTS,
|
||||
&json!({"filterableAttributes": ["title"]}),
|
||||
&json!({"filter": "_geo = Glass"}),
|
||||
|response, code| {
|
||||
snapshot!(response, @r###"
|
||||
{
|
||||
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
|
||||
"code": "invalid_search_filter",
|
||||
"type": "invalid_request",
|
||||
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
|
||||
}
|
||||
"###);
|
||||
snapshot!(code, @"400 Bad Request");
|
||||
},
|
||||
)
|
||||
.await;
|
||||
}
|
||||
|
||||
#[actix_rt::test]
async fn filter_reserved_attribute_array() {
    let server = Server::new_shared();
    let index = server.unique_index();

    let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
    index.wait_task(task.uid()).await.succeeded();

    let expected_response = json!({
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
    });
    index
        .search(json!({"filter": ["_geoDistance = Glass"]}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
    test_settings_documents_indexing_swapping_and_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": ["title"]}),
        &json!({"filter": ["_geoDistance = Glass"]}),
        |response, code| {
            snapshot!(response, @r###"
            {
              "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
            snapshot!(code, @"400 Bad Request");
        },
    )
    .await;
}

#[actix_rt::test]
async fn filter_reserved_attribute_string() {
    let server = Server::new_shared();
    let index = server.unique_index();

    let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
    index.wait_task(task.uid()).await.succeeded();

    let expected_response = json!({
        "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
    });
    index
        .search(json!({"filter": "_geoDistance = Glass"}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
    test_settings_documents_indexing_swapping_and_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": ["title"]}),
        &json!({"filter": "_geoDistance = Glass"}),
        |response, code| {
            snapshot!(response, @r###"
            {
              "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
            snapshot!(code, @"400 Bad Request");
        },
    )
    .await;
}

#[actix_rt::test]
async fn filter_reserved_geo_point_array() {
    let server = Server::new_shared();
    let index = server.unique_index();

    let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
    index.wait_task(task.uid()).await.succeeded();

    let expected_response = json!({
        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
    });
    index
        .search(json!({"filter": ["_geoPoint = Glass"]}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
    test_settings_documents_indexing_swapping_and_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": ["title"]}),
        &json!({"filter": ["_geoPoint = Glass"]}),
        |response, code| {
            snapshot!(response, @r###"
            {
              "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
            snapshot!(code, @"400 Bad Request");
        },
    )
    .await;
}

#[actix_rt::test]
async fn filter_reserved_geo_point_string() {
    let server = Server::new_shared();
    let index = server.unique_index();

    let (task, _code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
    index.wait_task(task.uid()).await.succeeded();

    let expected_response = json!({
        "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
        "code": "invalid_search_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
    });
    index
        .search(json!({"filter": "_geoPoint = Glass"}), |response, code| {
            assert_eq!(response, expected_response);
            assert_eq!(code, 400);
        })
        .await;
    test_settings_documents_indexing_swapping_and_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": ["title"]}),
        &json!({"filter": "_geoPoint = Glass"}),
        |response, code| {
            snapshot!(response, @r###"
            {
              "message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
            snapshot!(code, @"400 Bad Request");
        },
    )
    .await;
}

#[actix_rt::test]
async fn search_with_pattern_filter_settings_errors() {
    // Check that equality filters are rejected when the matching rule only activates comparison
    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": false, "comparison": true}
            }
        }]}),
        &json!({
            "filter": "cattos = pésti"
        }),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(json_string!(response), @r###"
            {
              "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": false, "comparison": true}
            }
        }]}),
        &json!({
            "filter": "cattos IN [pésti, simba]"
        }),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(json_string!(response), @r###"
            {
              "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
        },
    )
    .await;

    // Check that comparison filters are rejected when the matching rule does not activate them
    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["cattos","doggos.age"]}]}),
        &json!({
            "filter": "doggos.age > 2"
        }),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(json_string!(response), @r###"
            {
              "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": true, "comparison": false}
            }
        }]}),
        &json!({
            "filter": "doggos.age > 2"
        }),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(json_string!(response), @r###"
            {
              "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": true, "comparison": false}
            }
        }]}),
        &json!({
            "filter": "doggos.age 2 TO 4"
        }),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(json_string!(response), @r###"
            {
              "message": "Index `test`: Filter operator `TO` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
        },
    )
    .await;
}

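// Every rejection above comes from the `filter` feature flags on the matching rule.
// A sketch of the settings that would let both operator families through for the same
// patterns (exercised for real in `search_with_pattern_filter_settings_scenario_1` below):
//
//     index
//         .update_settings(json!({"filterableAttributes": [{
//             "attributePatterns": ["cattos", "doggos.age"],
//             "features": {
//                 "facetSearch": false,
//                 "filter": {"equality": true, "comparison": true}
//             }
//         }]}))
//         .await;
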
#[actix_rt::test]
@ -1018,109 +1136,115 @@ async fn sort_unset_ranking_rule() {

#[actix_rt::test]
async fn search_on_unknown_field() {
    let server = Server::new_shared();
    let index = server.unique_index();
    let (response, _code) =
        index.update_settings_searchable_attributes(json!(["id", "title"])).await;
    index.wait_task(response.uid()).await.succeeded();

    let expected_response = json!({
        "message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
        "code": "invalid_search_attributes_to_search_on",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
    });
    index
        .search(
            json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
            |response, code| {
                assert_eq!(response, expected_response);
                assert_eq!(code, 400);
            },
        )
        .await;
    test_settings_documents_indexing_swapping_and_search(
        &DOCUMENTS,
        &json!({"searchableAttributes": ["id", "title"]}),
        &json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown"]}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response, @r###"
            {
              "message": "Index `test`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
              "code": "invalid_search_attributes_to_search_on",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
            }
            "###);
        },
    )
    .await;
}

#[actix_rt::test]
async fn search_on_unknown_field_plus_joker() {
    let server = Server::new_shared();
    let index = server.unique_index();
    let (response, _code) =
        index.update_settings_searchable_attributes(json!(["id", "title"])).await;
    index.wait_task(response.uid()).await.succeeded();
    test_settings_documents_indexing_swapping_and_search(
        &DOCUMENTS,
        &json!({"searchableAttributes": ["id", "title"]}),
        &json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response, @r###"
            {
              "message": "Index `test`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
              "code": "invalid_search_attributes_to_search_on",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
            }
            "###);
        },
    )
    .await;

    let expected_response = json!({
        "message": format!("Index `{}`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.", index.uid),
        "code": "invalid_search_attributes_to_search_on",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
    });
    index
        .search(
            json!({"q": "Captain Marvel", "attributesToSearchOn": ["*", "unknown"]}),
            |response, code| {
                assert_eq!(response, expected_response);
                assert_eq!(code, 400);
            },
        )
        .await;

    index
        .search(
            json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}),
            |response, code| {
                assert_eq!(response, expected_response);
                assert_eq!(code, 400);
            },
        )
        .await;
    test_settings_documents_indexing_swapping_and_search(
        &DOCUMENTS,
        &json!({"searchableAttributes": ["id", "title"]}),
        &json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown", "*"]}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response, @r###"
            {
              "message": "Index `test`: Attribute `unknown` is not searchable. Available searchable attributes are: `id, title`.",
              "code": "invalid_search_attributes_to_search_on",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_attributes_to_search_on"
            }
            "###);
        },
    )
    .await;
}

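// Note: both failures mix the wildcard with an unknown attribute; the wildcard on its
// own, `"attributesToSearchOn": ["*"]`, is expected to be accepted (an assumption, not
// covered by the assertions above).
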
#[actix_rt::test]
async fn distinct_at_search_time() {
    let server = Server::new_shared();
    let index = server.unique_index();
    let server = Server::new().await;
    let index = server.index("test");
    let (task, _) = index.create(None).await;
    index.wait_task(task.uid()).await.succeeded();
    let (response, _code) =
        index.add_documents(json!([{"id": 1, "color": "Doggo", "machin": "Action"}]), None).await;
    index.wait_task(response.uid()).await.succeeded();

    let expected_response = json!({
        "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", index.uid),
        "code": "invalid_search_distinct",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
    });
    let (response, code) =
        index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
    assert_eq!(response, expected_response);
    assert_eq!(code, 400);
    snapshot!(code, @"400 Bad Request");
    snapshot!(response, @r###"
    {
      "message": "Index `test`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.",
      "code": "invalid_search_distinct",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
    }
    "###);

    let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await;
    index.wait_task(task.uid()).await.succeeded();

    let expected_response = json!({
        "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", index.uid),
        "code": "invalid_search_distinct",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
    });
    let (response, code) =
        index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
    assert_eq!(response, expected_response);
    assert_eq!(code, 400);
    snapshot!(code, @"400 Bad Request");
    snapshot!(response, @r###"
    {
      "message": "Index `test`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes patterns are: `color, machin`.",
      "code": "invalid_search_distinct",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
    }
    "###);

    let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await;
    index.wait_task(task.uid()).await.succeeded();

    let expected_response = json!({
        "message": format!("Index `{}`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", index.uid),
        "code": "invalid_search_distinct",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
    });
    let (response, code) =
        index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await;
    assert_eq!(response, expected_response);
    assert_eq!(code, 400);
    snapshot!(code, @"400 Bad Request");
    snapshot!(response, @r###"
    {
      "message": "Index `test`: Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes patterns are: `color, <..hidden-attributes>`.",
      "code": "invalid_search_distinct",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_search_distinct"
    }
    "###);

    let (response, code) =
        index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await;

@ -1,7 +1,9 @@
use meili_snap::snapshot;
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;

use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value, NESTED_DOCUMENTS};
use crate::json;

static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
@ -34,6 +36,62 @@ static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    ])
});

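/// Runs `test` against the facet-search response twice: once with the documents added
/// before the settings and once with the settings applied first, deleting the index
/// between the two passes, so regressions tied to indexing order surface either way.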
async fn test_settings_documents_indexing_swapping_and_facet_search(
    documents: &Value,
    settings: &Value,
    query: &Value,
    test: impl Fn(Value, actix_http::StatusCode) + std::panic::UnwindSafe + Clone,
) {
    let temp = TempDir::new().unwrap();
    let server = Server::new_with_options(Opt { ..default_settings(temp.path()) }).await.unwrap();

    eprintln!("Documents -> Settings -> test");
    let index = server.index("test");

    let (task, code) = index.add_documents(documents.clone(), None).await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    let (task, code) = index.update_settings(settings.clone()).await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    let (response, code) = index.facet_search(query.clone()).await;
    insta::allow_duplicates! {
        test(response, code);
    }

    let (task, code) = server.delete_index("test").await;
    assert_eq!(code, 202, "{}", task);
    let response = server.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    eprintln!("Settings -> Documents -> test");
    let index = server.index("test");

    let (task, code) = index.update_settings(settings.clone()).await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    let (task, code) = index.add_documents(documents.clone(), None).await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    let (response, code) = index.facet_search(query.clone()).await;
    insta::allow_duplicates! {
        test(response, code);
    }

    let (task, code) = server.delete_index("test").await;
    assert_eq!(code, 202, "{}", task);
    let response = server.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);
}

#[actix_rt::test]
async fn simple_facet_search() {
    let server = Server::new().await;
@ -436,3 +494,124 @@ async fn deactivate_facet_search_add_documents_and_reset_facet_search() {
    assert_eq!(code, 200, "{}", response);
    assert_eq!(dbg!(response)["facetHits"].as_array().unwrap().len(), 2);
}

#[actix_rt::test]
async fn facet_search_with_filterable_attributes_rules() {
    test_settings_documents_indexing_swapping_and_facet_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": ["genres"]}),
        &json!({"facetName": "genres", "facetQuery": "a"}),
        |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["facetHits"], @r###"[{"value":"Action","count":3},{"value":"Adventure","count":2}]"###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_facet_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["genres"], "features": {"facetSearch": true, "filter": {"equality": false, "comparison": false}}}]}),
        &json!({"facetName": "genres", "facetQuery": "a"}),
        |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["facetHits"], @r###"[{"value":"Action","count":3},{"value":"Adventure","count":2}]"###);
        },
    ).await;

    test_settings_documents_indexing_swapping_and_facet_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": ["doggos.name"]}),
        &json!({"facetName": "doggos.name", "facetQuery": "b"}),
        |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["facetHits"], @r###"[{"value":"bobby","count":1},{"value":"buddy","count":1}]"###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_facet_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"], "features": {"facetSearch": true, "filter": {"equality": false, "comparison": false}}}]}),
        &json!({"facetName": "doggos.name", "facetQuery": "b"}),
        |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(response["facetHits"], @r###"[{"value":"bobby","count":1},{"value":"buddy","count":1}]"###);
        },
    ).await;
}

#[actix_rt::test]
async fn facet_search_with_filterable_attributes_rules_errors() {
    test_settings_documents_indexing_swapping_and_facet_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": ["genres"]}),
        &json!({"facetName": "invalid", "facetQuery": "a"}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response["message"], @r###""Attribute `invalid` is not facet-searchable. Available facet-searchable attributes patterns are: `genres`. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_facet_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["genres"]}]}),
        &json!({"facetName": "genres", "facetQuery": "a"}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response["message"], @r###""Attribute `genres` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_facet_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}),
        &json!({"facetName": "genres", "facetQuery": "a"}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response["message"], @r###""Attribute `genres` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
        },
    ).await;

    test_settings_documents_indexing_swapping_and_facet_search(
        &DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["genres"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}),
        &json!({"facetName": "genres", "facetQuery": "a"}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response["message"], @r###""Attribute `genres` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
        },
    ).await;

    test_settings_documents_indexing_swapping_and_facet_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"]}]}),
        &json!({"facetName": "invalid.name", "facetQuery": "b"}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response["message"], @r###""Attribute `invalid.name` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_facet_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": true, "comparison": true}}}]}),
        &json!({"facetName": "doggos.name", "facetQuery": "b"}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response["message"], @r###""Attribute `doggos.name` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
        },
    ).await;

    test_settings_documents_indexing_swapping_and_facet_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["doggos.name"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}}]}),
        &json!({"facetName": "doggos.name", "facetQuery": "b"}),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(response["message"], @r###""Attribute `doggos.name` is not facet-searchable. This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.""###);
        },
    ).await;
}

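// For contrast with the failures above: keeping `facetSearch: true` restores facet
// search even when both filter operators are off, as the passing cases in
// `facet_search_with_filterable_attributes_rules` show:
//
//     {"attributePatterns": ["genres"], "features": {"facetSearch": true, "filter": {"equality": false, "comparison": false}}}
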
crates/meilisearch/tests/search/filters.rs (new file, 758 lines)
@ -0,0 +1,758 @@
use meili_snap::{json_string, snapshot};
use meilisearch::Opt;
use tempfile::TempDir;

use super::test_settings_documents_indexing_swapping_and_search;
use crate::{
    common::{default_settings, shared_index_with_documents, Server, DOCUMENTS, NESTED_DOCUMENTS},
    json,
};

#[actix_rt::test]
async fn search_with_filter_string_notation() {
    let server = Server::new().await;
    let index = server.index("test");

    let (_, code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
    meili_snap::snapshot!(code, @"202 Accepted");

    let documents = DOCUMENTS.clone();
    let (task, code) = index.add_documents(documents, None).await;
    meili_snap::snapshot!(code, @"202 Accepted");
    let res = index.wait_task(task.uid()).await;
    meili_snap::snapshot!(res["status"], @r###""succeeded""###);

    index
        .search(
            json!({
                "filter": "title = Gläss"
            }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(response["hits"].as_array().unwrap().len(), 1);
            },
        )
        .await;

    let index = server.index("nested");

    let (_, code) =
        index.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})).await;
    meili_snap::snapshot!(code, @"202 Accepted");

    let documents = NESTED_DOCUMENTS.clone();
    let (task, code) = index.add_documents(documents, None).await;
    meili_snap::snapshot!(code, @"202 Accepted");
    let res = index.wait_task(task.uid()).await;
    meili_snap::snapshot!(res["status"], @r###""succeeded""###);

    index
        .search(
            json!({
                "filter": "cattos = pésti"
            }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(response["hits"].as_array().unwrap().len(), 1);
                assert_eq!(response["hits"][0]["id"], json!(852));
            },
        )
        .await;

    index
        .search(
            json!({
                "filter": "doggos.age > 5"
            }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(response["hits"].as_array().unwrap().len(), 2);
                assert_eq!(response["hits"][0]["id"], json!(654));
                assert_eq!(response["hits"][1]["id"], json!(951));
            },
        )
        .await;
}

#[actix_rt::test]
async fn search_with_filter_array_notation() {
    let index = shared_index_with_documents().await;
    let (response, code) = index
        .search_post(json!({
            "filter": ["title = Gläss"]
        }))
        .await;
    assert_eq!(code, 200, "{}", response);
    assert_eq!(response["hits"].as_array().unwrap().len(), 1);

    let (response, code) = index
        .search_post(json!({
            "filter": [["title = Gläss", "title = \"Shazam!\"", "title = \"Escape Room\""]]
        }))
        .await;
    assert_eq!(code, 200, "{}", response);
    assert_eq!(response["hits"].as_array().unwrap().len(), 3);
}

#[actix_rt::test]
async fn search_with_contains_filter() {
    let temp = TempDir::new().unwrap();
    let server = Server::new_with_options(Opt {
        experimental_contains_filter: true,
        ..default_settings(temp.path())
    })
    .await
    .unwrap();
    let index = server.index("movies");

    index.update_settings(json!({"filterableAttributes": ["title"]})).await;

    let documents = DOCUMENTS.clone();
    let (request, _code) = index.add_documents(documents, None).await;
    index.wait_task(request.uid()).await.succeeded();

    let (response, code) = index
        .search_post(json!({
            "filter": "title CONTAINS cap"
        }))
        .await;
    assert_eq!(code, 200, "{}", response);
    assert_eq!(response["hits"].as_array().unwrap().len(), 2);
}

#[actix_rt::test]
async fn search_with_pattern_filter_settings() {
    // Check if the Equality filter works with patterns
    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{"attributePatterns": ["cattos","doggos.age"]}]}),
        &json!({
            "filter": "cattos = pésti"
        }),
        |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 852,
                "father": "jean",
                "mother": "michelle",
                "doggos": [
                  {
                    "name": "bobby",
                    "age": 2
                  },
                  {
                    "name": "buddy",
                    "age": 4
                  }
                ],
                "cattos": "pésti"
              }
            ]
            "###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": true, "comparison": false}
            }
        }]}),
        &json!({
            "filter": "cattos = pésti"
        }),
        |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 852,
                "father": "jean",
                "mother": "michelle",
                "doggos": [
                  {
                    "name": "bobby",
                    "age": 2
                  },
                  {
                    "name": "buddy",
                    "age": 4
                  }
                ],
                "cattos": "pésti"
              }
            ]
            "###);
        },
    )
    .await;

    // Check if the Comparison filter works with patterns
    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": false, "comparison": true}
            }
        }]}),
        &json!({
            "filter": "doggos.age > 2"
        }),
        |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 852,
                "father": "jean",
                "mother": "michelle",
                "doggos": [
                  {
                    "name": "bobby",
                    "age": 2
                  },
                  {
                    "name": "buddy",
                    "age": 4
                  }
                ],
                "cattos": "pésti"
              },
              {
                "id": 654,
                "father": "pierre",
                "mother": "sabine",
                "doggos": [
                  {
                    "name": "gros bill",
                    "age": 8
                  }
                ],
                "cattos": [
                  "simba",
                  "pestiféré"
                ]
              },
              {
                "id": 951,
                "father": "jean-baptiste",
                "mother": "sophie",
                "doggos": [
                  {
                    "name": "turbo",
                    "age": 5
                  },
                  {
                    "name": "fast",
                    "age": 6
                  }
                ],
                "cattos": [
                  "moumoute",
                  "gomez"
                ]
              }
            ]
            "###);
        },
    )
    .await;
}

#[actix_rt::test]
async fn search_with_pattern_filter_settings_scenario_1() {
    let temp = TempDir::new().unwrap();
    let server = Server::new_with_options(Opt { ..default_settings(temp.path()) }).await.unwrap();

    eprintln!("Documents -> Settings -> test");
    let index = server.index("test");

    let (task, code) = index.add_documents(NESTED_DOCUMENTS.clone(), None).await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    snapshot!(response["status"], @r###""succeeded""###);

    let (task, code) = index
        .update_settings(json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": true, "comparison": false}
            }
        }]}))
        .await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    snapshot!(response["status"], @r###""succeeded""###);

    // Check if the Equality filter works
    index
        .search(
            json!({
                "filter": "cattos = pésti"
            }),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]), @r###"
                [
                  {
                    "id": 852,
                    "father": "jean",
                    "mother": "michelle",
                    "doggos": [
                      {
                        "name": "bobby",
                        "age": 2
                      },
                      {
                        "name": "buddy",
                        "age": 4
                      }
                    ],
                    "cattos": "pésti"
                  }
                ]
                "###);
            },
        )
        .await;

    // Check if the Comparison filter returns an error
    index
        .search(
            json!({
                "filter": "doggos.age > 2"
            }),
            |response, code| {
                snapshot!(code, @"400 Bad Request");
                snapshot!(json_string!(response), @r###"
                {
                  "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
                  "code": "invalid_search_filter",
                  "type": "invalid_request",
                  "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
                }
                "###);
            },
        )
        .await;

    // Update the settings to activate the comparison filter
    let (task, code) = index
        .update_settings(json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": true, "comparison": true}
            }
        }]}))
        .await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    snapshot!(response["status"], @r###""succeeded""###);

    // Check if the Equality filter works
    index
        .search(
            json!({
                "filter": "cattos = pésti"
            }),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]), @r###"
                [
                  {
                    "id": 852,
                    "father": "jean",
                    "mother": "michelle",
                    "doggos": [
                      {
                        "name": "bobby",
                        "age": 2
                      },
                      {
                        "name": "buddy",
                        "age": 4
                      }
                    ],
                    "cattos": "pésti"
                  }
                ]
                "###);
            },
        )
        .await;

    // Check if the Comparison filter works
    index
        .search(
            json!({
                "filter": "doggos.age > 2"
            }),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]), @r###"
                [
                  {
                    "id": 852,
                    "father": "jean",
                    "mother": "michelle",
                    "doggos": [
                      {
                        "name": "bobby",
                        "age": 2
                      },
                      {
                        "name": "buddy",
                        "age": 4
                      }
                    ],
                    "cattos": "pésti"
                  },
                  {
                    "id": 654,
                    "father": "pierre",
                    "mother": "sabine",
                    "doggos": [
                      {
                        "name": "gros bill",
                        "age": 8
                      }
                    ],
                    "cattos": [
                      "simba",
                      "pestiféré"
                    ]
                  },
                  {
                    "id": 951,
                    "father": "jean-baptiste",
                    "mother": "sophie",
                    "doggos": [
                      {
                        "name": "turbo",
                        "age": 5
                      },
                      {
                        "name": "fast",
                        "age": 6
                      }
                    ],
                    "cattos": [
                      "moumoute",
                      "gomez"
                    ]
                  }
                ]
                "###);
            },
        )
        .await;

    // Update the settings to deactivate the equality filter
    let (task, code) = index
        .update_settings(json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": false, "comparison": true}
            }
        }]}))
        .await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    snapshot!(response["status"], @r###""succeeded""###);

    // Check if the Equality filter returns an error
    index
        .search(
            json!({
                "filter": "cattos = pésti"
            }),
            |response, code| {
                snapshot!(code, @"400 Bad Request");
                snapshot!(json_string!(response), @r###"
                {
                  "message": "Index `test`: Filter operator `=` is not allowed for the attribute `cattos`.\n - Note: allowed operators: OR, AND, NOT, <, >, <=, >=, TO, IS EMPTY, IS NULL, EXISTS.\n - Note: field `cattos` matched rule #0 in `filterableAttributes`",
                  "code": "invalid_search_filter",
                  "type": "invalid_request",
                  "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
                }
                "###);
            },
        )
        .await;

    // Check if the Comparison filter works
    index
        .search(
            json!({
                "filter": "doggos.age > 2"
            }),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]), @r###"
                [
                  {
                    "id": 852,
                    "father": "jean",
                    "mother": "michelle",
                    "doggos": [
                      {
                        "name": "bobby",
                        "age": 2
                      },
                      {
                        "name": "buddy",
                        "age": 4
                      }
                    ],
                    "cattos": "pésti"
                  },
                  {
                    "id": 654,
                    "father": "pierre",
                    "mother": "sabine",
                    "doggos": [
                      {
                        "name": "gros bill",
                        "age": 8
                      }
                    ],
                    "cattos": [
                      "simba",
                      "pestiféré"
                    ]
                  },
                  {
                    "id": 951,
                    "father": "jean-baptiste",
                    "mother": "sophie",
                    "doggos": [
                      {
                        "name": "turbo",
                        "age": 5
                      },
                      {
                        "name": "fast",
                        "age": 6
                      }
                    ],
                    "cattos": [
                      "moumoute",
                      "gomez"
                    ]
                  }
                ]
                "###);
            },
        )
        .await;

    // Roll back the settings
    let (task, code) = index
        .update_settings(json!({"filterableAttributes": [{
            "attributePatterns": ["cattos","doggos.age"],
            "features": {
                "facetSearch": false,
                "filter": {"equality": true, "comparison": false}
            }
        }]}))
        .await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    snapshot!(response["status"], @r###""succeeded""###);

    // Check if the Equality filter works
    index
        .search(
            json!({
                "filter": "cattos = pésti"
            }),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]), @r###"
                [
                  {
                    "id": 852,
                    "father": "jean",
                    "mother": "michelle",
                    "doggos": [
                      {
                        "name": "bobby",
                        "age": 2
                      },
                      {
                        "name": "buddy",
                        "age": 4
                      }
                    ],
                    "cattos": "pésti"
                  }
                ]
                "###);
            },
        )
        .await;

    // Check if the Comparison filter returns an error
    index
        .search(
            json!({
                "filter": "doggos.age > 2"
            }),
            |response, code| {
                snapshot!(code, @"400 Bad Request");
                snapshot!(json_string!(response), @r###"
                {
                  "message": "Index `test`: Filter operator `>` is not allowed for the attribute `doggos.age`.\n - Note: allowed operators: OR, AND, NOT, =, !=, IN, IS EMPTY, IS NULL, EXISTS.\n - Note: field `doggos.age` matched rule #0 in `filterableAttributes`",
                  "code": "invalid_search_filter",
                  "type": "invalid_request",
                  "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
                }
                "###);
            },
        )
        .await;
}

#[actix_rt::test]
async fn test_filterable_attributes_priority() {
    // Test that the filterable attributes priority is respected

    // Check that `doggos.name` is filterable: the deactivated `doggos.a*` rule does not match it,
    // so the broader `doggos.*` rule applies
    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [
            // deactivated filter
            {"attributePatterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}},
            // activated filter
            {"attributePatterns": ["doggos.*"]},
        ]}),
        &json!({
            "filter": "doggos.name = bobby"
        }),
        |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 852,
                "father": "jean",
                "mother": "michelle",
                "doggos": [
                  {
                    "name": "bobby",
                    "age": 2
                  },
                  {
                    "name": "buddy",
                    "age": 4
                  }
                ],
                "cattos": "pésti"
              }
            ]
            "###);
        },
    )
    .await;

    // Check that `doggos.name` is still filterable when the deactivated rule targets the exact field `doggos`
    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [
            // deactivated filter
            {"attributePatterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}},
            // activated filter
            {"attributePatterns": ["doggos.*"]},
        ]}),
        &json!({
            "filter": "doggos.name = bobby"
        }),
        |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 852,
                "father": "jean",
                "mother": "michelle",
                "doggos": [
                  {
                    "name": "bobby",
                    "age": 2
                  },
                  {
                    "name": "buddy",
                    "age": 4
                  }
                ],
                "cattos": "pésti"
              }
            ]
            "###);
        },
    )
    .await;

    // Check that `doggos.age` is not filterable: the deactivated `doggos.a*` rule matches first
    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [
            // deactivated filter
            {"attributePatterns": ["doggos.a*"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}},
            // activated filter
            {"attributePatterns": ["doggos.*"]},
        ]}),
        &json!({
            "filter": "doggos.age > 2"
        }),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(json_string!(response), @r###"
            {
              "message": "Index `test`: Attribute `doggos.age` is not filterable. Available filterable attribute patterns are: `doggos.*`.\n1:11 doggos.age > 2",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
        },
    )
    .await;

    // Check that `doggos` is not filterable: the deactivated exact rule `doggos` matches first
    test_settings_documents_indexing_swapping_and_search(
        &NESTED_DOCUMENTS,
        &json!({"filterableAttributes": [
            // deactivated filter
            {"attributePatterns": ["doggos"], "features": {"facetSearch": false, "filter": {"equality": false, "comparison": false}}},
            // activated filter
            {"attributePatterns": ["doggos.*"]},
        ]}),
        &json!({
            "filter": "doggos EXISTS"
        }),
        |response, code| {
            snapshot!(code, @"400 Bad Request");
            snapshot!(json_string!(response), @r###"
            {
              "message": "Index `test`: Attribute `doggos` is not filterable. Available filterable attribute patterns are: `doggos.*`.\n1:7 doggos EXISTS",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
        },
    )
    .await;
}

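// Taken together: rules are evaluated in declaration order and the first pattern that
// matches a field decides all of its features; later, broader rules are never
// consulted for that field.
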
@ -1,9 +1,12 @@
use meili_snap::{json_string, snapshot};
use meilisearch_types::milli::constants::RESERVED_GEO_FIELD_NAME;
use once_cell::sync::Lazy;

use crate::common::{Server, Value};
use crate::json;

use super::test_settings_documents_indexing_swapping_and_search;

static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
    json!([
        {
@ -184,3 +187,184 @@ async fn bug_4640() {
    )
    .await;
}

#[actix_rt::test]
async fn geo_asc_with_words() {
    let documents = json!([
        { "id": 0, "doggo": "jean", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 0 } },
        { "id": 1, "doggo": "intel", RESERVED_GEO_FIELD_NAME: { "lat": 88, "lng": 0 } },
        { "id": 2, "doggo": "jean bob", RESERVED_GEO_FIELD_NAME: { "lat": -89, "lng": 0 } },
        { "id": 3, "doggo": "jean michel", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 178 } },
        { "id": 4, "doggo": "bob marley", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": -179 } },
    ]);

    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "geo:asc"]}),
        &json!({"q": "jean"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
            {
              "hits": [
                {
                  "id": 0,
                  "doggo": "jean",
                  "_geo": {
                    "lat": 0,
                    "lng": 0
                  }
                },
                {
                  "id": 2,
                  "doggo": "jean bob",
                  "_geo": {
                    "lat": -89,
                    "lng": 0
                  }
                },
                {
                  "id": 3,
                  "doggo": "jean michel",
                  "_geo": {
                    "lat": 0,
                    "lng": 178
                  }
                }
              ],
              "query": "jean",
              "processingTimeMs": "[time]",
              "limit": 20,
              "offset": 0,
              "estimatedTotalHits": 3
            }
            "###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "geo:asc"]}),
        &json!({"q": "bob"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
            {
              "hits": [
                {
                  "id": 2,
                  "doggo": "jean bob",
                  "_geo": {
                    "lat": -89,
                    "lng": 0
                  }
                },
                {
                  "id": 4,
                  "doggo": "bob marley",
                  "_geo": {
                    "lat": 0,
                    "lng": -179
                  }
                }
              ],
              "query": "bob",
              "processingTimeMs": "[time]",
              "limit": 20,
              "offset": 0,
              "estimatedTotalHits": 2
            }
            "###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "geo:asc"]}),
        &json!({"q": "intel"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
            {
              "hits": [
                {
                  "id": 1,
                  "doggo": "intel",
                  "_geo": {
                    "lat": 88,
                    "lng": 0
                  }
                }
              ],
              "query": "intel",
              "processingTimeMs": "[time]",
              "limit": 20,
              "offset": 0,
              "estimatedTotalHits": 1
            }
            "###);
        },
    )
    .await;
}

#[actix_rt::test]
async fn geo_sort_with_words() {
    let documents = json!([
        { "id": 0, "doggo": "jean", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 0 } },
        { "id": 1, "doggo": "intel", RESERVED_GEO_FIELD_NAME: { "lat": 88, "lng": 0 } },
        { "id": 2, "doggo": "jean bob", RESERVED_GEO_FIELD_NAME: { "lat": -89, "lng": 0 } },
        { "id": 3, "doggo": "jean michel", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": 178 } },
        { "id": 4, "doggo": "bob marley", RESERVED_GEO_FIELD_NAME: { "lat": 0, "lng": -179 } },
    ]);

    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &json!({"searchableAttributes": ["id", "doggo"], "rankingRules": ["words", "sort"], "sortableAttributes": [RESERVED_GEO_FIELD_NAME]}),
        &json!({"q": "jean", "sort": ["_geoPoint(0.0, 0.0):asc"]}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response, { ".processingTimeMs" => "[time]" }), @r###"
            {
              "hits": [
                {
                  "id": 0,
                  "doggo": "jean",
                  "_geo": {
                    "lat": 0,
                    "lng": 0
                  },
                  "_geoDistance": 0
                },
                {
                  "id": 2,
                  "doggo": "jean bob",
                  "_geo": {
                    "lat": -89,
                    "lng": 0
                  },
                  "_geoDistance": 9896348
                },
                {
                  "id": 3,
                  "doggo": "jean michel",
                  "_geo": {
                    "lat": 0,
                    "lng": 178
                  },
                  "_geoDistance": 19792697
                }
              ],
              "query": "jean",
              "processingTimeMs": "[time]",
              "limit": 20,
              "offset": 0,
              "estimatedTotalHits": 3
            }
            "###);
        },
    )
    .await;
}

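// Both tests above exercise geo ordering through different routes: `geo_asc_with_words`
// ranks with the `geo:asc` ranking rule and no sort parameter, while `geo_sort_with_words`
// uses the `sort` ranking rule plus a `_geoPoint(lat, lng):asc` expression at search
// time, which is also what surfaces `_geoDistance` in the hits.
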
@ -4,6 +4,7 @@
mod distinct;
mod errors;
mod facet_search;
mod filters;
mod formatted;
mod geo;
mod hybrid;
@ -21,10 +22,58 @@ use tempfile::TempDir;

use crate::common::{
    default_settings, shared_index_with_documents, shared_index_with_nested_documents, Server,
    DOCUMENTS, FRUITS_DOCUMENTS, NESTED_DOCUMENTS, SCORE_DOCUMENTS, VECTOR_DOCUMENTS,
    Value, DOCUMENTS, FRUITS_DOCUMENTS, NESTED_DOCUMENTS, SCORE_DOCUMENTS, VECTOR_DOCUMENTS,
};
use crate::json;

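/// Shared helper for the search tests: runs `test` once with the documents indexed
/// before the settings and once with the settings applied first, deleting the index
/// between the two passes.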
async fn test_settings_documents_indexing_swapping_and_search(
    documents: &Value,
    settings: &Value,
    query: &Value,
    test: impl Fn(Value, actix_http::StatusCode) + std::panic::UnwindSafe + Clone,
) {
    let temp = TempDir::new().unwrap();
    let server = Server::new_with_options(Opt { ..default_settings(temp.path()) }).await.unwrap();

    eprintln!("Documents -> Settings -> test");
    let index = server.index("test");

    let (task, code) = index.add_documents(documents.clone(), None).await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    let (task, code) = index.update_settings(settings.clone()).await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    index.search(query.clone(), test.clone()).await;
    let (task, code) = server.delete_index("test").await;
    assert_eq!(code, 202, "{}", task);
    let response = server.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    eprintln!("Settings -> Documents -> test");
    let index = server.index("test");

    let (task, code) = index.update_settings(settings.clone()).await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    let (task, code) = index.add_documents(documents.clone(), None).await;
    assert_eq!(code, 202, "{}", task);
    let response = index.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);

    index.search(query.clone(), test.clone()).await;
    let (task, code) = server.delete_index("test").await;
    assert_eq!(code, 202, "{}", task);
    let response = server.wait_task(task.uid()).await;
    assert!(response.is_success(), "{:?}", response);
}

#[actix_rt::test]
async fn simple_placeholder_search() {
    let index = shared_index_with_documents().await;
@ -355,118 +404,6 @@ async fn search_multiple_params() {
    .await;
}

#[actix_rt::test]
async fn search_with_filter_string_notation() {
    let server = Server::new().await;
    let index = server.index("test");

    let (_, code) = index.update_settings(json!({"filterableAttributes": ["title"]})).await;
    meili_snap::snapshot!(code, @"202 Accepted");

    let documents = DOCUMENTS.clone();
    let (task, code) = index.add_documents(documents, None).await;
    meili_snap::snapshot!(code, @"202 Accepted");
    let res = index.wait_task(task.uid()).await;
    meili_snap::snapshot!(res["status"], @r###""succeeded""###);

    index
        .search(
            json!({
                "filter": "title = Gläss"
            }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(response["hits"].as_array().unwrap().len(), 1);
            },
        )
        .await;

    let index = server.index("nested");

    let (_, code) =
        index.update_settings(json!({"filterableAttributes": ["cattos", "doggos.age"]})).await;
    meili_snap::snapshot!(code, @"202 Accepted");

    let documents = NESTED_DOCUMENTS.clone();
    let (task, code) = index.add_documents(documents, None).await;
    meili_snap::snapshot!(code, @"202 Accepted");
    let res = index.wait_task(task.uid()).await;
    meili_snap::snapshot!(res["status"], @r###""succeeded""###);

    index
        .search(
            json!({
                "filter": "cattos = pésti"
            }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(response["hits"].as_array().unwrap().len(), 1);
                assert_eq!(response["hits"][0]["id"], json!(852));
            },
        )
        .await;

    index
        .search(
            json!({
                "filter": "doggos.age > 5"
            }),
            |response, code| {
                assert_eq!(code, 200, "{}", response);
                assert_eq!(response["hits"].as_array().unwrap().len(), 2);
                assert_eq!(response["hits"][0]["id"], json!(654));
                assert_eq!(response["hits"][1]["id"], json!(951));
            },
        )
        .await;
}

#[actix_rt::test]
async fn search_with_filter_array_notation() {
    let index = shared_index_with_documents().await;
    let (response, code) = index
        .search_post(json!({
            "filter": ["title = Gläss"]
        }))
        .await;
    assert_eq!(code, 200, "{}", response);
    assert_eq!(response["hits"].as_array().unwrap().len(), 1);

    let (response, code) = index
        .search_post(json!({
            "filter": [["title = Gläss", "title = \"Shazam!\"", "title = \"Escape Room\""]]
        }))
        .await;
    assert_eq!(code, 200, "{}", response);
    assert_eq!(response["hits"].as_array().unwrap().len(), 3);
}

#[actix_rt::test]
async fn search_with_contains_filter() {
    let temp = TempDir::new().unwrap();
    let server = Server::new_with_options(Opt {
        experimental_contains_filter: true,
        ..default_settings(temp.path())
    })
    .await
    .unwrap();
    let index = server.index("movies");

    index.update_settings(json!({"filterableAttributes": ["title"]})).await;

    let documents = DOCUMENTS.clone();
    let (request, _code) = index.add_documents(documents, None).await;
    index.wait_task(request.uid()).await.succeeded();

    let (response, code) = index
        .search_post(json!({
            "filter": "title CONTAINS cap"
        }))
        .await;
    assert_eq!(code, 200, "{}", response);
    assert_eq!(response["hits"].as_array().unwrap().len(), 2);
}

#[actix_rt::test]
async fn search_with_sort_on_numbers() {
    let index = shared_index_with_documents().await;
@ -589,7 +526,7 @@ async fn search_facet_distribution() {
        |response, code| {
            assert_eq!(code, 200, "{}", response);
|
||||
let dist = response["facetDistribution"].as_object().unwrap();
|
||||
assert_eq!(dist.len(), 1);
|
||||
assert_eq!(dist.len(), 1, "{:?}", dist);
|
||||
assert_eq!(
|
||||
dist["doggos.name"],
|
||||
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
|
||||
@ -606,7 +543,7 @@ async fn search_facet_distribution() {
|
||||
|response, code| {
|
||||
assert_eq!(code, 200, "{}", response);
|
||||
let dist = response["facetDistribution"].as_object().unwrap();
|
||||
assert_eq!(dist.len(), 3);
|
||||
assert_eq!(dist.len(), 3, "{:?}", dist);
|
||||
assert_eq!(
|
||||
dist["doggos.name"],
|
||||
json!({ "bobby": 1, "buddy": 1, "gros bill": 1, "turbo": 1, "fast": 1})
|
||||
@ -1559,6 +1496,293 @@ async fn change_attributes_settings() {
|
||||
.await;
|
||||
}

#[actix_rt::test]
async fn test_nested_fields() {
    let documents = json!([
        {
            "id": 0,
            "title": "The zeroth document",
        },
        {
            "id": 1,
            "title": "The first document",
            "nested": {
                "object": "field",
                "machin": "bidule",
            },
        },
        {
            "id": 2,
            "title": "The second document",
            "nested": [
                "array",
                {
                    "object": "field",
                },
                {
                    "prout": "truc",
                    "machin": "lol",
                },
            ],
        },
        {
            "id": 3,
            "title": "The third document",
            "nested": "I lied",
        },
    ]);

    let settings = json!({
        "searchableAttributes": ["title", "nested.object", "nested.machin"],
        "filterableAttributes": ["title", "nested.object", "nested.machin"]
    });

    // Test empty search returns all documents
    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"q": "document"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 0,
                "title": "The zeroth document"
              },
              {
                "id": 1,
                "title": "The first document",
                "nested": {
                  "object": "field",
                  "machin": "bidule"
                }
              },
              {
                "id": 2,
                "title": "The second document",
                "nested": [
                  "array",
                  {
                    "object": "field"
                  },
                  {
                    "prout": "truc",
                    "machin": "lol"
                  }
                ]
              },
              {
                "id": 3,
                "title": "The third document",
                "nested": "I lied"
              }
            ]
            "###);
        },
    )
    .await;

    // Test searching specific documents
    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"q": "zeroth"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 0,
                "title": "The zeroth document"
              }
            ]
            "###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"q": "first"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 1,
                "title": "The first document",
                "nested": {
                  "object": "field",
                  "machin": "bidule"
                }
              }
            ]
            "###);
        },
    )
    .await;

    // Test searching nested fields
    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"q": "field"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 1,
                "title": "The first document",
                "nested": {
                  "object": "field",
                  "machin": "bidule"
                }
              },
              {
                "id": 2,
                "title": "The second document",
                "nested": [
                  "array",
                  {
                    "object": "field"
                  },
                  {
                    "prout": "truc",
                    "machin": "lol"
                  }
                ]
              }
            ]
            "###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"q": "array"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            // nested is not searchable
            snapshot!(json_string!(response["hits"]), @"[]");
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"q": "lied"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            // nested is not searchable
            snapshot!(json_string!(response["hits"]), @"[]");
        },
    )
    .await;

    // Test filtering on nested fields
    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"filter": "nested.object = field"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 1,
                "title": "The first document",
                "nested": {
                  "object": "field",
                  "machin": "bidule"
                }
              },
              {
                "id": 2,
                "title": "The second document",
                "nested": [
                  "array",
                  {
                    "object": "field"
                  },
                  {
                    "prout": "truc",
                    "machin": "lol"
                  }
                ]
              }
            ]
            "###);
        },
    )
    .await;

    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"filter": "nested.machin = bidule"}),
        |response, code| {
            assert_eq!(code, 200, "{}", response);
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 1,
                "title": "The first document",
                "nested": {
                  "object": "field",
                  "machin": "bidule"
                }
              }
            ]
            "###);
        },
    )
    .await;

    // Test filtering on non-filterable nested field fails
    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"filter": "nested = array"}),
        |response, code| {
            assert_eq!(code, 400, "{}", response);
            snapshot!(json_string!(response), @r###"
            {
              "message": "Index `test`: Attribute `nested` is not filterable. Available filterable attribute patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = array",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
        },
    )
    .await;

    // Test filtering on non-filterable nested field fails
    test_settings_documents_indexing_swapping_and_search(
        &documents,
        &settings,
        &json!({"filter": r#"nested = "I lied""#}),
        |response, code| {
            assert_eq!(code, 400, "{}", response);
            snapshot!(json_string!(response), @r###"
            {
              "message": "Index `test`: Attribute `nested` is not filterable. Available filterable attribute patterns are: `nested.machin`, `nested.object`, `title`.\n1:7 nested = \"I lied\"",
              "code": "invalid_search_filter",
              "type": "invalid_request",
              "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
            }
            "###);
        },
    )
    .await;
}

/// Modifying facets with different casing should work correctly
#[actix_rt::test]
async fn change_facet_casing() {

@ -3647,7 +3647,7 @@ async fn federation_non_faceted_for_an_index() {
    snapshot!(code, @"400 Bad Request");
    insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###"
    {
      "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`",
      "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attribute patterns are `BOOST, id`.\n - Note: index `fruits-no-name` used in `.queries[1]`",
      "code": "invalid_multi_search_facets",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets"

@ -3669,7 +3669,7 @@ async fn federation_non_faceted_for_an_index() {
    snapshot!(code, @"400 Bad Request");
    insta::assert_json_snapshot!(response, { ".processingTimeMs" => "[time]" }, @r###"
    {
      "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attributes are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries",
      "message": "Inside `.federation.facetsByIndex.fruits-no-name`: Invalid facet distribution, attribute `name` is not filterable. The available filterable attribute patterns are `BOOST, id`.\n - Note: index `fruits-no-name` is not used in queries",
      "code": "invalid_multi_search_facets",
      "type": "invalid_request",
      "link": "https://docs.meilisearch.com/errors#invalid_multi_search_facets"

@ -1,3 +1,5 @@
use meili_snap::{json_string, snapshot};

use crate::common::Server;
use crate::json;

@ -510,3 +512,127 @@ async fn set_and_reset_distinct_attribute_with_dedicated_route() {

    assert_eq!(response, json!(null));
}

#[actix_rt::test]
async fn granular_filterable_attributes() {
    let server = Server::new().await;
    let index = server.index("test");
    index.create(None).await;

    let (response, code) =
        index.update_settings(json!({ "filterableAttributes": [
            { "attributePatterns": ["name"], "features": { "facetSearch": true, "filter": {"equality": true, "comparison": false} } },
            { "attributePatterns": ["age"], "features": { "facetSearch": false, "filter": {"equality": true, "comparison": true} } },
            { "attributePatterns": ["id"] },
            { "attributePatterns": ["default-filterable-features-null"], "features": { "facetSearch": true } },
            { "attributePatterns": ["default-filterable-features-equality"], "features": { "facetSearch": true, "filter": {"comparison": true} } },
            { "attributePatterns": ["default-filterable-features-comparison"], "features": { "facetSearch": true, "filter": {"equality": true} } },
            { "attributePatterns": ["default-filterable-features-empty"], "features": { "facetSearch": true, "filter": {} } },
            { "attributePatterns": ["default-facet-search"], "features": { "filter": {"equality": true, "comparison": true} } },
        ] })).await;
    assert_eq!(code, 202);
    index.wait_task(response.uid()).await.succeeded();

    let (response, code) = index.settings().await;
    assert_eq!(code, 200, "{}", response);
    snapshot!(json_string!(response["filterableAttributes"]), @r###"
    [
      {
        "attributePatterns": [
          "name"
        ],
        "features": {
          "facetSearch": true,
          "filter": {
            "equality": true,
            "comparison": false
          }
        }
      },
      {
        "attributePatterns": [
          "age"
        ],
        "features": {
          "facetSearch": false,
          "filter": {
            "equality": true,
            "comparison": true
          }
        }
      },
      {
        "attributePatterns": [
          "id"
        ],
        "features": {
          "facetSearch": false,
          "filter": {
            "equality": true,
            "comparison": false
          }
        }
      },
      {
        "attributePatterns": [
          "default-filterable-features-null"
        ],
        "features": {
          "facetSearch": true,
          "filter": {
            "equality": true,
            "comparison": false
          }
        }
      },
      {
        "attributePatterns": [
          "default-filterable-features-equality"
        ],
        "features": {
          "facetSearch": true,
          "filter": {
            "equality": true,
            "comparison": true
          }
        }
      },
      {
        "attributePatterns": [
          "default-filterable-features-comparison"
        ],
        "features": {
          "facetSearch": true,
          "filter": {
            "equality": true,
            "comparison": false
          }
        }
      },
      {
        "attributePatterns": [
          "default-filterable-features-empty"
        ],
        "features": {
          "facetSearch": true,
          "filter": {
            "equality": true,
            "comparison": false
          }
        }
      },
      {
        "attributePatterns": [
          "default-facet-search"
        ],
        "features": {
          "facetSearch": false,
          "filter": {
            "equality": true,
            "comparison": true
          }
        }
      }
    ]
    "###);
}
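
The snapshot above pins down the defaults: when `features`, `filter`, or one of the filter flags is omitted, `facetSearch` falls back to `false`, `filter.equality` to `true`, and `filter.comparison` to `false`. A minimal check of that behavior, assuming milli's `FilterableAttributesFeatures` (defined in the new `filterable_attributes_rules.rs` module further below) and `serde_json` are in scope:

```rust
// Deserializing an empty `features` object yields the documented defaults:
// facetSearch = false, filter.equality = true, filter.comparison = false.
let features: FilterableAttributesFeatures = serde_json::from_str("{}").unwrap();
assert!(!features.is_facet_searchable());
assert!(features.is_filterable_equality());
assert!(!features.is_filterable_comparison());
```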

@ -452,18 +452,19 @@ async fn filter_invalid_attribute_array() {
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await.succeeded();

    let expected_response = json!({
        "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(
            json!({"id": 287947, "filter": ["many = Glass"], "embedder": "manual"}),
            |response, code| {
                assert_eq!(response, expected_response);
                assert_eq!(code, 400);
                snapshot!(code, @"400 Bad Request");
                snapshot!(response, @r###"
                {
                  "message": "Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass",
                  "code": "invalid_similar_filter",
                  "type": "invalid_request",
                  "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
                }
                "###);
            },
        )
        .await;

@ -492,18 +493,19 @@ async fn filter_invalid_attribute_string() {
    snapshot!(code, @"202 Accepted");
    index.wait_task(value.uid()).await.succeeded();

    let expected_response = json!({
        "message": "Attribute `many` is not filterable. Available filterable attributes are: `title`.\n1:5 many = Glass",
        "code": "invalid_similar_filter",
        "type": "invalid_request",
        "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
    });
    index
        .similar(
            json!({"id": 287947, "filter": "many = Glass", "embedder": "manual"}),
            |response, code| {
                assert_eq!(response, expected_response);
                assert_eq!(code, 400);
                snapshot!(code, @"400 Bad Request");
                snapshot!(response, @r###"
                {
                  "message": "Attribute `many` is not filterable. Available filterable attribute patterns are: `title`.\n1:5 many = Glass",
                  "code": "invalid_similar_filter",
                  "type": "invalid_request",
                  "link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
                }
                "###);
            },
        )
        .await;

152 crates/milli/src/attribute_patterns.rs Normal file
@ -0,0 +1,152 @@
use deserr::Deserr;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;

use crate::is_faceted_by;

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[repr(transparent)]
#[serde(transparent)]
pub struct AttributePatterns {
    #[schema(example = json!(["title", "overview_*", "release_date"]))]
    pub patterns: Vec<String>,
}

impl<E: deserr::DeserializeError> Deserr<E> for AttributePatterns {
    fn deserialize_from_value<V: deserr::IntoValue>(
        value: deserr::Value<V>,
        location: deserr::ValuePointerRef,
    ) -> Result<Self, E> {
        Vec::<String>::deserialize_from_value(value, location).map(|patterns| Self { patterns })
    }
}

impl From<Vec<String>> for AttributePatterns {
    fn from(patterns: Vec<String>) -> Self {
        Self { patterns }
    }
}

impl AttributePatterns {
    /// Match a string against the attribute patterns using the match_pattern function.
    pub fn match_str(&self, str: &str) -> PatternMatch {
        let mut pattern_match = PatternMatch::NoMatch;
        for pattern in &self.patterns {
            match match_pattern(pattern, str) {
                PatternMatch::Match => return PatternMatch::Match,
                PatternMatch::Parent => pattern_match = PatternMatch::Parent,
                PatternMatch::NoMatch => {}
            }
        }
        pattern_match
    }
}

/// Match a string against a pattern.
///
/// The pattern can be a wildcard, a prefix, a suffix or an exact match.
///
/// # Arguments
///
/// * `pattern` - The pattern to match against.
/// * `str` - The string to match against the pattern.
fn match_pattern(pattern: &str, str: &str) -> PatternMatch {
    // If the pattern is a wildcard, return Match
    if pattern == "*" {
        return PatternMatch::Match;
    } else if pattern.starts_with('*') && pattern.ends_with('*') {
        // If the pattern starts and ends with a wildcard, return Match if the string contains the pattern without the wildcards
        if str.contains(&pattern[1..pattern.len() - 1]) {
            return PatternMatch::Match;
        }
    } else if let Some(pattern) = pattern.strip_prefix('*') {
        // If the pattern starts with a wildcard, return Match if the string ends with the pattern without the wildcard
        if str.ends_with(pattern) {
            return PatternMatch::Match;
        }
    } else if let Some(pattern) = pattern.strip_suffix('*') {
        // If the pattern ends with a wildcard, return Match if the string starts with the pattern without the wildcard
        if str.starts_with(pattern) {
            return PatternMatch::Match;
        }
    } else if pattern == str {
        // If the pattern is exactly the string, return Match
        return PatternMatch::Match;
    }

    // If the field is a parent field of the pattern, return Parent
    if is_faceted_by(pattern, str) {
        PatternMatch::Parent
    } else {
        PatternMatch::NoMatch
    }
}

/// Match a field against a pattern using the legacy behavior.
///
/// A field matches a pattern if it is a parent of the pattern or if it is the pattern itself.
/// This behavior is used to match the sortable attributes, the searchable attributes and the filterable attributes rules `Field`.
///
/// # Arguments
///
/// * `pattern` - The pattern to match against.
/// * `field` - The field to match against the pattern.
pub fn match_field_legacy(pattern: &str, field: &str) -> PatternMatch {
    if is_faceted_by(field, pattern) {
        // If the field matches the pattern or is a nested field of the pattern, return Match (legacy behavior)
        PatternMatch::Match
    } else if is_faceted_by(pattern, field) {
        // If the field is a parent field of the pattern, return Parent
        PatternMatch::Parent
    } else {
        // If the field does not match the pattern and is not a parent of a nested field that matches the pattern, return NoMatch
        PatternMatch::NoMatch
    }
}

/// Match a field against a distinct field.
pub fn match_distinct_field(distinct_field: Option<&str>, field: &str) -> PatternMatch {
    if let Some(distinct_field) = distinct_field {
        if field == distinct_field {
            // If the field matches exactly the distinct field, return Match
            return PatternMatch::Match;
        } else if is_faceted_by(distinct_field, field) {
            // If the field is a parent field of the distinct field, return Parent
            return PatternMatch::Parent;
        }
    }
    // If the field does not match the distinct field and is not a parent of a nested field that matches the distinct field, return NoMatch
    PatternMatch::NoMatch
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PatternMatch {
    /// The field is a parent of a nested field that matches the pattern
    /// For example, the field is `toto`, and the pattern is `toto.titi`
    Parent,
    /// The field matches the pattern
    Match,
    /// The field does not match the pattern
    NoMatch,
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_match_pattern() {
        assert_eq!(match_pattern("*", "test"), PatternMatch::Match);
        assert_eq!(match_pattern("test*", "test"), PatternMatch::Match);
        assert_eq!(match_pattern("test*", "testa"), PatternMatch::Match);
        assert_eq!(match_pattern("*test", "test"), PatternMatch::Match);
        assert_eq!(match_pattern("*test", "atest"), PatternMatch::Match);
        assert_eq!(match_pattern("*test*", "test"), PatternMatch::Match);
        assert_eq!(match_pattern("*test*", "atesta"), PatternMatch::Match);
        assert_eq!(match_pattern("*test*", "atest"), PatternMatch::Match);
        assert_eq!(match_pattern("*test*", "testa"), PatternMatch::Match);
        assert_eq!(match_pattern("test*test", "test"), PatternMatch::NoMatch);
        assert_eq!(match_pattern("*test", "testa"), PatternMatch::NoMatch);
        assert_eq!(match_pattern("test*", "atest"), PatternMatch::NoMatch);
    }
}
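
A quick illustration of the matching rules above, assuming `AttributePatterns` and `PatternMatch` are imported from this module:

```rust
// "overview_*" is a prefix wildcard, "doggos.age" matches exactly,
// and "doggos" alone is only the parent of "doggos.age".
let patterns =
    AttributePatterns::from(vec!["overview_*".to_string(), "doggos.age".to_string()]);
assert_eq!(patterns.match_str("overview_fr"), PatternMatch::Match);
assert_eq!(patterns.match_str("doggos.age"), PatternMatch::Match);
assert_eq!(patterns.match_str("doggos"), PatternMatch::Parent);
assert_eq!(patterns.match_str("cattos"), PatternMatch::NoMatch);
```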

@ -122,10 +122,10 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
and can not be more than 511 bytes.", .document_id.to_string()
    )]
    InvalidDocumentId { document_id: Value },
    #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]
    #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_patterns))]
    InvalidFacetsDistribution {
        invalid_facets_name: BTreeSet<String>,
        valid_facets_name: BTreeSet<String>,
        valid_patterns: BTreeSet<String>,
    },
    #[error(transparent)]
    InvalidGeoField(#[from] GeoError),

@ -139,6 +139,13 @@ and can not be more than 511 bytes.", .document_id.to_string()
    InvalidFilter(String),
    #[error("Invalid type for filter subexpression: expected: {}, found: {}.", .0.join(", "), .1)]
    InvalidFilterExpression(&'static [&'static str], Value),
    #[error("Filter operator `{operator}` is not allowed for the attribute `{field}`.\n - Note: allowed operators: {}.\n - Note: field `{field}` {} in `filterableAttributes`", allowed_operators.join(", "), format!("matched rule #{rule_index}"))]
    FilterOperatorNotAllowed {
        field: String,
        allowed_operators: Vec<String>,
        operator: String,
        rule_index: usize,
    },
    #[error("Attribute `{}` is not sortable. {}",
        .field,
        match .valid_fields.is_empty() {

@ -152,28 +159,32 @@ and can not be more than 511 bytes.", .document_id.to_string()
    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
    #[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
        .field,
        match .valid_fields.is_empty() {
        match .valid_patterns.is_empty() {
            true => "This index does not have configured filterable attributes.".to_string(),
            false => format!("Available filterable attributes are: `{}{}`.",
                valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
            false => format!("Available filterable attributes patterns are: `{}{}`.",
                valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
                .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
            ),
        }
    )]
    InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
    InvalidDistinctAttribute {
        field: String,
        valid_patterns: BTreeSet<String>,
        hidden_fields: bool,
    },
    #[error("Attribute `{}` is not facet-searchable. {}",
        .field,
        match .valid_fields.is_empty() {
        match .valid_patterns.is_empty() {
            true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(),
            false => format!("Available facet-searchable attributes are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
                valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
            false => format!("Available facet-searchable attributes patterns are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
                valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
                .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
            ),
        }
    )]
    InvalidFacetSearchFacetName {
        field: String,
        valid_fields: BTreeSet<String>,
        valid_patterns: BTreeSet<String>,
        hidden_fields: bool,
    },
    #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",

@ -380,9 +391,9 @@ pub enum GeoError {

fn format_invalid_filter_distribution(
    invalid_facets_name: &BTreeSet<String>,
    valid_facets_name: &BTreeSet<String>,
    valid_patterns: &BTreeSet<String>,
) -> String {
    if valid_facets_name.is_empty() {
    if valid_patterns.is_empty() {
        return "this index does not have configured filterable attributes.".into();
    }

@ -404,17 +415,17 @@ fn format_invalid_filter_distribution(
        .unwrap(),
    };

    match valid_facets_name.len() {
    match valid_patterns.len() {
        1 => write!(
            result,
            " The available filterable attribute is `{}`.",
            valid_facets_name.first().unwrap()
            " The available filterable attribute pattern is `{}`.",
            valid_patterns.first().unwrap()
        )
        .unwrap(),
        _ => write!(
            result,
            " The available filterable attributes are `{}`.",
            valid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
            " The available filterable attribute patterns are `{}`.",
            valid_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
        )
        .unwrap(),
    }

@ -43,11 +43,6 @@ impl FieldidsWeightsMap {
        self.map.get(&fid).copied()
    }

    /// Returns highest weight contained in the map if any.
    pub fn max_weight(&self) -> Option<Weight> {
        self.map.values().copied().max()
    }

    /// Return an iterator visiting all field ids in arbitrary order.
    pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
        self.map.keys().copied()

@ -105,6 +105,18 @@ impl<'indexing> GlobalFieldsIdsMap<'indexing> {

        self.local.name(id)
    }

    /// Get the metadata of a field based on its id.
    pub fn metadata(&mut self, id: FieldId) -> Option<Metadata> {
        if self.local.metadata(id).is_none() {
            let global = self.global.read().unwrap();

            let (name, metadata) = global.name_with_metadata(id)?;
            self.local.insert(name, id, metadata);
        }

        self.local.metadata(id)
    }
}

impl<'indexing> MutFieldIdMapper for GlobalFieldsIdsMap<'indexing> {

@ -5,14 +5,29 @@ use charabia::Language;
use heed::RoTxn;

use super::FieldsIdsMap;
use crate::{FieldId, Index, LocalizedAttributesRule, Result};
use crate::attribute_patterns::{match_field_legacy, PatternMatch};
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::{
    is_faceted_by, FieldId, FilterableAttributesFeatures, FilterableAttributesRule, Index,
    LocalizedAttributesRule, Result, Weight,
};

#[derive(Debug, Clone, Copy)]
pub struct Metadata {
    pub searchable: bool,
    pub filterable: bool,
    /// The weight as defined in the FieldidsWeightsMap of the searchable attribute if it is searchable.
    pub searchable: Option<Weight>,
    /// The field is part of the sortable attributes.
    pub sortable: bool,
    localized_attributes_rule_id: Option<NonZeroU16>,
    /// The field is defined as the distinct attribute.
    pub distinct: bool,
    /// The field has been defined as asc/desc in the ranking rules.
    pub asc_desc: bool,
    /// The field is a geo field (`_geo`, `_geo.lat`, `_geo.lng`).
    pub geo: bool,
    /// The id of the localized attributes rule if the field is localized.
    pub localized_attributes_rule_id: Option<NonZeroU16>,
    /// The id of the filterable attributes rule if the field is filterable.
    pub filterable_attributes_rule_id: Option<NonZeroU16>,
}

#[derive(Debug, Clone)]

@ -106,76 +121,227 @@ impl Metadata {
        let rule = rules.get((localized_attributes_rule_id - 1) as usize).unwrap();
        Some(rule.locales())
    }

    pub fn filterable_attributes<'rules>(
        &self,
        rules: &'rules [FilterableAttributesRule],
    ) -> Option<&'rules FilterableAttributesRule> {
        self.filterable_attributes_with_rule_index(rules).map(|(_, rule)| rule)
    }

    pub fn filterable_attributes_with_rule_index<'rules>(
        &self,
        rules: &'rules [FilterableAttributesRule],
    ) -> Option<(usize, &'rules FilterableAttributesRule)> {
        let filterable_attributes_rule_id = self.filterable_attributes_rule_id?.get();
        let rule_id = (filterable_attributes_rule_id - 1) as usize;
        let rule = rules.get(rule_id).unwrap();
        Some((rule_id, rule))
    }

    pub fn filterable_attributes_features(
        &self,
        rules: &[FilterableAttributesRule],
    ) -> FilterableAttributesFeatures {
        let (_, features) = self.filterable_attributes_features_with_rule_index(rules);
        features
    }

    pub fn filterable_attributes_features_with_rule_index(
        &self,
        rules: &[FilterableAttributesRule],
    ) -> (Option<usize>, FilterableAttributesFeatures) {
        self.filterable_attributes_with_rule_index(rules)
            .map(|(rule_index, rule)| (Some(rule_index), rule.features()))
            // if there is no filterable attributes rule, return no features
            .unwrap_or_else(|| (None, FilterableAttributesFeatures::no_features()))
    }

    pub fn is_sortable(&self) -> bool {
        self.sortable
    }

    pub fn is_searchable(&self) -> bool {
        self.searchable.is_some()
    }

    pub fn searchable_weight(&self) -> Option<Weight> {
        self.searchable
    }

    pub fn is_distinct(&self) -> bool {
        self.distinct
    }

    pub fn is_asc_desc(&self) -> bool {
        self.asc_desc
    }

    pub fn is_geo(&self) -> bool {
        self.geo
    }

    /// Returns `true` if the field is part of the facet databases. (sortable, distinct, asc_desc, filterable or facet searchable)
    pub fn is_faceted(&self, rules: &[FilterableAttributesRule]) -> bool {
        if self.is_distinct() || self.is_sortable() || self.is_asc_desc() {
            return true;
        }

        let features = self.filterable_attributes_features(rules);
        if features.is_filterable() || features.is_facet_searchable() {
            return true;
        }

        false
    }

    pub fn require_facet_level_database(&self, rules: &[FilterableAttributesRule]) -> bool {
        let features = self.filterable_attributes_features(rules);

        self.is_sortable() || self.is_asc_desc() || features.is_filterable_comparison()
    }
}

#[derive(Debug, Clone)]
pub struct MetadataBuilder {
    searchable_attributes: Vec<String>,
    filterable_attributes: HashSet<String>,
    searchable_attributes: Option<Vec<String>>,
    filterable_attributes: Vec<FilterableAttributesRule>,
    sortable_attributes: HashSet<String>,
    localized_attributes: Option<Vec<LocalizedAttributesRule>>,
    distinct_attribute: Option<String>,
    asc_desc_attributes: HashSet<String>,
}

impl MetadataBuilder {
    pub fn from_index(index: &Index, rtxn: &RoTxn) -> Result<Self> {
        let searchable_attributes =
            index.searchable_fields(rtxn)?.into_iter().map(|s| s.to_string()).collect();
        let filterable_attributes = index.filterable_fields(rtxn)?;
        let searchable_attributes = index
            .user_defined_searchable_fields(rtxn)?
            .map(|fields| fields.into_iter().map(|s| s.to_string()).collect());
        let filterable_attributes = index.filterable_attributes_rules(rtxn)?;
        let sortable_attributes = index.sortable_fields(rtxn)?;
        let localized_attributes = index.localized_attributes_rules(rtxn)?;
        let distinct_attribute = index.distinct_field(rtxn)?.map(|s| s.to_string());
        let asc_desc_attributes = index.asc_desc_fields(rtxn)?;

        Ok(Self {
        Ok(Self::new(
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            localized_attributes,
        })
            distinct_attribute,
            asc_desc_attributes,
        ))
    }

    /// Build a new `MetadataBuilder` from the given parameters.
    ///
    /// This is used for testing, prefer using `MetadataBuilder::from_index` instead.
    pub fn new(
        searchable_attributes: Vec<String>,
        filterable_attributes: HashSet<String>,
        searchable_attributes: Option<Vec<String>>,
        filterable_attributes: Vec<FilterableAttributesRule>,
        sortable_attributes: HashSet<String>,
        localized_attributes: Option<Vec<LocalizedAttributesRule>>,
        distinct_attribute: Option<String>,
        asc_desc_attributes: HashSet<String>,
    ) -> Self {
        let searchable_attributes = match searchable_attributes {
            Some(fields) if fields.iter().any(|f| f == "*") => None,
            Some(fields) => Some(fields),
            None => None,
        };

        Self {
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            localized_attributes,
            distinct_attribute,
            asc_desc_attributes,
        }
    }

    pub fn metadata_for_field(&self, field: &str) -> Metadata {
        let searchable = self
            .searchable_attributes
        if is_faceted_by(field, RESERVED_VECTORS_FIELD_NAME) {
            // Vector fields are not searchable, filterable, distinct or asc_desc
            return Metadata {
                searchable: None,
                sortable: false,
                distinct: false,
                asc_desc: false,
                geo: false,
                localized_attributes_rule_id: None,
                filterable_attributes_rule_id: None,
            };
        }

        // A field is sortable if it is faceted by a sortable attribute
        let sortable = self
            .sortable_attributes
            .iter()
            .any(|attribute| attribute == "*" || attribute == field);
            .any(|pattern| match_field_legacy(pattern, field) == PatternMatch::Match);

        let filterable = self.filterable_attributes.contains(field);
        let filterable_attributes_rule_id = self
            .filterable_attributes
            .iter()
            .position(|attribute| attribute.match_str(field) == PatternMatch::Match)
            // saturating_add(1): make `id` `NonZero`
            .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());

        let sortable = self.sortable_attributes.contains(field);
        if match_field_legacy(RESERVED_GEO_FIELD_NAME, field) == PatternMatch::Match {
            // Geo fields are not searchable, distinct or asc_desc
            return Metadata {
                searchable: None,
                sortable,
                distinct: false,
                asc_desc: false,
                geo: true,
                localized_attributes_rule_id: None,
                filterable_attributes_rule_id,
            };
        }

        let searchable = match &self.searchable_attributes {
            // A field is searchable if it is faceted by a searchable attribute
            Some(attributes) => attributes
                .iter()
                .enumerate()
                .find(|(_i, pattern)| is_faceted_by(field, pattern))
                .map(|(i, _)| i as u16),
            None => Some(0),
        };

        let distinct =
            self.distinct_attribute.as_ref().is_some_and(|distinct_field| field == distinct_field);
        let asc_desc = self.asc_desc_attributes.contains(field);

        let localized_attributes_rule_id = self
            .localized_attributes
            .iter()
            .flat_map(|v| v.iter())
            .position(|rule| rule.match_str(field))
            .position(|rule| rule.match_str(field) == PatternMatch::Match)
            // saturating_add(1): make `id` `NonZero`
            .map(|id| NonZeroU16::new(id.saturating_add(1).try_into().unwrap()).unwrap());

        Metadata { searchable, filterable, sortable, localized_attributes_rule_id }
        Metadata {
            searchable,
            sortable,
            distinct,
            asc_desc,
            geo: false,
            localized_attributes_rule_id,
            filterable_attributes_rule_id,
        }
    }

    pub fn searchable_attributes(&self) -> &[String] {
        self.searchable_attributes.as_slice()
    pub fn searchable_attributes(&self) -> Option<&[String]> {
        self.searchable_attributes.as_deref()
    }

    pub fn sortable_attributes(&self) -> &HashSet<String> {
        &self.sortable_attributes
    }

    pub fn filterable_attributes(&self) -> &HashSet<String> {
    pub fn filterable_attributes(&self) -> &[FilterableAttributesRule] {
        &self.filterable_attributes
    }
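
To see how the builder resolves a field, here is a hypothetical use of the `MetadataBuilder::new` constructor above (field names and rules are made up, and the usual imports are assumed):

```rust
// A single legacy `Field` rule: "doggos" is filterable with the legacy
// defaults, and "doggos.age" inherits them because a `Field` rule also
// matches sub-fields.
let builder = MetadataBuilder::new(
    None,                                                        // every field is searchable
    vec![FilterableAttributesRule::Field("doggos".to_string())], // filterable rules
    HashSet::new(),                                              // no sortable attributes
    None,                                                        // no localized rules
    None,                                                        // no distinct attribute
    HashSet::new(),                                              // no asc/desc attributes
);
let metadata = builder.metadata_for_field("doggos.age");
assert!(metadata.is_searchable());
assert!(metadata.filterable_attributes_features(builder.filterable_attributes()).is_filterable());
```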

368 crates/milli/src/filterable_attributes_rules.rs Normal file
@ -0,0 +1,368 @@
use deserr::{DeserializeError, Deserr, ValuePointerRef};
use serde::{Deserialize, Serialize};
use std::collections::{BTreeSet, HashSet};
use utoipa::ToSchema;

use crate::{
    attribute_patterns::{match_distinct_field, match_field_legacy, PatternMatch},
    constants::RESERVED_GEO_FIELD_NAME,
    AttributePatterns,
};

#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, ToSchema)]
#[serde(untagged)]
pub enum FilterableAttributesRule {
    Field(String),
    Pattern(FilterableAttributesPatterns),
}

impl FilterableAttributesRule {
    /// Match a field against the filterable attributes rule.
    pub fn match_str(&self, field: &str) -> PatternMatch {
        match self {
            // If the rule is a field, match the field against the pattern using the legacy behavior
            FilterableAttributesRule::Field(pattern) => match_field_legacy(pattern, field),
            // If the rule is a pattern, match the field against the pattern using the new behavior
            FilterableAttributesRule::Pattern(patterns) => patterns.match_str(field),
        }
    }

    /// Check if the rule is a geo field.
    ///
    /// Prefer using `index.is_geo_enabled`, `index.is_geo_filtering_enabled` or `index.is_geo_sorting_enabled`
    /// to check if the geo feature is enabled.
    pub fn has_geo(&self) -> bool {
        matches!(self, FilterableAttributesRule::Field(field_name) if field_name == RESERVED_GEO_FIELD_NAME)
    }

    /// Get the features of the rule.
    pub fn features(&self) -> FilterableAttributesFeatures {
        match self {
            // If the rule is a field, return the legacy default features
            FilterableAttributesRule::Field(_) => FilterableAttributesFeatures::legacy_default(),
            // If the rule is a pattern, return the features of the pattern
            FilterableAttributesRule::Pattern(patterns) => patterns.features(),
        }
    }
}

#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Debug, Deserr, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct FilterableAttributesPatterns {
    pub attribute_patterns: AttributePatterns,
    #[serde(default)]
    #[deserr(default)]
    pub features: FilterableAttributesFeatures,
}

impl FilterableAttributesPatterns {
    pub fn match_str(&self, field: &str) -> PatternMatch {
        self.attribute_patterns.match_str(field)
    }

    pub fn features(&self) -> FilterableAttributesFeatures {
        self.features
    }
}

#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug, Deserr, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
#[derive(Default)]
pub struct FilterableAttributesFeatures {
    #[serde(default)]
    #[deserr(default)]
    facet_search: bool,
    #[serde(default)]
    #[deserr(default)]
    filter: FilterFeatures,
}

impl FilterableAttributesFeatures {
    /// Create a new `FilterableAttributesFeatures` with the legacy default features.
    ///
    /// This is the default behavior for `FilterableAttributesRule::Field`.
    /// This will set the facet search to true and activate all the filter operators.
    pub fn legacy_default() -> Self {
        Self { facet_search: true, filter: FilterFeatures::legacy_default() }
    }

    /// Create a new `FilterableAttributesFeatures` with no features.
    pub fn no_features() -> Self {
        Self { facet_search: false, filter: FilterFeatures::no_features() }
    }

    pub fn is_filterable(&self) -> bool {
        self.filter.is_filterable()
    }

    /// Check if `IS EMPTY` is allowed
    pub fn is_filterable_empty(&self) -> bool {
        self.filter.is_filterable_empty()
    }

    /// Check if `=` and `IN` are allowed
    pub fn is_filterable_equality(&self) -> bool {
        self.filter.is_filterable_equality()
    }

    /// Check if `IS NULL` is allowed
    pub fn is_filterable_null(&self) -> bool {
        self.filter.is_filterable_null()
    }

    /// Check if `EXISTS` is allowed
    pub fn is_filterable_exists(&self) -> bool {
        self.filter.is_filterable_exists()
    }

    /// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed
    pub fn is_filterable_comparison(&self) -> bool {
        self.filter.is_filterable_comparison()
    }

    /// Check if the facet search is allowed
    pub fn is_facet_searchable(&self) -> bool {
        self.facet_search
    }

    pub fn allowed_filter_operators(&self) -> Vec<String> {
        self.filter.allowed_operators()
    }
}

impl<E: DeserializeError> Deserr<E> for FilterableAttributesRule {
    fn deserialize_from_value<V: deserr::IntoValue>(
        value: deserr::Value<V>,
        location: ValuePointerRef,
    ) -> Result<Self, E> {
        if value.kind() == deserr::ValueKind::Map {
            Ok(Self::Pattern(FilterableAttributesPatterns::deserialize_from_value(
                value, location,
            )?))
        } else {
            Ok(Self::Field(String::deserialize_from_value(value, location)?))
        }
    }
}

#[derive(Serialize, Deserialize, PartialEq, Eq, Clone, Copy, Debug, Deserr, ToSchema)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct FilterFeatures {
    #[serde(default = "default_true")]
    #[deserr(default = true)]
    equality: bool,
    #[serde(default)]
    #[deserr(default)]
    comparison: bool,
}

fn default_true() -> bool {
    true
}

impl FilterFeatures {
    /// Get the allowed operators for the filter.
    pub fn allowed_operators(&self) -> Vec<String> {
        if !self.is_filterable() {
            return vec![];
        }

        let mut operators = vec!["OR", "AND", "NOT"];
        if self.is_filterable_equality() {
            operators.extend_from_slice(&["=", "!=", "IN"]);
        }
        if self.is_filterable_comparison() {
            operators.extend_from_slice(&["<", ">", "<=", ">=", "TO"]);
        }
        if self.is_filterable_empty() {
            operators.push("IS EMPTY");
        }
        if self.is_filterable_null() {
            operators.push("IS NULL");
        }
        if self.is_filterable_exists() {
            operators.push("EXISTS");
        }

        operators.into_iter().map(String::from).collect()
    }

    pub fn is_filterable(&self) -> bool {
        self.equality || self.comparison
    }

    pub fn is_filterable_equality(&self) -> bool {
        self.equality
    }

    /// Check if `<`, `>`, `<=`, `>=` or `TO` are allowed
    pub fn is_filterable_comparison(&self) -> bool {
        self.comparison
    }

    /// Check if `IS EMPTY` is allowed
    pub fn is_filterable_empty(&self) -> bool {
        self.is_filterable()
    }

    /// Check if `EXISTS` is allowed
    pub fn is_filterable_exists(&self) -> bool {
        self.is_filterable()
    }

    /// Check if `IS NULL` is allowed
    pub fn is_filterable_null(&self) -> bool {
        self.is_filterable()
    }

    /// Create a new `FilterFeatures` with the legacy default features.
    ///
    /// This is the default behavior for `FilterableAttributesRule::Field`.
    /// This will set the equality and comparison to true.
    pub fn legacy_default() -> Self {
        Self { equality: true, comparison: true }
    }

    /// Create a new `FilterFeatures` with no features.
    pub fn no_features() -> Self {
        Self { equality: false, comparison: false }
    }
}

impl Default for FilterFeatures {
    fn default() -> Self {
        Self { equality: true, comparison: false }
    }
}
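
With the defaults above (`equality: true`, `comparison: false`), only the boolean connectors, the equality family, and the existence checks are allowed. A minimal check, assuming the types above are in scope:

```rust
// Default filter features: equality on, comparison off.
let features = FilterFeatures::default();
assert_eq!(
    features.allowed_operators(),
    vec!["OR", "AND", "NOT", "=", "!=", "IN", "IS EMPTY", "IS NULL", "EXISTS"]
);
```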

/// Match a field against a set of filterable attributes rules.
///
/// This function will return the set of patterns that match the given filter.
///
/// # Arguments
///
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
/// * `filter` - The filter function to apply to the filterable attributes rules.
pub fn filtered_matching_patterns<'patterns>(
    filterable_attributes: &'patterns [FilterableAttributesRule],
    filter: &impl Fn(FilterableAttributesFeatures) -> bool,
) -> BTreeSet<&'patterns str> {
    let mut result = BTreeSet::new();

    for rule in filterable_attributes {
        if filter(rule.features()) {
            match rule {
                FilterableAttributesRule::Field(field) => {
                    result.insert(field.as_str());
                }
                FilterableAttributesRule::Pattern(patterns) => {
                    patterns.attribute_patterns.patterns.iter().for_each(|pattern| {
                        result.insert(pattern);
                    });
                }
            }
        }
    }

    result
}

/// Match a field against a set of filterable attributes rules.
///
/// This function will return the features that match the given field name.
///
/// # Arguments
///
/// * `field_name` - The field name to match against.
/// * `filterable_attributes` - The set of filterable attributes rules to match against.
///
/// # Returns
///
/// * `Some((rule_index, features))` - The features of the matching rule and the index of the rule in the `filterable_attributes` array.
/// * `None` - No matching rule was found.
pub fn matching_features(
    field_name: &str,
    filterable_attributes: &[FilterableAttributesRule],
) -> Option<(usize, FilterableAttributesFeatures)> {
    for (id, filterable_attribute) in filterable_attributes.iter().enumerate() {
        if filterable_attribute.match_str(field_name) == PatternMatch::Match {
            return Some((id, filterable_attribute.features()));
        }
    }
    None
}

/// Match a field against a set of filterable, facet searchable fields, distinct field, sortable fields, and asc_desc fields.
pub fn match_faceted_field(
    field_name: &str,
    filterable_fields: &[FilterableAttributesRule],
    sortable_fields: &HashSet<String>,
    asc_desc_fields: &HashSet<String>,
    distinct_field: &Option<String>,
) -> PatternMatch {
    // Check if the field matches any filterable or facet searchable field
    let mut selection = match_pattern_by_features(field_name, filterable_fields, &|features| {
        features.is_facet_searchable() || features.is_filterable()
    });

    // If the field matches the pattern, return Match
    if selection == PatternMatch::Match {
        return selection;
    }

    match match_distinct_field(distinct_field.as_deref(), field_name) {
        PatternMatch::Match => return PatternMatch::Match,
        PatternMatch::Parent => selection = PatternMatch::Parent,
        PatternMatch::NoMatch => (),
    }

    // Otherwise, check if the field matches any sortable/asc_desc field
    for pattern in sortable_fields.iter().chain(asc_desc_fields.iter()) {
        match match_field_legacy(pattern, field_name) {
            PatternMatch::Match => return PatternMatch::Match,
            PatternMatch::Parent => selection = PatternMatch::Parent,
            PatternMatch::NoMatch => (),
        }
    }

    selection
}

fn match_pattern_by_features(
    field_name: &str,
    filterable_attributes: &[FilterableAttributesRule],
    filter: &impl Fn(FilterableAttributesFeatures) -> bool,
) -> PatternMatch {
    let mut selection = PatternMatch::NoMatch;

    // `can_match` becomes false if the field name matches (PatternMatch::Match) any pattern that is not facet searchable or filterable,
    // this ensures that the field doesn't match a pattern with a lower priority, however it can still match a pattern for a nested field as a parent (PatternMatch::Parent).
    // See the test `search::filters::test_filterable_attributes_priority` for more details.
    let mut can_match = true;

    // Check if the field name matches any pattern that is facet searchable or filterable
    for pattern in filterable_attributes {
        match pattern.match_str(field_name) {
            PatternMatch::Match => {
                let features = pattern.features();
                if filter(features) && can_match {
                    return PatternMatch::Match;
                } else {
                    can_match = false;
                }
            }
            PatternMatch::Parent => {
                let features = pattern.features();
                if filter(features) {
                    selection = PatternMatch::Parent;
                }
            }
            PatternMatch::NoMatch => (),
        }
    }

    selection
}
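
A hedged sketch of how these helpers combine (rule order matters, since `matching_features` returns the first rule whose pattern matches; the rules below are made up for the example):

```rust
// Two rules: "age" is filterable with the legacy defaults, and a catch-all
// `*` pattern has every feature disabled.
let rules = vec![
    FilterableAttributesRule::Field("age".to_string()),
    FilterableAttributesRule::Pattern(FilterableAttributesPatterns {
        attribute_patterns: AttributePatterns::from(vec!["*".to_string()]),
        features: FilterableAttributesFeatures::no_features(),
    }),
];

// "age" hits rule #0 first and keeps the legacy (fully filterable) features.
let (rule_index, features) = matching_features("age", &rules).unwrap();
assert_eq!(rule_index, 0);
assert!(features.is_filterable());

// Only "age" is reported as a filterable pattern; "*" has no features enabled.
let patterns = filtered_matching_patterns(&rules, &|features| features.is_filterable());
assert_eq!(patterns.into_iter().collect::<Vec<_>>(), vec!["age"]);
```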

@ -1,6 +1,5 @@
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::convert::TryInto;
use std::fs::File;
use std::path::Path;

@ -10,10 +9,11 @@ use roaring::RoaringBitmap;
use rstar::RTree;
use serde::{Deserialize, Serialize};

use crate::constants::{self, RESERVED_VECTORS_FIELD_NAME};
use crate::constants::{self, RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
use crate::database_stats::DatabaseStats;
use crate::documents::PrimaryKey;
use crate::error::{InternalError, UserError};
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::fields_ids_map::FieldsIdsMap;
use crate::heed_codec::facet::{
    FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,

@ -27,8 +27,9 @@ use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig};
use crate::{
    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
    FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
    FieldidsWeightsMap, GeoPoint, LocalizedAttributesRule, ObkvCodec, Result, RoaringBitmapCodec,
    RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
    FieldidsWeightsMap, FilterableAttributesRule, GeoPoint, LocalizedAttributesRule, ObkvCodec,
    Result, RoaringBitmapCodec, RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32,
    BEU64,
};

pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;

@ -513,6 +514,16 @@ impl Index {
            .unwrap_or_default())
    }

    /// Returns the fields ids map with metadata.
    ///
    /// This structure is not yet stored in the index, and is generated on the fly.
    pub fn fields_ids_map_with_metadata(&self, rtxn: &RoTxn<'_>) -> Result<FieldIdMapWithMetadata> {
        Ok(FieldIdMapWithMetadata::new(
            self.fields_ids_map(rtxn)?,
            MetadataBuilder::from_index(self, rtxn)?,
        ))
    }

    /* fieldids weights map */
    // This maps the fields ids to their weights.
    // Their weight is defined by the ordering of the searchable attributes.

@ -548,6 +559,17 @@ impl Index {
        self.main.remap_key_type::<Str>().delete(wtxn, main_key::FIELDIDS_WEIGHTS_MAP_KEY)
    }

    pub fn max_searchable_attribute_weight(&self, rtxn: &RoTxn<'_>) -> Result<Option<Weight>> {
        let user_defined_searchable_fields = self.user_defined_searchable_fields(rtxn)?;
        if let Some(user_defined_searchable_fields) = user_defined_searchable_fields {
            if !user_defined_searchable_fields.contains(&"*") {
                return Ok(Some(user_defined_searchable_fields.len().saturating_sub(1) as Weight));
            }
        }

        Ok(None)
    }

    pub fn searchable_fields_and_weights<'a>(
        &self,
        rtxn: &'a RoTxn<'a>,

@ -738,8 +760,7 @@ impl Index {
        &self,
        wtxn: &mut RwTxn<'_>,
        user_fields: &[&str],
        non_searchable_fields_ids: &[FieldId],
        fields_ids_map: &FieldsIdsMap,
        fields_ids_map: &FieldIdMapWithMetadata,
    ) -> Result<()> {
        // We can write the user defined searchable fields as-is.
        self.put_user_defined_searchable_fields(wtxn, user_fields)?;

@ -747,29 +768,17 @@ impl Index {
        let mut weights = FieldidsWeightsMap::default();

        // Now we generate the real searchable fields:
        // 1. Take the user defined searchable fields as-is to keep the priority defined by the attributes criterion.
        // 2. Iterate over the user defined searchable fields.
        // 3. If a user defined field is a subset of a field defined in the fields_ids_map
        //    (ie doggo.name is a subset of doggo), insert it right after doggo and with the same weight.
        let mut real_fields = Vec::new();

        for (id, field_from_map) in fields_ids_map.iter() {
            for (weight, user_field) in user_fields.iter().enumerate() {
                if crate::is_faceted_by(field_from_map, user_field)
                    && !real_fields.contains(&field_from_map)
                    && !non_searchable_fields_ids.contains(&id)
                {
                    real_fields.push(field_from_map);

                    let weight: u16 =
                        weight.try_into().map_err(|_| UserError::AttributeLimitReached)?;
                    weights.insert(id, weight);
                }
        for (id, field_from_map, metadata) in fields_ids_map.iter() {
            if let Some(weight) = metadata.searchable_weight() {
                real_fields.push(field_from_map);
                weights.insert(id, weight);
            }
        }

        self.put_searchable_fields(wtxn, &real_fields)?;
        self.put_fieldids_weights_map(wtxn, &weights)?;

        Ok(())
    }
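For context on the metadata-driven rewrite above, here is a standalone sketch (a hypothetical helper, not milli's `searchable_weight`) of the weight semantics the metadata encodes: the weight is the rank of the matching user-defined searchable attribute, and a nested field such as `doggo.name` inherits the weight of `doggo`:

```rust
// Hypothetical, simplified model of per-field searchable weights.
fn searchable_weight(field: &str, user_fields: &[&str]) -> Option<u16> {
    user_fields
        .iter()
        // A field matches either exactly or as a nested child of the attribute.
        .position(|f| *f == field || field.starts_with(&format!("{f}.")))
        .map(|w| w as u16)
}

fn main() {
    let user_fields = ["name", "doggo"];
    assert_eq!(searchable_weight("name", &user_fields), Some(0));
    assert_eq!(searchable_weight("doggo.age", &user_fields), Some(1)); // inherits doggo's rank
    assert_eq!(searchable_weight("hidden", &user_fields), None); // not searchable, no weight
}
```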

@ -876,26 +885,32 @@ impl Index {

    /* filterable fields */

    /// Writes the filterable fields names in the database.
    pub(crate) fn put_filterable_fields(
    /// Writes the filterable attributes rules in the database.
    pub(crate) fn put_filterable_attributes_rules(
        &self,
        wtxn: &mut RwTxn<'_>,
        fields: &HashSet<String>,
        fields: &[FilterableAttributesRule],
    ) -> heed::Result<()> {
        self.main.remap_types::<Str, SerdeJson<_>>().put(
            wtxn,
            main_key::FILTERABLE_FIELDS_KEY,
            fields,
            &fields,
        )
    }

    /// Deletes the filterable fields ids in the database.
    pub(crate) fn delete_filterable_fields(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
    /// Deletes the filterable attributes rules in the database.
    pub(crate) fn delete_filterable_attributes_rules(
        &self,
        wtxn: &mut RwTxn<'_>,
    ) -> heed::Result<bool> {
        self.main.remap_key_type::<Str>().delete(wtxn, main_key::FILTERABLE_FIELDS_KEY)
    }

    /// Returns the filterable fields names.
    pub fn filterable_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
    /// Returns the filterable attributes rules.
    pub fn filterable_attributes_rules(
        &self,
        rtxn: &RoTxn<'_>,
    ) -> heed::Result<Vec<FilterableAttributesRule>> {
        Ok(self
            .main
            .remap_types::<Str, SerdeJson<_>>()

@ -903,21 +918,6 @@ impl Index {
            .unwrap_or_default())
    }

    /// Identical to `filterable_fields`, but returns ids instead.
    pub fn filterable_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
        let fields = self.filterable_fields(rtxn)?;
        let fields_ids_map = self.fields_ids_map(rtxn)?;

        let mut fields_ids = HashSet::new();
        for name in fields {
            if let Some(field_id) = fields_ids_map.id(&name) {
                fields_ids.insert(field_id);
            }
        }

        Ok(fields_ids)
    }

    /* sortable fields */

    /// Writes the sortable fields names in the database.

@ -954,83 +954,37 @@ impl Index {
        Ok(fields.into_iter().filter_map(|name| fields_ids_map.id(&name)).collect())
    }

    /* faceted fields */

    /// Writes the faceted fields in the database.
    pub(crate) fn put_faceted_fields(
        &self,
        wtxn: &mut RwTxn<'_>,
        fields: &HashSet<String>,
    ) -> heed::Result<()> {
        self.main.remap_types::<Str, SerdeJson<_>>().put(
            wtxn,
            main_key::HIDDEN_FACETED_FIELDS_KEY,
            fields,
        )
    /// Returns true if the geo feature is enabled.
    pub fn is_geo_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
        let geo_filter = self.is_geo_filtering_enabled(rtxn)?;
        let geo_sortable = self.is_geo_sorting_enabled(rtxn)?;
        Ok(geo_filter || geo_sortable)
    }

    /// Returns the faceted fields names.
    pub fn faceted_fields(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashSet<String>> {
        Ok(self
            .main
            .remap_types::<Str, SerdeJson<_>>()
            .get(rtxn, main_key::HIDDEN_FACETED_FIELDS_KEY)?
            .unwrap_or_default())
    /// Returns true if the geo sorting feature is enabled.
    pub fn is_geo_sorting_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
        let geo_sortable = self.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
        Ok(geo_sortable)
    }

    /// Identical to `faceted_fields`, but returns ids instead.
    pub fn faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
        let fields = self.faceted_fields(rtxn)?;
        let fields_ids_map = self.fields_ids_map(rtxn)?;

        let mut fields_ids = HashSet::new();
        for name in fields {
            if let Some(field_id) = fields_ids_map.id(&name) {
                fields_ids.insert(field_id);
            }
        }

        Ok(fields_ids)
    /// Returns true if the geo filtering feature is enabled.
    pub fn is_geo_filtering_enabled(&self, rtxn: &RoTxn<'_>) -> Result<bool> {
        let geo_filter =
            self.filterable_attributes_rules(rtxn)?.iter().any(|field| field.has_geo());
        Ok(geo_filter)
    }

    /* faceted documents ids */

    /// Returns the user defined faceted fields names.
    ///
    /// The user faceted fields are the union of all the filterable, sortable, distinct, and Asc/Desc fields.
    pub fn user_defined_faceted_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
        let filterable_fields = self.filterable_fields(rtxn)?;
        let sortable_fields = self.sortable_fields(rtxn)?;
        let distinct_field = self.distinct_field(rtxn)?;
        let asc_desc_fields =
            self.criteria(rtxn)?.into_iter().filter_map(|criterion| match criterion {
    pub fn asc_desc_fields(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<String>> {
        let asc_desc_fields = self
            .criteria(rtxn)?
            .into_iter()
            .filter_map(|criterion| match criterion {
                Criterion::Asc(field) | Criterion::Desc(field) => Some(field),
                _otherwise => None,
            });
            })
            .collect();

        let mut faceted_fields = filterable_fields;
        faceted_fields.extend(sortable_fields);
        faceted_fields.extend(asc_desc_fields);
        if let Some(field) = distinct_field {
            faceted_fields.insert(field.to_owned());
        }

        Ok(faceted_fields)
    }

    /// Identical to `user_defined_faceted_fields`, but returns ids instead.
    pub fn user_defined_faceted_fields_ids(&self, rtxn: &RoTxn<'_>) -> Result<HashSet<FieldId>> {
        let fields = self.user_defined_faceted_fields(rtxn)?;
        let fields_ids_map = self.fields_ids_map(rtxn)?;

        let mut fields_ids = HashSet::new();
        for name in fields {
            if let Some(field_id) = fields_ids_map.id(&name) {
                fields_ids.insert(field_id);
            }
        }

        Ok(fields_ids)
        Ok(asc_desc_fields)
    }

    /* faceted documents ids */
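A minimal standalone sketch of the gating these new helpers implement (a simplified settings type, not the milli API): the geo data structures only need to be maintained when `_geo` is filterable or sortable:

```rust
// Hypothetical, simplified stand-in for the index settings.
struct Settings {
    filterable_geo: bool,       // any filterable attributes rule matching `_geo`
    sortable: Vec<String>,      // the configured sortable attributes
}

impl Settings {
    fn is_geo_filtering_enabled(&self) -> bool {
        self.filterable_geo
    }

    fn is_geo_sorting_enabled(&self) -> bool {
        self.sortable.iter().any(|f| f == "_geo")
    }

    // The geo feature stays on if either capability needs it.
    fn is_geo_enabled(&self) -> bool {
        self.is_geo_filtering_enabled() || self.is_geo_sorting_enabled()
    }
}

fn main() {
    let s = Settings { filterable_geo: false, sortable: vec!["_geo".to_string()] };
    assert!(s.is_geo_enabled()); // sorting alone keeps the geo index alive
}
```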

@ -1833,7 +1787,7 @@ pub(crate) mod tests {
    use big_s::S;
    use bumpalo::Bump;
    use heed::{EnvOpenOptions, RwTxn};
    use maplit::{btreemap, hashset};
    use maplit::btreemap;
    use memmap2::Mmap;
    use tempfile::TempDir;

@ -1849,7 +1803,8 @@ pub(crate) mod tests {
    use crate::vector::settings::{EmbedderSource, EmbeddingSettings};
    use crate::vector::EmbeddingConfigs;
    use crate::{
        db_snap, obkv_to_json, Filter, Index, Search, SearchResult, ThreadPoolNoAbortBuilder,
        db_snap, obkv_to_json, Filter, FilterableAttributesRule, Index, Search, SearchResult,
        ThreadPoolNoAbortBuilder,
    };

    pub(crate) struct TempIndex {

@ -2256,7 +2211,7 @@ pub(crate) mod tests {
        let rtxn = index.read_txn().unwrap();

        let real = index.searchable_fields(&rtxn).unwrap();
        assert_eq!(real, &["doggo", "name"]);
        assert!(real.is_empty());
        let user_defined = index.user_defined_searchable_fields(&rtxn).unwrap().unwrap();
        assert_eq!(user_defined, &["doggo", "name"]);

@ -2284,7 +2239,9 @@ pub(crate) mod tests {

        index
            .update_settings(|settings| {
                settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    RESERVED_GEO_FIELD_NAME.to_string(),
                )]);
            })
            .unwrap();
        index

@ -2392,7 +2349,9 @@ pub(crate) mod tests {

        index
            .update_settings(|settings| {
                settings.set_filterable_fields(hashset! { S("doggo") });
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    "doggo".to_string(),
                )]);
            })
            .unwrap();
        index

@ -2429,15 +2388,14 @@ pub(crate) mod tests {

    #[test]
    fn replace_documents_external_ids_and_soft_deletion_check() {
        use big_s::S;
        use maplit::hashset;

        let index = TempIndex::new();

        index
            .update_settings(|settings| {
                settings.set_primary_key("id".to_owned());
                settings.set_filterable_fields(hashset! { S("doggo") });
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    "doggo".to_string(),
                )]);
            })
            .unwrap();

@ -2970,8 +2928,9 @@ pub(crate) mod tests {
        index
            .update_settings(|settings| {
                settings.set_primary_key("id".to_string());
                settings
                    .set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()]));
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    RESERVED_GEO_FIELD_NAME.to_string(),
                )]);
            })
            .unwrap();

@ -3005,8 +2964,9 @@ pub(crate) mod tests {
        index
            .update_settings(|settings| {
                settings.set_primary_key("id".to_string());
                settings
                    .set_filterable_fields(HashSet::from([RESERVED_GEO_FIELD_NAME.to_string()]));
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    RESERVED_GEO_FIELD_NAME.to_string(),
                )]);
            })
            .unwrap();

@ -3039,7 +2999,9 @@ pub(crate) mod tests {
        index
            .update_settings(|settings| {
                settings.set_searchable_fields(vec![S("name")]);
                settings.set_filterable_fields(HashSet::from([S("age")]));
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    "age".to_string(),
                )]);
            })
            .unwrap();

@ -3047,35 +3009,37 @@ pub(crate) mod tests {
            .add_documents(documents!({ "id": 1, "name": "Many", "age": 28, "realName": "Maxime" }))
            .unwrap();
        db_snap!(index, fields_ids_map, @r###"
        0 name |
        1 id |
        0 id |
        1 name |
        2 age |
        3 realName |
        "###);
        db_snap!(index, searchable_fields, @r###"["name"]"###);
        db_snap!(index, fieldids_weights_map, @r###"
        fid weight
        0 0 |
        1 0 |
        "###);

        index
            .update_settings(|settings| {
                settings.set_searchable_fields(vec![S("name"), S("realName")]);
                settings.set_filterable_fields(HashSet::from([S("age")]));
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    "age".to_string(),
                )]);
            })
            .unwrap();

        // The order of the field id map shouldn't change
        db_snap!(index, fields_ids_map, @r###"
        0 name |
        1 id |
        0 id |
        1 name |
        2 age |
        3 realName |
        "###);
        db_snap!(index, searchable_fields, @r###"["name", "realName"]"###);
        db_snap!(index, fieldids_weights_map, @r###"
        fid weight
        0 0 |
        1 0 |
        3 1 |
        "###);
    }
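The reordered snapshots above follow from the fields-ids map now containing only fields that actually appear in the documents, in first-seen order. A standalone toy version of that id-assignment behavior (not milli's implementation):

```rust
use std::collections::HashMap;

// Hypothetical, simplified fields-ids map: ids are handed out incrementally
// the first time a field name is seen, which is why `id` gets id 0 in the
// snapshots above once no placeholder entries are pre-registered.
#[derive(Default)]
struct ToyFieldsIdsMap {
    ids: HashMap<String, u16>,
}

impl ToyFieldsIdsMap {
    fn insert(&mut self, name: &str) -> u16 {
        let next = self.ids.len() as u16;
        *self.ids.entry(name.to_string()).or_insert(next)
    }
}

fn main() {
    let mut map = ToyFieldsIdsMap::default();
    for field in ["id", "name", "age", "realName"] {
        map.insert(field);
    }
    assert_eq!(map.ids["id"], 0);
    assert_eq!(map.ids["realName"], 3);
}
```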

@ -3160,14 +3124,16 @@ pub(crate) mod tests {
        index
            .update_settings(|settings| {
                settings.set_searchable_fields(vec![S("_vectors"), S("_vectors.doggo")]);
                settings.set_filterable_fields(hashset![S("_vectors"), S("_vectors.doggo")]);
                settings.set_filterable_fields(vec![
                    FilterableAttributesRule::Field("_vectors".to_string()),
                    FilterableAttributesRule::Field("_vectors.doggo".to_string()),
                ]);
            })
            .unwrap();

        db_snap!(index, fields_ids_map, @r###"
        0 id |
        1 _vectors |
        2 _vectors.doggo |
        "###);
        db_snap!(index, searchable_fields, @"[]");
        db_snap!(index, fieldids_weights_map, @r###"

@ -3200,7 +3166,6 @@ pub(crate) mod tests {
        db_snap!(index, fields_ids_map, @r###"
        0 id |
        1 _vectors |
        2 _vectors.doggo |
        "###);
        db_snap!(index, searchable_fields, @"[]");
        db_snap!(index, fieldids_weights_map, @r###"

@ -9,12 +9,14 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
pub mod documents;

mod asc_desc;
mod attribute_patterns;
mod criterion;
pub mod database_stats;
mod error;
mod external_documents_ids;
pub mod facet;
mod fields_ids_map;
mod filterable_attributes_rules;
pub mod heed_codec;
pub mod index;
mod localized_attributes_rules;

@ -52,6 +54,8 @@ pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbor
pub use {charabia as tokenizer, heed, rhai};

pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::attribute_patterns::AttributePatterns;
pub use self::attribute_patterns::PatternMatch;
pub use self::criterion::{default_criteria, Criterion, CriterionError};
pub use self::error::{
    Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,

@ -59,6 +63,10 @@ pub use self::error::{
pub use self::external_documents_ids::ExternalDocumentsIds;
pub use self::fieldids_weights_map::FieldidsWeightsMap;
pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap};
pub use self::filterable_attributes_rules::{
    FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns,
    FilterableAttributesRule,
};
pub use self::heed_codec::{
    BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,

@ -67,7 +75,6 @@ pub use self::heed_codec::{
};
pub use self::index::Index;
pub use self::localized_attributes_rules::LocalizedAttributesRule;
use self::localized_attributes_rules::LocalizedFieldIds;
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
pub use self::search::similar::Similar;
pub use self::search::{

@ -4,8 +4,9 @@ use charabia::Language;
use serde::{Deserialize, Serialize};
use utoipa::ToSchema;

use crate::attribute_patterns::PatternMatch;
use crate::fields_ids_map::FieldsIdsMap;
use crate::FieldId;
use crate::{AttributePatterns, FieldId};

/// A rule that defines which locales are supported for a given attribute.
///

@ -17,18 +18,18 @@ use crate::FieldId;
/// The pattern `*attribute_name*` matches any attribute name that contains `attribute_name`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
pub struct LocalizedAttributesRule {
    pub attribute_patterns: Vec<String>,
    pub attribute_patterns: AttributePatterns,
    #[schema(value_type = Vec<String>)]
    pub locales: Vec<Language>,
}

impl LocalizedAttributesRule {
    pub fn new(attribute_patterns: Vec<String>, locales: Vec<Language>) -> Self {
        Self { attribute_patterns, locales }
        Self { attribute_patterns: AttributePatterns::from(attribute_patterns), locales }
    }

    pub fn match_str(&self, str: &str) -> bool {
        self.attribute_patterns.iter().any(|pattern| match_pattern(pattern.as_str(), str))
    pub fn match_str(&self, str: &str) -> PatternMatch {
        self.attribute_patterns.match_str(str)
    }

    pub fn locales(&self) -> &[Language] {

@ -36,20 +37,6 @@ impl LocalizedAttributesRule {
    }
}

fn match_pattern(pattern: &str, str: &str) -> bool {
    if pattern == "*" {
        true
    } else if pattern.starts_with('*') && pattern.ends_with('*') {
        str.contains(&pattern[1..pattern.len() - 1])
    } else if let Some(pattern) = pattern.strip_prefix('*') {
        str.ends_with(pattern)
    } else if let Some(pattern) = pattern.strip_suffix('*') {
        str.starts_with(pattern)
    } else {
        pattern == str
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocalizedFieldIds {
    field_id_to_locales: HashMap<FieldId, Vec<Language>>,

@ -65,13 +52,13 @@ impl LocalizedFieldIds {

        if let Some(rules) = rules {
            let fields = fields_ids.filter_map(|field_id| {
                fields_ids_map.name(field_id).map(|field_name| (field_id, field_name))
                fields_ids_map.name(field_id).map(|field_name: &str| (field_id, field_name))
            });

            for (field_id, field_name) in fields {
                let mut locales = Vec::new();
                for rule in rules {
                    if rule.match_str(field_name) {
                    if rule.match_str(field_name) == PatternMatch::Match {
                        locales.extend(rule.locales.iter());
                        // Take the first rule that matches
                        break;

@ -89,10 +76,6 @@ impl LocalizedFieldIds {
        Self { field_id_to_locales }
    }

    pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> {
        self.field_id_to_locales.get(&fields_id).map(Vec::as_slice)
    }

    pub fn all_locales(&self) -> Vec<Language> {
        let mut locales = Vec::new();
        for field_locales in self.field_id_to_locales.values() {

@ -108,24 +91,3 @@ impl LocalizedFieldIds {
        locales
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_match_pattern() {
        assert!(match_pattern("*", "test"));
        assert!(match_pattern("test*", "test"));
        assert!(match_pattern("test*", "testa"));
        assert!(match_pattern("*test", "test"));
        assert!(match_pattern("*test", "atest"));
        assert!(match_pattern("*test*", "test"));
        assert!(match_pattern("*test*", "atesta"));
        assert!(match_pattern("*test*", "atest"));
        assert!(match_pattern("*test*", "testa"));
        assert!(!match_pattern("test*test", "test"));
        assert!(!match_pattern("*test", "testa"));
        assert!(!match_pattern("test*", "atest"));
    }
}
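Since `match_pattern` now lives behind `AttributePatterns::match_str`, here is a standalone sketch (a hypothetical rule type, not the milli API) of the resolution the new `LocalizedFieldIds` performs: wildcard patterns plus first-matching-rule-wins:

```rust
// Port of the wildcard matcher removed above: `*` alone matches everything,
// and a leading/trailing `*` makes it a suffix/prefix/contains match.
fn match_pattern(pattern: &str, s: &str) -> bool {
    if pattern == "*" {
        true
    } else if pattern.starts_with('*') && pattern.ends_with('*') {
        s.contains(&pattern[1..pattern.len() - 1])
    } else if let Some(p) = pattern.strip_prefix('*') {
        s.ends_with(p)
    } else if let Some(p) = pattern.strip_suffix('*') {
        s.starts_with(p)
    } else {
        pattern == s
    }
}

struct Rule {
    patterns: Vec<&'static str>,
    locales: Vec<&'static str>,
}

// The first rule with any matching pattern decides the locales of a field.
fn locales_for<'a>(rules: &'a [Rule], field: &str) -> &'a [&'static str] {
    rules
        .iter()
        .find(|r| r.patterns.iter().any(|p| match_pattern(p, field)))
        .map(|r| r.locales.as_slice())
        .unwrap_or(&[])
}

fn main() {
    let rules = [
        Rule { patterns: vec!["title_ja", "*_ja"], locales: vec!["jpn"] },
        Rule { patterns: vec!["*"], locales: vec!["eng"] },
    ];
    assert_eq!(locales_for(&rules, "overview_ja"), ["jpn"].as_slice());
    assert_eq!(locales_for(&rules, "overview"), ["eng"].as_slice());
}
```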

@ -7,14 +7,14 @@ use liquid::model::{
};
use liquid::{ObjectView, ValueView};

use super::{FieldMetadata, FieldsIdsMapWithMetadata};
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, Metadata};
use crate::GlobalFieldsIdsMap;

#[derive(Debug, Clone, Copy)]
pub struct FieldValue<'a, D: ObjectView> {
    name: &'a str,
    document: &'a D,
    metadata: FieldMetadata,
    metadata: Metadata,
}

impl<'a, D: ObjectView> ValueView for FieldValue<'a, D> {

@ -67,7 +67,10 @@ impl<'a, D: ObjectView> FieldValue<'a, D> {
    }

    pub fn is_searchable(&self) -> &bool {
        &self.metadata.searchable
        match self.metadata.is_searchable() {
            true => &true,
            false => &false,
        }
    }

    pub fn is_empty(&self) -> bool {

@ -125,15 +128,11 @@ pub struct BorrowedFields<'a, 'map, D: ObjectView> {
}

impl<'a, D: ObjectView> OwnedFields<'a, D> {
    pub fn new(document: &'a D, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
    pub fn new(document: &'a D, field_id_map: &'a FieldIdMapWithMetadata) -> Self {
        Self(
            std::iter::repeat(document)
                .zip(field_id_map.iter())
                .map(|(document, (fid, name))| FieldValue {
                    document,
                    name,
                    metadata: field_id_map.metadata(fid).unwrap_or_default(),
                })
                .map(|(document, (_fid, name, metadata))| FieldValue { document, name, metadata })
                .collect(),
        )
    }

@ -187,7 +186,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
                let fv = self.doc_alloc.alloc(FieldValue {
                    name: self.doc_alloc.alloc_str(&k),
                    document: self.document,
                    metadata: FieldMetadata { searchable: metadata.searchable },
                    metadata,
                });
                fv as _
            }))

@ -207,7 +206,7 @@ impl<'a, 'map, D: ObjectView> ArrayView for BorrowedFields<'a, 'map, D> {
                let fv = self.doc_alloc.alloc(FieldValue {
                    name: self.doc_alloc.alloc_str(&key),
                    document: self.document,
                    metadata: FieldMetadata { searchable: metadata.searchable },
                    metadata,
                });
                Some(fv as _)
            }

@ -5,11 +5,9 @@ mod fields;
mod template_checker;

use std::cell::RefCell;
use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::fmt::Debug;
use std::num::NonZeroUsize;
use std::ops::Deref;

use bumpalo::Bump;
use document::ParseableDocument;

@ -18,8 +16,9 @@ use fields::{BorrowedFields, OwnedFields};

use self::context::Context;
use self::document::Document;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::update::del_add::DelAdd;
use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap};
use crate::GlobalFieldsIdsMap;

pub struct Prompt {
    template: liquid::Template,

@ -145,9 +144,9 @@ impl Prompt {
        &self,
        document: &obkv::KvReaderU16,
        side: DelAdd,
        field_id_map: &FieldsIdsMapWithMetadata,
        field_id_map: &FieldIdMapWithMetadata,
    ) -> Result<String, RenderPromptError> {
        let document = Document::new(document, side, field_id_map);
        let document = Document::new(document, side, field_id_map.as_fields_ids_map());
        let fields = OwnedFields::new(&document, field_id_map);
        let context = Context::new(&document, &fields);

@ -172,40 +171,6 @@ fn truncate(s: &mut String, max_bytes: usize) {
    }
}

pub struct FieldsIdsMapWithMetadata<'a> {
    fields_ids_map: &'a FieldsIdsMap,
    metadata: BTreeMap<FieldId, FieldMetadata>,
}

impl<'a> FieldsIdsMapWithMetadata<'a> {
    pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
        let mut metadata: BTreeMap<FieldId, FieldMetadata> =
            fields_ids_map.ids().map(|id| (id, Default::default())).collect();
        for searchable_field_id in searchable_fields_ids {
            let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
            metadata.searchable = true;
        }
        Self { fields_ids_map, metadata }
    }

    pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
        self.metadata.get(&field_id).copied()
    }
}

impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
    type Target = FieldsIdsMap;

    fn deref(&self) -> &Self::Target {
        self.fields_ids_map
    }
}

#[derive(Debug, Default, Clone, Copy)]
pub struct FieldMetadata {
    pub searchable: bool,
}

#[cfg(test)]
mod test {
    use super::Prompt;

@ -1,4 +1,4 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::fmt::Display;
use std::ops::ControlFlow;
use std::{fmt, mem};

@ -9,8 +9,9 @@ use indexmap::IndexMap;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};

use crate::error::UserError;
use crate::attribute_patterns::match_field_legacy;
use crate::facet::FacetType;
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{
    FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
};

@ -18,7 +19,7 @@ use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter::{
    count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
};
use crate::{FieldId, Index, Result};
use crate::{Error, FieldId, FilterableAttributesRule, Index, PatternMatch, Result, UserError};

/// The default number of values by facets that will
/// be fetched from the key-value store.

@ -287,37 +288,19 @@ impl<'a> FacetDistribution<'a> {
    }

    pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
        let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
        let filterable_fields = self.index.filterable_fields(self.rtxn)?;
        let candidates = if let Some(candidates) = self.candidates.clone() {
            candidates
        } else {
            return Ok(Default::default());
        };

        let fields = match &self.facets {
            Some(facets) => {
                let invalid_fields: HashSet<_> = facets
                    .iter()
                    .map(|(name, _)| name)
                    .filter(|facet| !crate::is_faceted(facet, &filterable_fields))
                    .collect();
                if !invalid_fields.is_empty() {
                    return Err(UserError::InvalidFacetsDistribution {
                        invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
                        valid_facets_name: filterable_fields.into_iter().collect(),
                    }
                    .into());
                } else {
                    facets.iter().map(|(name, _)| name).cloned().collect()
                }
            }
            None => filterable_fields,
        };
        let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
        let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?;
        self.check_faceted_fields(&filterable_attributes_rules)?;

        let mut distribution = BTreeMap::new();
        for (fid, name) in fields_ids_map.iter() {
            if crate::is_faceted(name, &fields) {
            if self.select_field(name, &filterable_attributes_rules) {
                let min_value = if let Some(min_value) = crate::search::facet::facet_min_value(
                    self.index,
                    self.rtxn,

@ -348,31 +331,12 @@ impl<'a> FacetDistribution<'a> {

    pub fn execute(&self) -> Result<BTreeMap<String, IndexMap<String, u64>>> {
        let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
        let filterable_fields = self.index.filterable_fields(self.rtxn)?;

        let fields = match self.facets {
            Some(ref facets) => {
                let invalid_fields: HashSet<_> = facets
                    .iter()
                    .map(|(name, _)| name)
                    .filter(|facet| !crate::is_faceted(facet, &filterable_fields))
                    .collect();
                if !invalid_fields.is_empty() {
                    return Err(UserError::InvalidFacetsDistribution {
                        invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
                        valid_facets_name: filterable_fields.into_iter().collect(),
                    }
                    .into());
                } else {
                    facets.iter().map(|(name, _)| name).cloned().collect()
                }
            }
            None => filterable_fields,
        };
        let filterable_attributes_rules = self.index.filterable_attributes_rules(self.rtxn)?;
        self.check_faceted_fields(&filterable_attributes_rules)?;

        let mut distribution = BTreeMap::new();
        for (fid, name) in fields_ids_map.iter() {
            if crate::is_faceted(name, &fields) {
            if self.select_field(name, &filterable_attributes_rules) {
                let order_by = self
                    .facets
                    .as_ref()

@ -385,6 +349,62 @@ impl<'a> FacetDistribution<'a> {

        Ok(distribution)
    }

    /// Select a field if it is filterable and in the facets.
    fn select_field(
        &self,
        name: &str,
        filterable_attributes_rules: &[FilterableAttributesRule],
    ) -> bool {
        // If the field is not filterable, we don't want to compute the facet distribution.
        if !matching_features(name, filterable_attributes_rules)
            .map_or(false, |(_, features)| features.is_filterable())
        {
            return false;
        }

        match &self.facets {
            Some(facets) => {
                // The list of facets provided by the user is a legacy pattern ("dog.age" must be selected with "dog").
                facets.keys().any(|key| match_field_legacy(key, name) == PatternMatch::Match)
            }
            None => true,
        }
    }

    /// Check if the fields in the facets are valid filterable fields.
    fn check_faceted_fields(
        &self,
        filterable_attributes_rules: &[FilterableAttributesRule],
    ) -> Result<()> {
        let mut invalid_facets = BTreeSet::new();
        if let Some(facets) = &self.facets {
            for field in facets.keys() {
                let is_valid_filterable_field =
                    matching_features(field, filterable_attributes_rules)
                        .map_or(false, |(_, features)| features.is_filterable());
                if !is_valid_filterable_field {
                    invalid_facets.insert(field.to_string());
                }
            }
        }

        if !invalid_facets.is_empty() {
            let valid_patterns =
                filtered_matching_patterns(filterable_attributes_rules, &|features| {
                    features.is_filterable()
                })
                .into_iter()
                .map(String::from)
                .collect();
            return Err(Error::UserError(UserError::InvalidFacetsDistribution {
                invalid_facets_name: invalid_facets,
                valid_patterns,
            }));
        }

        Ok(())
    }
}
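A standalone sketch (hypothetical types, not the milli API) of the contract `check_faceted_fields` enforces: every requested facet must match a rule whose filter feature is on, and the error carries the patterns that would have been accepted:

```rust
use std::collections::BTreeSet;

struct Rule {
    pattern: &'static str,
    filterable: bool,
}

// Returns Err((invalid facet names, valid patterns)) when any facet is not filterable.
fn check(facets: &[&str], rules: &[Rule]) -> Result<(), (BTreeSet<String>, Vec<String>)> {
    let invalid: BTreeSet<String> = facets
        .iter()
        .filter(|f| !rules.iter().any(|r| r.pattern == **f && r.filterable))
        .map(|f| f.to_string())
        .collect();
    if invalid.is_empty() {
        Ok(())
    } else {
        let valid = rules.iter().filter(|r| r.filterable).map(|r| r.pattern.to_string()).collect();
        Err((invalid, valid))
    }
}

fn main() {
    let rules = [
        Rule { pattern: "colour", filterable: true },
        Rule { pattern: "uuid", filterable: false },
    ];
    assert!(check(&["colour"], &rules).is_ok());
    assert!(check(&["uuid"], &rules).is_err()); // filter feature disabled for `uuid`
}
```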

impl fmt::Debug for FacetDistribution<'_> {

@ -412,11 +432,10 @@ mod tests {
    use std::iter;

    use big_s::S;
    use maplit::hashset;

    use crate::documents::mmap_from_objects;
    use crate::index::tests::TempIndex;
    use crate::{milli_snap, FacetDistribution, OrderBy};
    use crate::{milli_snap, FacetDistribution, FilterableAttributesRule, OrderBy};

    #[test]
    fn few_candidates_few_facet_values() {

@ -426,7 +445,9 @@ mod tests {
        let index = TempIndex::new();

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .update_settings(|settings| {
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
            })
            .unwrap();

        let documents = documents!([

@ -497,7 +518,9 @@ mod tests {
        let index = TempIndex::new_with_map_size(4096 * 10_000);

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .update_settings(|settings| {
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
            })
            .unwrap();

        let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"];

@ -582,7 +605,9 @@ mod tests {
        let index = TempIndex::new_with_map_size(4096 * 10_000);

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .update_settings(|settings| {
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
            })
            .unwrap();

        let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();

@ -641,7 +666,9 @@ mod tests {
        let index = TempIndex::new_with_map_size(4096 * 10_000);

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .update_settings(|settings| {
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
            })
            .unwrap();

        let facet_values = (0..1000).collect::<Vec<_>>();

@ -692,7 +719,9 @@ mod tests {
        let index = TempIndex::new_with_map_size(4096 * 10_000);

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .update_settings(|settings| {
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
            })
            .unwrap();

        let facet_values = (0..1000).collect::<Vec<_>>();

@ -743,7 +772,9 @@ mod tests {
        let index = TempIndex::new_with_map_size(4096 * 10_000);

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .update_settings(|settings| {
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
            })
            .unwrap();

        let facet_values = (0..1000).collect::<Vec<_>>();

@ -794,7 +825,9 @@ mod tests {
        let index = TempIndex::new_with_map_size(4096 * 10_000);

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .update_settings(|settings| {
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("colour"))])
            })
            .unwrap();

        let facet_values = (0..1000).collect::<Vec<_>>();

@ -1,4 +1,4 @@
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};

@ -12,12 +12,14 @@ use serde_json::Value;
use super::facet_range_search;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{
    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
    distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result,
    distance_between_two_points, lat_lng_to_xyz, FieldId, FieldsIdsMap,
    FilterableAttributesFeatures, FilterableAttributesRule, Index, InternalError, Result,
    SerializationError,
};

@ -60,7 +62,7 @@ impl Display for BadGeoError {

#[derive(Debug)]
enum FilterError<'a> {
    AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
    AttributeNotFilterable { attribute: &'a str, filterable_patterns: BTreeSet<&'a str> },
    ParseGeoError(BadGeoError),
    TooDeep,
}

@ -75,14 +77,14 @@ impl<'a> From<BadGeoError> for FilterError<'a> {
impl<'a> Display for FilterError<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::AttributeNotFilterable { attribute, filterable_fields } => {
            Self::AttributeNotFilterable { attribute, filterable_patterns } => {
                write!(f, "Attribute `{attribute}` is not filterable.")?;
                if filterable_fields.is_empty() {
                if filterable_patterns.is_empty() {
                    write!(f, " This index does not have configured filterable attributes.")
                } else {
                    write!(f, " Available filterable attributes are: ")?;
                    write!(f, " Available filterable attribute patterns are: ")?;
                    let mut filterables_list =
                        filterable_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>();
                        filterable_patterns.iter().map(AsRef::as_ref).collect::<Vec<&str>>();
                    filterables_list.sort_unstable();
                    for (idx, filterable) in filterables_list.iter().enumerate() {
                        write!(f, "`{filterable}`")?;

@ -230,17 +232,27 @@ impl<'a> Filter<'a> {
impl<'a> Filter<'a> {
    pub fn evaluate(&self, rtxn: &heed::RoTxn<'_>, index: &Index) -> Result<RoaringBitmap> {
        // to avoid doing this for each recursive call we're going to do it ONCE ahead of time
        let filterable_fields = index.filterable_fields(rtxn)?;
        let fields_ids_map = index.fields_ids_map(rtxn)?;
        let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
        for fid in self.condition.fids(MAX_FILTER_DEPTH) {
            let attribute = fid.value();
            if !crate::is_faceted(attribute, &filterable_fields) {
                return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
                    attribute,
                    filterable_fields,
                }))?;
            if matching_features(attribute, &filterable_attributes_rules)
                .map_or(false, |(_, features)| features.is_filterable())
            {
                continue;
            }

            // If the field is not filterable, return an error
            return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
                attribute,
                filterable_patterns: filtered_matching_patterns(
                    &filterable_attributes_rules,
                    &|features| features.is_filterable(),
                ),
            }))?;
        }
        self.inner_evaluate(rtxn, index, &filterable_fields, None)

        self.inner_evaluate(rtxn, index, &fields_ids_map, &filterable_attributes_rules, None)
    }

    fn evaluate_operator(

@ -249,6 +261,8 @@ impl<'a> Filter<'a> {
        field_id: FieldId,
        universe: Option<&RoaringBitmap>,
        operator: &Condition<'a>,
        features: &FilterableAttributesFeatures,
        rule_index: usize,
    ) -> Result<RoaringBitmap> {
        let numbers_db = index.facet_id_f64_docids;
        let strings_db = index.facet_id_string_docids;

@ -258,6 +272,38 @@ impl<'a> Filter<'a> {
        // field id and the level.

        let (left, right) = match operator {
            // return an error if the filter is not allowed for this field
            Condition::GreaterThan(_)
            | Condition::GreaterThanOrEqual(_)
            | Condition::LowerThan(_)
            | Condition::LowerThanOrEqual(_)
            | Condition::Between { .. }
                if !features.is_filterable_comparison() =>
            {
                return Err(generate_filter_error(
                    rtxn, index, field_id, operator, features, rule_index,
                ));
            }
            Condition::Empty if !features.is_filterable_empty() => {
                return Err(generate_filter_error(
                    rtxn, index, field_id, operator, features, rule_index,
                ));
            }
            Condition::Null if !features.is_filterable_null() => {
                return Err(generate_filter_error(
                    rtxn, index, field_id, operator, features, rule_index,
                ));
            }
            Condition::Exists if !features.is_filterable_exists() => {
                return Err(generate_filter_error(
                    rtxn, index, field_id, operator, features, rule_index,
                ));
            }
            Condition::Equal(_) | Condition::NotEqual(_) if !features.is_filterable_equality() => {
                return Err(generate_filter_error(
                    rtxn, index, field_id, operator, features, rule_index,
                ));
            }
            Condition::GreaterThan(val) => {
                (Excluded(val.parse_finite_float()?), Included(f64::MAX))
            }

@ -307,7 +353,9 @@ impl<'a> Filter<'a> {
            }
            Condition::NotEqual(val) => {
                let operator = Condition::Equal(val.clone());
                let docids = Self::evaluate_operator(rtxn, index, field_id, None, &operator)?;
                let docids = Self::evaluate_operator(
                    rtxn, index, field_id, None, &operator, features, rule_index,
                )?;
                let all_ids = index.documents_ids(rtxn)?;
                return Ok(all_ids - docids);
            }

@ -409,7 +457,8 @@ impl<'a> Filter<'a> {
        &self,
        rtxn: &heed::RoTxn<'_>,
        index: &Index,
        filterable_fields: &HashSet<String>,
        field_ids_map: &FieldsIdsMap,
        filterable_attribute_rules: &[FilterableAttributesRule],
        universe: Option<&RoaringBitmap>,
    ) -> Result<RoaringBitmap> {
        if universe.map_or(false, |u| u.is_empty()) {

@ -422,7 +471,8 @@ impl<'a> Filter<'a> {
                    &(f.as_ref().clone()).into(),
                    rtxn,
                    index,
                    filterable_fields,
                    field_ids_map,
                    filterable_attribute_rules,
                    universe,
                )?;
                match universe {

@ -434,42 +484,49 @@ impl<'a> Filter<'a> {
                }
            }
            FilterCondition::In { fid, els } => {
                if crate::is_faceted(fid.value(), filterable_fields) {
                    let field_ids_map = index.fields_ids_map(rtxn)?;
                    if let Some(fid) = field_ids_map.id(fid.value()) {
                        els.iter()
                            .map(|el| Condition::Equal(el.clone()))
                            .map(|op| Self::evaluate_operator(rtxn, index, fid, universe, &op))
                            .union()
                    } else {
                        Ok(RoaringBitmap::new())
                    }
                } else {
                    Err(fid.as_external_error(FilterError::AttributeNotFilterable {
                        attribute: fid.value(),
                        filterable_fields: filterable_fields.clone(),
                    }))?
                }
                let Some(field_id) = field_ids_map.id(fid.value()) else {
                    return Ok(RoaringBitmap::new());
                };
                let Some((rule_index, features)) =
                    matching_features(fid.value(), filterable_attribute_rules)
                else {
                    return Ok(RoaringBitmap::new());
                };

                els.iter()
                    .map(|el| Condition::Equal(el.clone()))
                    .map(|op| {
                        Self::evaluate_operator(
                            rtxn, index, field_id, universe, &op, &features, rule_index,
                        )
                    })
                    .union()
            }
            FilterCondition::Condition { fid, op } => {
                if crate::is_faceted(fid.value(), filterable_fields) {
                    let field_ids_map = index.fields_ids_map(rtxn)?;
                    if let Some(fid) = field_ids_map.id(fid.value()) {
                        Self::evaluate_operator(rtxn, index, fid, universe, op)
                    } else {
                        Ok(RoaringBitmap::new())
                    }
                } else {
                    Err(fid.as_external_error(FilterError::AttributeNotFilterable {
                        attribute: fid.value(),
                        filterable_fields: filterable_fields.clone(),
                    }))?
                }
                let Some(field_id) = field_ids_map.id(fid.value()) else {
                    return Ok(RoaringBitmap::new());
                };
                let Some((rule_index, features)) =
                    matching_features(fid.value(), filterable_attribute_rules)
                else {
                    return Ok(RoaringBitmap::new());
                };

                Self::evaluate_operator(rtxn, index, field_id, universe, op, &features, rule_index)
            }
            FilterCondition::Or(subfilters) => subfilters
                .iter()
                .cloned()
                .map(|f| Self::inner_evaluate(&f.into(), rtxn, index, filterable_fields, universe))
                .map(|f| {
                    Self::inner_evaluate(
                        &f.into(),
                        rtxn,
                        index,
                        field_ids_map,
                        filterable_attribute_rules,
                        universe,
                    )
                })
                .union(),
            FilterCondition::And(subfilters) => {
                let mut subfilters_iter = subfilters.iter();

@ -478,7 +535,8 @@ impl<'a> Filter<'a> {
                    &(first_subfilter.clone()).into(),
                    rtxn,
                    index,
                    filterable_fields,
                    field_ids_map,
                    filterable_attribute_rules,
                    universe,
                )?;
                for f in subfilters_iter {

@ -492,7 +550,8 @@ impl<'a> Filter<'a> {
                        &(f.clone()).into(),
                        rtxn,
                        index,
                        filterable_fields,
                        field_ids_map,
                        filterable_attribute_rules,
                        Some(&bitmap),
                    )?;
                }

@ -502,7 +561,7 @@ impl<'a> Filter<'a> {
                }
            }
            FilterCondition::GeoLowerThan { point, radius } => {
                if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
                if index.is_geo_filtering_enabled(rtxn)? {
                    let base_point: [f64; 2] =
                        [point[0].parse_finite_float()?, point[1].parse_finite_float()?];
                    if !(-90.0..=90.0).contains(&base_point[0]) {

@ -532,12 +591,15 @@ impl<'a> Filter<'a> {
                } else {
                    Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
                        attribute: RESERVED_GEO_FIELD_NAME,
                        filterable_fields: filterable_fields.clone(),
                        filterable_patterns: filtered_matching_patterns(
                            filterable_attribute_rules,
                            &|features| features.is_filterable(),
                        ),
                    }))?
                }
            }
            FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
                if filterable_fields.contains(RESERVED_GEO_FIELD_NAME) {
                if index.is_geo_filtering_enabled(rtxn)? {
                    let top_right: [f64; 2] = [
                        top_right_point[0].parse_finite_float()?,
                        top_right_point[1].parse_finite_float()?,

@ -592,7 +654,8 @@ impl<'a> Filter<'a> {
                    let selected_lat = Filter { condition: condition_lat }.inner_evaluate(
                        rtxn,
                        index,
                        filterable_fields,
                        field_ids_map,
                        filterable_attribute_rules,
                        universe,
                    )?;

@ -625,7 +688,8 @@ impl<'a> Filter<'a> {
                        let left = Filter { condition: condition_left }.inner_evaluate(
                            rtxn,
                            index,
                            filterable_fields,
                            field_ids_map,
                            filterable_attribute_rules,
                            universe,
                        )?;

@ -639,7 +703,8 @@ impl<'a> Filter<'a> {
                        let right = Filter { condition: condition_right }.inner_evaluate(
                            rtxn,
                            index,
                            filterable_fields,
                            field_ids_map,
                            filterable_attribute_rules,
                            universe,
                        )?;

@ -655,7 +720,8 @@ impl<'a> Filter<'a> {
                        Filter { condition: condition_lng }.inner_evaluate(
                            rtxn,
                            index,
                            filterable_fields,
                            field_ids_map,
                            filterable_attribute_rules,
                            universe,
                        )?
                    };

@ -665,7 +731,10 @@ impl<'a> Filter<'a> {
                    Err(top_right_point[0].as_external_error(
                        FilterError::AttributeNotFilterable {
                            attribute: RESERVED_GEO_FIELD_NAME,
                            filterable_fields: filterable_fields.clone(),
                            filterable_patterns: filtered_matching_patterns(
                                filterable_attribute_rules,
                                &|features| features.is_filterable(),
                            ),
                        },
                    ))?
                }

@ -674,6 +743,28 @@ impl<'a> Filter<'a> {
    }
}

fn generate_filter_error(
    rtxn: &heed::RoTxn<'_>,
    index: &Index,
    field_id: FieldId,
    operator: &Condition<'_>,
    features: &FilterableAttributesFeatures,
    rule_index: usize,
) -> Error {
    match index.fields_ids_map(rtxn) {
        Ok(fields_ids_map) => {
            let field = fields_ids_map.name(field_id).unwrap_or_default();
            Error::UserError(UserError::FilterOperatorNotAllowed {
                field: field.to_string(),
                allowed_operators: features.allowed_filter_operators(),
                operator: operator.operator().to_string(),
                rule_index,
            })
        }
        Err(e) => e.into(),
    }
}
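To make the operator gating above concrete, here is a standalone sketch (hypothetical types mirroring the settings' `equality`/`comparison` features, not the milli API): a rule with comparison disabled rejects range operators but still answers equality filters:

```rust
#[derive(Debug)]
enum Condition {
    Equal,
    GreaterThan,
}

struct Features {
    equality: bool,
    comparison: bool,
}

// Mirrors the gating shape: disallowed operators fail fast with an error
// before any facet database is touched.
fn evaluate(op: &Condition, features: &Features) -> Result<(), String> {
    match op {
        Condition::GreaterThan if !features.comparison => {
            Err("comparison operators are not allowed for this attribute".to_string())
        }
        Condition::Equal if !features.equality => {
            Err("equality operators are not allowed for this attribute".to_string())
        }
        _ => Ok(()), // the real implementation goes on to query the facet databases
    }
}

fn main() {
    let features = Features { equality: true, comparison: false };
    assert!(evaluate(&Condition::Equal, &features).is_ok());
    assert!(evaluate(&Condition::GreaterThan, &features).is_err());
}
```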

impl<'a> From<FilterCondition<'a>> for Filter<'a> {
    fn from(fc: FilterCondition<'a>) -> Self {
        Self { condition: fc }

@ -687,12 +778,12 @@ mod tests {

    use big_s::S;
    use either::Either;
    use maplit::hashset;
    use meili_snap::snapshot;
    use roaring::RoaringBitmap;

    use crate::constants::RESERVED_GEO_FIELD_NAME;
    use crate::index::tests::TempIndex;
    use crate::Filter;
    use crate::{Filter, FilterableAttributesRule};

    #[test]
    fn empty_db() {

@ -700,7 +791,9 @@ mod tests {
        // Set the filterable fields to be the channel.
        index
            .update_settings(|settings| {
                settings.set_filterable_fields(hashset! { S("PrIcE") });
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    "PrIcE".to_string(),
                )]);
            })
            .unwrap();

@ -784,27 +877,32 @@ mod tests {
        let rtxn = index.read_txn().unwrap();
        let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
        let error = filter.evaluate(&rtxn, &index).unwrap_err();
        assert!(error.to_string().starts_with(
            "Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
        ));
        snapshot!(error.to_string(), @r###"
        Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
        12:14 _geoRadius(42, 150, 10)
        "###);

        let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
        let error = filter.evaluate(&rtxn, &index).unwrap_err();
        assert!(error.to_string().starts_with(
            "Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
        ));
        snapshot!(error.to_string(), @r###"
        Attribute `_geo` is not filterable. This index does not have configured filterable attributes.
        18:20 _geoBoundingBox([42, 150], [30, 10])
        "###);

        let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
        let error = filter.evaluate(&rtxn, &index).unwrap_err();
        assert!(error.to_string().starts_with(
            "Attribute `dog` is not filterable. This index does not have configured filterable attributes."
        ));
        snapshot!(error.to_string(), @r###"
        Attribute `dog` is not filterable. This index does not have configured filterable attributes.
        1:4 dog = "bernese mountain"
        "###);
        drop(rtxn);

        index
            .update_settings(|settings| {
                settings.set_searchable_fields(vec![S("title")]);
                settings.set_filterable_fields(hashset! { S("title") });
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    "title".to_string(),
                )]);
            })
            .unwrap();

@ -812,39 +910,45 @@ mod tests {

        let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
        let error = filter.evaluate(&rtxn, &index).unwrap_err();
        assert!(error.to_string().starts_with(
            "Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
        ));
        snapshot!(error.to_string(), @r###"
        Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
        12:16 _geoRadius(-100, 150, 10)
        "###);

        let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
        let error = filter.evaluate(&rtxn, &index).unwrap_err();
        assert!(error.to_string().starts_with(
            "Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
        ));
        snapshot!(error.to_string(), @r###"
        Attribute `_geo` is not filterable. Available filterable attribute patterns are: `title`.
        18:20 _geoBoundingBox([42, 150], [30, 10])
        "###);

        let filter = Filter::from_str("name = 12").unwrap().unwrap();
        let error = filter.evaluate(&rtxn, &index).unwrap_err();
        assert!(error.to_string().starts_with(
            "Attribute `name` is not filterable. Available filterable attributes are: `title`."
        ));
        snapshot!(error.to_string(), @r###"
        Attribute `name` is not filterable. Available filterable attribute patterns are: `title`.
        1:5 name = 12
        "###);

        let filter = Filter::from_str("title = \"test\" AND name = 12").unwrap().unwrap();
        let error = filter.evaluate(&rtxn, &index).unwrap_err();
        assert!(error.to_string().starts_with(
            "Attribute `name` is not filterable. Available filterable attributes are: `title`."
        ));
        snapshot!(error.to_string(), @r###"
        Attribute `name` is not filterable. Available filterable attribute patterns are: `title`.
        20:24 title = "test" AND name = 12
        "###);

        let filter = Filter::from_str("title = \"test\" AND name IN [12]").unwrap().unwrap();
        let error = filter.evaluate(&rtxn, &index).unwrap_err();
        assert!(error.to_string().starts_with(
            "Attribute `name` is not filterable. Available filterable attributes are: `title`."
        ));
        snapshot!(error.to_string(), @r###"
        Attribute `name` is not filterable. Available filterable attribute patterns are: `title`.
        20:24 title = "test" AND name IN [12]
        "###);

        let filter = Filter::from_str("title = \"test\" AND name != 12").unwrap().unwrap();
        let error = filter.evaluate(&rtxn, &index).unwrap_err();
        assert!(error.to_string().starts_with(
            "Attribute `name` is not filterable. Available filterable attributes are: `title`."
        ));
        snapshot!(error.to_string(), @r###"
        Attribute `name` is not filterable. Available filterable attribute patterns are: `title`.
        20:24 title = "test" AND name != 12
        "###);
    }

    #[test]

@ -870,7 +974,9 @@ mod tests {

        index
            .update_settings(|settings| {
                settings.set_filterable_fields(hashset!(S("monitor_diagonal")));
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    "monitor_diagonal".to_string(),
                )]);
            })
            .unwrap();

@ -901,7 +1007,9 @@ mod tests {

        index
            .update_settings(|settings| {
                settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S(
                    RESERVED_GEO_FIELD_NAME,
                ))]);
            })
            .unwrap();

@ -948,7 +1056,10 @@ mod tests {
        index
            .update_settings(|settings| {
                settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
                settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
                settings.set_filterable_fields(vec![
                    FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
                    FilterableAttributesRule::Field("price".to_string()),
                ]);
            })
            .unwrap();

@ -998,7 +1109,10 @@ mod tests {
        index
            .update_settings(|settings| {
                settings.set_searchable_fields(vec![S(RESERVED_GEO_FIELD_NAME), S("price")]); // to keep the fields order
                settings.set_filterable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("price") });
                settings.set_filterable_fields(vec![
                    FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
                    FilterableAttributesRule::Field("price".to_string()),
                ]);
            })
            .unwrap();

@ -1108,7 +1222,9 @@ mod tests {
        index
            .update_settings(|settings| {
                settings.set_searchable_fields(vec![S("price")]); // to keep the fields order
                settings.set_filterable_fields(hashset! { S("price") });
                settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
                    "price".to_string(),
                )]);
            })
            .unwrap();
        index

@ -1164,7 +1280,11 @@ mod tests {
        index
            .update_settings(|settings| {
                settings.set_primary_key("id".to_owned());
                settings.set_filterable_fields(hashset! { S("id"), S("one"), S("two") });
                settings.set_filterable_fields(vec![
                    FilterableAttributesRule::Field("id".to_string()),
                    FilterableAttributesRule::Field("one".to_string()),
                    FilterableAttributesRule::Field("two".to_string()),
                ]);
            })
            .unwrap();
|
||||
|
||||
|
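Every test above performs the same mechanical migration: a `HashSet<String>` of field names becomes a `Vec<FilterableAttributesRule>`. A minimal sketch of that conversion, assuming only the `FilterableAttributesRule::Field` variant used throughout this diff; a helper like this does not exist in the codebase and is shown for illustration only.

```rust
use milli::FilterableAttributesRule;

// Hedged sketch: converts plain field names to the new rule-based format.
fn as_field_rules(fields: &[&str]) -> Vec<FilterableAttributesRule> {
    fields.iter().map(|f| FilterableAttributesRule::Field(f.to_string())).collect()
}

// Usage mirroring the tests above:
// settings.set_filterable_fields(as_field_rules(&["monitor_diagonal"]));
```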
@@ -10,6 +10,7 @@ use roaring::RoaringBitmap;
use tracing::error;

use crate::error::UserError;
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::search::build_dfa;
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
@@ -73,25 +74,28 @@ impl<'a> SearchForFacetValues<'a> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;

let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
let (valid_fields, hidden_fields) =
index.remove_hidden_fields(rtxn, filterable_fields)?;
let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
if !matching_features(&self.facet, &filterable_attributes_rules)
.map_or(false, |(_, features)| features.is_facet_searchable())
{
let matching_field_names =
filtered_matching_patterns(&filterable_attributes_rules, &|features| {
features.is_facet_searchable()
});
let (valid_patterns, hidden_fields) =
index.remove_hidden_fields(rtxn, matching_field_names)?;

return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),
valid_fields,
valid_patterns,
hidden_fields,
}
.into());
}
};

let fields_ids_map = index.fields_ids_map(rtxn)?;
let fid = match fields_ids_map.id(&self.facet) {
Some(fid) => fid,
// we return an empty list of results when the attribute has been
// set as filterable but no document contains this field (yet).
None => return Ok(Vec::new()),
let Some(fid) = fields_ids_map.id(&self.facet) else {
return Ok(Vec::new());
};

let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
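The replacement above is the pattern reused across the search code: instead of testing membership in a flat set of filterable fields, a facet is first matched against the rules, then the matched rule's features are queried. A hedged sketch of that gate; the module path and visibility of `matching_features` are assumed from the `use crate::...` line in this diff.

```rust
use milli::FilterableAttributesRule;
// Path assumed from the diff's internal import; it may not be re-exported.
use milli::filterable_attributes_rules::matching_features;

// Returns true only when some rule matches `facet` AND that rule enables facet search.
fn facet_is_searchable(facet: &str, rules: &[FilterableAttributesRule]) -> bool {
    matching_features(facet, rules).map_or(false, |(_, features)| features.is_facet_searchable())
}
```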
@@ -9,6 +9,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder;
use crate::{
@@ -187,13 +188,22 @@ impl<'a> Search<'a> {
}

if let Some(distinct) = &self.distinct {
let filterable_fields = ctx.index.filterable_fields(ctx.txn)?;
if !crate::is_faceted(distinct, &filterable_fields) {
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?;
let filterable_fields = ctx.index.filterable_attributes_rules(ctx.txn)?;
// check if the distinct field is in the filterable fields
if !matching_features(distinct, &filterable_fields)
.map_or(false, |(_, features)| features.is_filterable())
{
// if not, remove the hidden fields from the filterable fields to generate the error message
let matching_patterns =
filtered_matching_patterns(&filterable_fields, &|features| {
features.is_filterable()
});
let (valid_patterns, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, matching_patterns)?;
// and return the error
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
field: distinct.clone(),
valid_fields,
valid_patterns,
hidden_fields,
}));
}
@@ -57,6 +57,7 @@ impl RankingRuleGraphTrait for FidGraph {
let term = to_term;

let mut all_fields = FxHashSet::default();
let mut current_max_weight = 0;
for word in term.term_subset.all_single_words_except_prefix_db(ctx)? {
let fields = ctx.get_db_word_fids(word.interned())?;
all_fields.extend(fields);
@@ -81,6 +82,9 @@ impl RankingRuleGraphTrait for FidGraph {
let weight = weights_map
.weight(fid)
.ok_or(InternalError::FieldidsWeightsMapMissingEntry { key: fid })?;
if weight > current_max_weight {
current_max_weight = weight;
}
edges.push((
weight as u32 * term.term_ids.len() as u32,
conditions_interner.insert(FidCondition { term: term.clone(), fid: Some(fid) }),
@@ -88,10 +92,10 @@ impl RankingRuleGraphTrait for FidGraph {
}

// always lookup the max_fid if we don't already and add an artificial condition for max scoring
let max_weight: Option<u16> = weights_map.max_weight();
let max_weight = ctx.index.max_searchable_attribute_weight(ctx.txn)?;

if let Some(max_weight) = max_weight {
if !all_fields.contains(&max_weight) {
if current_max_weight < max_weight {
edges.push((
max_weight as u32 * term.term_ids.len() as u32, // TODO improve the fid score i.e. fid^10.
conditions_interner.insert(FidCondition {
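In plain terms, the hunk above changes where the maximum attribute weight comes from (the index rather than the weights seen for this term) and only adds the artificial max-scoring edge when the current maximum falls short. A self-contained model of that edge construction; `max_weight` stands in for `Index::max_searchable_attribute_weight` and the condition interner is elided.

```rust
// Sketch: compute edge costs for one term, adding an artificial edge only
// when the best weight seen so far is below the index-wide maximum.
fn edge_costs(seen_weights: &[u16], term_ids_len: u32, max_weight: Option<u16>) -> Vec<u32> {
    let mut edges: Vec<u32> =
        seen_weights.iter().map(|&w| w as u32 * term_ids_len).collect();
    let current_max_weight = seen_weights.iter().copied().max().unwrap_or(0);
    if let Some(max_weight) = max_weight {
        // artificial condition for max scoring, added only when needed
        if current_max_weight < max_weight {
            edges.push(max_weight as u32 * term_ids_len);
        }
    }
    edges
}

fn main() {
    assert_eq!(edge_costs(&[0, 1], 2, Some(3)), vec![0, 2, 6]);
}
```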
@@ -5,13 +5,11 @@

use std::time::Duration;

use big_s::S;
use maplit::hashset;
use meili_snap::snapshot;

use crate::index::tests::TempIndex;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::{Criterion, Filter, Search, TimeBudget};
use crate::{Criterion, Filter, FilterableAttributesRule, Search, TimeBudget};

fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -20,7 +18,7 @@ fn create_index() -> TempIndex {
.update_settings(|s| {
s.set_primary_key("id".to_owned());
s.set_searchable_fields(vec!["text".to_owned()]);
s.set_filterable_fields(hashset! { S("id") });
s.set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_owned())]);
s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
})
.unwrap();
@@ -19,7 +19,10 @@ use maplit::hashset;

use super::collect_field_values;
use crate::index::tests::TempIndex;
use crate::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
use crate::{
AscDesc, Criterion, FilterableAttributesRule, Index, Member, Search, SearchResult,
TermsMatchingStrategy,
};

fn create_index() -> TempIndex {
let index = TempIndex::new();
@@ -236,7 +239,7 @@ fn test_distinct_placeholder_no_ranking_rules() {
// Set the letter as filterable and unset the distinct attribute.
index
.update_settings(|s| {
s.set_filterable_fields(hashset! { S("letter") });
s.set_filterable_fields(vec![FilterableAttributesRule::Field("letter".to_owned())]);
s.reset_distinct_field();
})
.unwrap();
@@ -9,7 +9,7 @@ use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::{IndexerConfig, Settings};
use crate::vector::EmbeddingConfigs;
use crate::{db_snap, Criterion, Index};
use crate::{db_snap, Criterion, FilterableAttributesRule, Index};
pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson");
use crate::constants::RESERVED_GEO_FIELD_NAME;

@@ -25,14 +25,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
let mut builder = Settings::new(&mut wtxn, &index, &config);

builder.set_criteria(criteria.to_vec());
builder.set_filterable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
S(RESERVED_GEO_FIELD_NAME),
S("opt1"),
S("opt1.opt2"),
S("tag_in")
});
builder.set_filterable_fields(vec![
FilterableAttributesRule::Field(S("tag")),
FilterableAttributesRule::Field(S("asc_desc_rank")),
FilterableAttributesRule::Field(S(RESERVED_GEO_FIELD_NAME)),
FilterableAttributesRule::Field(S("opt1")),
FilterableAttributesRule::Field(S("opt1.opt2")),
FilterableAttributesRule::Field(S("tag_in")),
]);
builder.set_sortable_fields(hashset! {
S("tag"),
S("asc_desc_rank"),
@@ -1,5 +1,5 @@
---
source: milli/src/search/new/tests/attribute_position.rs
source: crates/milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[

@@ -1,5 +1,5 @@
---
source: milli/src/search/new/tests/attribute_position.rs
source: crates/milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[

@@ -1,5 +1,5 @@
---
source: milli/src/search/new/tests/attribute_position.rs
source: crates/milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[

@@ -1,5 +1,5 @@
---
source: milli/src/search/new/tests/attribute_position.rs
source: crates/milli/src/search/new/tests/attribute_position.rs
expression: "format!(\"{document_ids_scores:#?}\")"
---
[
@@ -386,7 +386,7 @@ pub fn snap_settings(index: &Index) -> String {
write_setting_to_snap!(criteria);
write_setting_to_snap!(displayed_fields);
write_setting_to_snap!(distinct_field);
write_setting_to_snap!(filterable_fields);
write_setting_to_snap!(filterable_attributes_rules);
write_setting_to_snap!(sortable_fields);
write_setting_to_snap!(synonyms);
write_setting_to_snap!(authorize_typos);
@@ -81,6 +81,17 @@ pub enum DelAddOperation {
DeletionAndAddition,
}

impl DelAddOperation {
/// Merge two DelAddOperation enum variants.
pub fn merge(self, other: Self) -> Self {
match (self, other) {
(Self::Deletion, Self::Deletion) => Self::Deletion,
(Self::Addition, Self::Addition) => Self::Addition,
_ => Self::DeletionAndAddition,
}
}
}

/// Creates a Kv<K, Kv<DelAdd, value>> from two Kv<K, value>
///
/// putting each deletion obkv's keys under an DelAdd::Deletion
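`merge` exists so that later code (see the `rebind_existing_document` changes in transform.rs further down) can combine the operation required by the facet diff with the one required by the searchable diff. A self-contained illustration of its semantics; the enum is re-declared here so the snippet runs on its own.

```rust
#[derive(Debug, Clone, Copy, PartialEq)]
enum DelAddOperation {
    Deletion,
    Addition,
    DeletionAndAddition,
}

impl DelAddOperation {
    /// Same merge table as the impl above: agreement keeps the variant,
    /// any disagreement widens to DeletionAndAddition.
    fn merge(self, other: Self) -> Self {
        match (self, other) {
            (Self::Deletion, Self::Deletion) => Self::Deletion,
            (Self::Addition, Self::Addition) => Self::Addition,
            _ => Self::DeletionAndAddition,
        }
    }
}

fn main() {
    // A field deleted from facets but added to searchables needs both sides.
    assert_eq!(
        DelAddOperation::Deletion.merge(DelAddOperation::Addition),
        DelAddOperation::DeletionAndAddition
    );
}
```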
@@ -6,7 +6,7 @@ use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
use roaring::RoaringBitmap;

use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use super::{clear_facet_levels, FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::facet::FacetType;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
@@ -97,9 +97,7 @@ pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
pub fn update(mut self, wtxn: &mut RwTxn<'_>, field_ids: &[u16]) -> Result<()> {
self.update_level0(wtxn)?;
for &field_id in field_ids.iter() {
self.clear_levels(wtxn, field_id)?;
}
clear_facet_levels(wtxn, &self.db.remap_data_type(), field_ids)?;

for &field_id in field_ids.iter() {
let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?;
@@ -114,14 +112,6 @@
Ok(())
}

fn clear_levels(&self, wtxn: &mut heed::RwTxn<'_>, field_id: FieldId) -> Result<()> {
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let range = left..=right;
self.db.delete_range(wtxn, &range).map(drop)?;
Ok(())
}

fn update_level0(&mut self, wtxn: &mut RwTxn<'_>) -> Result<()> {
let delta_data = match self.delta_data.take() {
Some(x) => x,
@@ -365,8 +355,6 @@
mod tests {
use std::iter::once;

use big_s::S;
use maplit::hashset;
use roaring::RoaringBitmap;

use crate::documents::mmap_from_objects;
@@ -374,7 +362,7 @@ mod tests {
use crate::heed_codec::StrRefCodec;
use crate::index::tests::TempIndex;
use crate::update::facet::test_helpers::{ordered_string, FacetIndex};
use crate::{db_snap, milli_snap};
use crate::{db_snap, milli_snap, FilterableAttributesRule};

#[test]
fn insert() {
@@ -474,7 +462,8 @@
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("id") });
settings
.set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_string())]);
})
.unwrap();
@@ -89,6 +89,7 @@ use time::OffsetDateTime;
use tracing::debug;

use self::incremental::FacetsUpdateIncremental;
use super::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
use crate::facet::FacetType;
use crate::heed_codec::facet::{
@@ -147,7 +148,11 @@ impl<'i> FacetsUpdate<'i> {
}
}

pub fn execute(self, wtxn: &mut heed::RwTxn<'_>) -> Result<()> {
pub fn execute(
self,
wtxn: &mut heed::RwTxn<'_>,
new_settings: &InnerIndexSettings,
) -> Result<()> {
if self.data_size == 0 {
return Ok(());
}
@@ -156,8 +161,7 @@

// See self::comparison_bench::benchmark_facet_indexing
if self.data_size >= (self.database.len(wtxn)? / 500) {
let field_ids =
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let field_ids = facet_levels_field_ids(new_settings);
let bulk_update = FacetsUpdateBulk::new(
self.index,
field_ids,
@@ -291,6 +295,53 @@ fn index_facet_search(
Ok(())
}

/// Clear all the levels greater than 0 for given field ids.
pub fn clear_facet_levels<'a, I>(
wtxn: &mut heed::RwTxn<'_>,
db: &heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DecodeIgnore>,
field_ids: I,
) -> Result<()>
where
I: IntoIterator<Item = &'a FieldId>,
{
for field_id in field_ids {
let field_id = *field_id;
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let range = left..=right;
db.delete_range(wtxn, &range).map(drop)?;
}
Ok(())
}

pub fn clear_facet_levels_based_on_settings_diff(
wtxn: &mut heed::RwTxn<'_>,
index: &Index,
settings_diff: &InnerIndexSettingsDiff,
) -> Result<()> {
let new_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.new);
let old_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.old);

let field_ids_to_clear: Vec<_> = old_field_ids.difference(&new_field_ids).copied().collect();
clear_facet_levels(wtxn, &index.facet_id_string_docids.remap_types(), &field_ids_to_clear)?;
clear_facet_levels(wtxn, &index.facet_id_f64_docids.remap_types(), &field_ids_to_clear)?;
Ok(())
}

fn facet_levels_field_ids<B>(settings: &InnerIndexSettings) -> B
where
B: FromIterator<FieldId>,
{
settings
.fields_ids_map
.iter_id_metadata()
.filter(|(_, metadata)| {
metadata.require_facet_level_database(&settings.filterable_attributes_rules)
})
.map(|(id, _)| id)
.collect()
}

#[cfg(test)]
pub(crate) mod test_helpers {
use std::cell::Cell;
|
||||
// If the settings specifies that a _geo field must be used therefore we must check the
|
||||
// validity of it in all the documents of this batch and this is when we return `Some`.
|
||||
let geo_field_id = match documents_batch_index.id(RESERVED_GEO_FIELD_NAME) {
|
||||
Some(geo_field_id)
|
||||
if index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME)
|
||||
|| index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME) =>
|
||||
{
|
||||
Some(geo_field_id)
|
||||
}
|
||||
Some(geo_field_id) if index.is_geo_enabled(rtxn)? => Some(geo_field_id),
|
||||
_otherwise => None,
|
||||
};
|
||||
|
||||
|
@@ -150,9 +150,14 @@ fn searchable_fields_changed(
obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
) -> bool {
let searchable_fields = &settings_diff.new.searchable_fields_ids;
for (field_id, field_bytes) in obkv.iter() {
if searchable_fields.contains(&field_id) {
let Some(metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
// If the field id is not in the fields ids map, skip it.
// This happens for the vectors sub-fields. for example:
// "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered.
continue;
};
if metadata.is_searchable() {
let del_add = KvReaderDelAdd::from_slice(field_bytes);
match (del_add.get(DelAdd::Deletion), del_add.get(DelAdd::Addition)) {
// if both fields are None, check the next field.
@@ -200,8 +205,14 @@ fn tokens_from_document<'a>(
buffers.obkv_buffer.clear();
let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
for (field_id, field_bytes) in obkv.iter() {
let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
// If the field id is not in the fields ids map, skip it.
// This happens for the vectors sub-fields. for example:
// "_vectors": { "manual": [1, 2, 3]} -> "_vectors.manual" is not registered.
continue;
};
// if field is searchable.
if settings.searchable_fields_ids.contains(&field_id) {
if metadata.is_searchable() {
// extract deletion or addition only.
if let Some(field_bytes) = KvReaderDelAdd::from_slice(field_bytes).get(del_add) {
// parse json.
@@ -216,7 +227,7 @@ fn tokens_from_document<'a>(
buffers.field_buffer.clear();
if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
// create an iterator of token with their positions.
let locales = settings.localized_searchable_fields_ids.locales(field_id);
let locales = metadata.locales(&settings.localized_attributes_rules);
let tokens = process_tokens(tokenizer.tokenize_with_allow_list(field, locales))
.take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
@@ -12,12 +12,11 @@ use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::{BEU16StrCodec, StrRefCodec};
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::index_documents::helpers::{
MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps,
};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};

/// Extracts the facet string and the documents ids where this facet string appear.
@@ -33,13 +32,10 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
if settings_diff.settings_update_only() {
extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
} else {
let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
let facet_search = settings_diff.new.facet_search;
extract_facet_string_docids_document_update(
docid_fid_facet_string,
indexer,
localized_field_ids,
facet_search,
&settings_diff.new,
)
}
}
@@ -52,8 +48,7 @@
fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,
localized_field_ids: &LocalizedFieldIds,
facet_search: bool,
settings: &InnerIndexSettings,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let max_memory = indexer.max_memory_by_thread();

@@ -92,6 +87,14 @@
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);

let Some(metadata) = settings.fields_ids_map.metadata(field_id) else {
unreachable!("metadata not found for field_id: {}", field_id)
};

if !metadata.is_faceted(&settings.filterable_attributes_rules) {
continue;
}

let (document_id_bytes, normalized_value_bytes) =
try_split_array_at::<_, 4>(bytes).unwrap();
let document_id = u32::from_be_bytes(document_id_bytes);
@@ -99,8 +102,10 @@
let normalized_value = str::from_utf8(normalized_value_bytes)?;

// Facet search normalization
if facet_search {
let locales = localized_field_ids.locales(field_id);
let features =
metadata.filterable_attributes_features(&settings.filterable_attributes_rules);
if features.is_facet_searchable() && settings.facet_search {
let locales = metadata.locales(&settings.localized_attributes_rules);
let hyper_normalized_value = normalize_facet_string(normalized_value, locales);

let set = BTreeSet::from_iter(std::iter::once(normalized_value));
@@ -178,8 +183,15 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);

let old_locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id);
let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
let Some(old_metadata) = settings_diff.old.fields_ids_map.metadata(field_id) else {
unreachable!("old metadata not found for field_id: {}", field_id)
};
let Some(new_metadata) = settings_diff.new.fields_ids_map.metadata(field_id) else {
unreachable!("new metadata not found for field_id: {}", field_id)
};

let old_locales = old_metadata.locales(&settings_diff.old.localized_attributes_rules);
let new_locales = new_metadata.locales(&settings_diff.new.localized_attributes_rules);

let are_same_locales = old_locales == new_locales;
let reindex_facet_search =
@@ -197,10 +209,15 @@

// Facet search normalization
if settings_diff.new.facet_search {
let new_filterable_features = new_metadata
.filterable_attributes_features(&settings_diff.new.filterable_attributes_rules);
let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales);
let old_hyper_normalized_value;
let old_filterable_features = old_metadata
.filterable_attributes_features(&settings_diff.old.filterable_attributes_rules);
let old_hyper_normalized_value = if !settings_diff.old.facet_search
|| deladd_reader.get(DelAdd::Deletion).is_none()
|| !old_filterable_features.is_facet_searchable()
{
// if the facet search is disabled in the old settings or if no facet string is deleted,
// we don't need to normalize the facet string.
@@ -215,7 +232,9 @@
let set = BTreeSet::from_iter(std::iter::once(normalized_value));

// if the facet string is the same, we can put the deletion and addition in the same obkv.
if old_hyper_normalized_value == Some(&new_hyper_normalized_value) {
if old_hyper_normalized_value == Some(&new_hyper_normalized_value)
&& new_filterable_features.is_facet_searchable()
{
// nothing to do if we delete and re-add the value.
if is_same_value {
continue;
@@ -249,7 +268,9 @@
}

// addition
if deladd_reader.get(DelAdd::Addition).is_some() {
if new_filterable_features.is_facet_searchable()
&& deladd_reader.get(DelAdd::Addition).is_some()
{
// insert new value
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
buffer.clear();
@@ -76,9 +76,9 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut strings_key_buffer = Vec::new();

let old_faceted_fids: BTreeSet<_> =
settings_diff.old.faceted_fields_ids.iter().copied().collect();
settings_diff.list_faceted_fields_from_fid_map(DelAdd::Deletion);
let new_faceted_fids: BTreeSet<_> =
settings_diff.new.faceted_fields_ids.iter().copied().collect();
settings_diff.list_faceted_fields_from_fid_map(DelAdd::Addition);

if !settings_diff.settings_update_only || settings_diff.reindex_facets() {
let mut cursor = obkv_documents.into_cursor()?;
@@ -15,8 +15,9 @@ use serde_json::Value;
use super::helpers::{create_writer, writer_into_reader, GrenadParameters};
use crate::constants::RESERVED_VECTORS_FIELD_NAME;
use crate::error::FaultSource;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::index::IndexEmbeddingConfig;
use crate::prompt::{FieldsIdsMapWithMetadata, Prompt};
use crate::prompt::Prompt;
use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
use crate::update::settings::InnerIndexSettingsDiff;
use crate::vector::error::{EmbedErrorKind, PossibleEmbeddingMistakes, UnusedVectorsDistribution};
@@ -190,12 +191,8 @@ pub fn extract_vector_points<R: io::Read + io::Seek>(
let reindex_vectors = settings_diff.reindex_vectors();

let old_fields_ids_map = &settings_diff.old.fields_ids_map;
let old_fields_ids_map =
FieldsIdsMapWithMetadata::new(old_fields_ids_map, &settings_diff.old.searchable_fields_ids);

let new_fields_ids_map = &settings_diff.new.fields_ids_map;
let new_fields_ids_map =
FieldsIdsMapWithMetadata::new(new_fields_ids_map, &settings_diff.new.searchable_fields_ids);

// the vector field id may have changed
let old_vectors_fid = old_fields_ids_map.id(RESERVED_VECTORS_FIELD_NAME);
@@ -383,7 +380,7 @@
);
continue;
}
regenerate_prompt(obkv, prompt, &new_fields_ids_map)?
regenerate_prompt(obkv, prompt, new_fields_ids_map)?
}
},
// prompt regeneration is only triggered for existing embedders
@@ -400,7 +397,7 @@
regenerate_if_prompt_changed(
obkv,
(old_prompt, prompt),
(&old_fields_ids_map, &new_fields_ids_map),
(old_fields_ids_map, new_fields_ids_map),
)?
} else {
// we can simply ignore user provided vectors as they are not regenerated and are
@@ -416,7 +413,7 @@
prompt,
(add_to_user_provided, remove_from_user_provided),
(old, new),
(&old_fields_ids_map, &new_fields_ids_map),
(old_fields_ids_map, new_fields_ids_map),
document_id,
embedder_name,
embedder_is_manual,
@@ -486,10 +483,7 @@ fn extract_vector_document_diff(
prompt: &Prompt,
(add_to_user_provided, remove_from_user_provided): (&mut RoaringBitmap, &mut RoaringBitmap),
(old, new): (VectorState, VectorState),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
(old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
document_id: impl Fn() -> Value,
embedder_name: &str,
embedder_is_manual: bool,
@@ -611,10 +605,7 @@
fn regenerate_if_prompt_changed(
obkv: &obkv::KvReader<FieldId>,
(old_prompt, new_prompt): (&Prompt, &Prompt),
(old_fields_ids_map, new_fields_ids_map): (
&FieldsIdsMapWithMetadata,
&FieldsIdsMapWithMetadata,
),
(old_fields_ids_map, new_fields_ids_map): (&FieldIdMapWithMetadata, &FieldIdMapWithMetadata),
) -> Result<VectorStateDelta> {
let old_prompt = old_prompt
.render_kvdeladd(obkv, DelAdd::Deletion, old_fields_ids_map)
@@ -630,7 +621,7 @@ fn regenerate_if_prompt_changed(
fn regenerate_prompt(
obkv: &obkv::KvReader<FieldId>,
prompt: &Prompt,
new_fields_ids_map: &FieldsIdsMapWithMetadata,
new_fields_ids_map: &FieldIdMapWithMetadata,
) -> Result<VectorStateDelta> {
let prompt = prompt.render_kvdeladd(obkv, DelAdd::Addition, new_fields_ids_map)?;
@@ -26,6 +26,7 @@ use typed_chunk::{write_typed_chunk_into_index, ChunkAccumulator, TypedChunk};
pub use self::enrich::{extract_finite_float_from_value, DocumentId};
pub use self::helpers::*;
pub use self::transform::{Transform, TransformOutput};
use super::facet::clear_facet_levels_based_on_settings_diff;
use super::new::StdResult;
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError};
@@ -215,9 +216,8 @@ where
flattened_documents,
} = output;

// update the internal facet and searchable list,
// update the searchable list,
// because they might have changed due to the nested documents flattening.
settings_diff.new.recompute_facets(self.wtxn, self.index)?;
settings_diff.new.recompute_searchables(self.wtxn, self.index)?;

let settings_diff = Arc::new(settings_diff);
@@ -465,6 +465,11 @@ where
}
}

// If the settings are only being updated, we may have to clear some of the facet levels.
if settings_diff.settings_update_only() {
clear_facet_levels_based_on_settings_diff(self.wtxn, self.index, &settings_diff)?;
}

Ok(())
}).map_err(InternalError::from)??;
@@ -776,7 +781,7 @@ mod tests {
use crate::search::TermsMatchingStrategy;
use crate::update::new::indexer;
use crate::update::Setting;
use crate::{all_obkv_to_json, db_snap, Filter, Search, UserError};
use crate::{all_obkv_to_json, db_snap, Filter, FilterableAttributesRule, Search, UserError};

#[test]
fn simple_document_replacement() {
@@ -1006,7 +1011,9 @@ mod tests {

index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();
}
@@ -1018,7 +1025,9 @@ mod tests {

index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
})
.unwrap();

@@ -1234,16 +1243,17 @@ mod tests {
let searchable_fields = vec![S("title"), S("nested.object"), S("nested.machin")];
settings.set_searchable_fields(searchable_fields);

let faceted_fields = hashset!(S("title"), S("nested.object"), S("nested.machin"));
let faceted_fields = vec![
FilterableAttributesRule::Field("title".to_string()),
FilterableAttributesRule::Field("nested.object".to_string()),
FilterableAttributesRule::Field("nested.machin".to_string()),
];
settings.set_filterable_fields(faceted_fields);
})
.unwrap();

let rtxn = index.read_txn().unwrap();

let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("title"), S("nested.object"), S("nested.machin")));

// testing the simple query search
let mut search = crate::Search::new(&rtxn, &index);
search.query("document");
@@ -1438,7 +1448,9 @@ mod tests {

index
.update_settings(|settings| {
settings.set_filterable_fields(hashset!(String::from("dog")));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"dog".to_string(),
)]);
})
.unwrap();

@@ -1457,10 +1469,6 @@ mod tests {

let rtxn = index.read_txn().unwrap();

let hidden = index.faceted_fields(&rtxn).unwrap();

assert_eq!(hidden, hashset!(S("dog"), S("dog.race"), S("dog.race.bernese mountain")));

for (s, i) in [("zeroth", 0), ("first", 1), ("second", 2), ("third", 3)] {
let mut search = crate::Search::new(&rtxn, &index);
let filter = format!(r#""dog.race.bernese mountain" = {s}"#);
@@ -1478,12 +1486,6 @@
db_snap!(index, facet_id_string_docids, @"");
db_snap!(index, field_id_docid_facet_strings, @"");

let rtxn = index.read_txn().unwrap();

let facets = index.faceted_fields(&rtxn).unwrap();

assert_eq!(facets, hashset!());

// update the settings to test the sortable
index
.update_settings(|settings| {
@@ -1506,10 +1508,6 @@

let rtxn = index.read_txn().unwrap();

let facets = index.faceted_fields(&rtxn).unwrap();

assert_eq!(facets, hashset!(S("dog.race"), S("dog.race.bernese mountain")));

let mut search = crate::Search::new(&rtxn, &index);
search.sort_criteria(vec![crate::AscDesc::Asc(crate::Member::Field(S(
"dog.race.bernese mountain",
@@ -1717,8 +1715,6 @@

let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));

let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@@ -1738,7 +1734,7 @@
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![7]);
};

let faceted_fields = hashset!(S("colour"));
let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())];

let index = TempIndex::new();
index.add_documents(content()).unwrap();
@@ -1823,8 +1819,6 @@

let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("colour"), S("colour.green"), S("colour.green.blue")));

let colour_id = index.fields_ids_map(&rtxn).unwrap().id("colour").unwrap();
let colour_green_id = index.fields_ids_map(&rtxn).unwrap().id("colour.green").unwrap();
@@ -1844,7 +1838,7 @@
assert_eq!(bitmap_colour_blue.into_iter().collect::<Vec<_>>(), vec![3]);
};

let faceted_fields = hashset!(S("colour"));
let faceted_fields = vec![FilterableAttributesRule::Field("colour".to_string())];

let index = TempIndex::new();
index.add_documents(content()).unwrap();
@@ -1887,8 +1881,6 @@

let check_ok = |index: &Index| {
let rtxn = index.read_txn().unwrap();
let facets = index.faceted_fields(&rtxn).unwrap();
assert_eq!(facets, hashset!(S("tags"), S("tags.green"), S("tags.green.blue")));

let tags_id = index.fields_ids_map(&rtxn).unwrap().id("tags").unwrap();
let tags_green_id = index.fields_ids_map(&rtxn).unwrap().id("tags.green").unwrap();
@@ -1907,7 +1899,7 @@
assert_eq!(bitmap_tags_blue.into_iter().collect::<Vec<_>>(), vec![12]);
};

let faceted_fields = hashset!(S("tags"));
let faceted_fields = vec![FilterableAttributesRule::Field("tags".to_string())];

let index = TempIndex::new();
index.add_documents(content()).unwrap();
@@ -2259,7 +2251,9 @@

index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("title") });
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
"title".to_string(),
)]);
})
.unwrap();

@@ -3117,7 +3111,10 @@
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("docid"));
settings.set_filterable_fields(hashset! { S("label"), S("label2") });
settings.set_filterable_fields(vec![
FilterableAttributesRule::Field("label".to_string()),
FilterableAttributesRule::Field("label2".to_string()),
]);
})
.unwrap();
wtxn.commit().unwrap();
@@ -3296,7 +3293,9 @@
index
.update_settings_using_wtxn(&mut wtxn, |settings| {
settings.set_primary_key(S("id"));
settings.set_filterable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
settings.set_filterable_fields(vec![FilterableAttributesRule::Field(
RESERVED_GEO_FIELD_NAME.to_string(),
)]);
settings.set_sortable_fields(hashset!(S(RESERVED_GEO_FIELD_NAME)));
})
.unwrap();
@@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::collections::btree_map::Entry as BEntry;
use std::collections::hash_map::Entry as HEntry;
use std::collections::{BTreeMap, HashMap, HashSet};
use std::collections::{BTreeMap, HashMap};
use std::fs::File;
use std::io::{Read, Seek};

@@ -18,8 +18,10 @@ use super::helpers::{
ObkvsMergeAdditionsAndDeletions,
};
use super::{create_writer, IndexDocumentsMethod, IndexerConfig, KeepFirst};
use crate::attribute_patterns::PatternMatch;
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::index::{db_name, main_key};
use crate::update::del_add::{
into_del_add_obkv, into_del_add_obkv_conditional_operation, DelAdd, DelAddOperation,
@@ -31,9 +33,7 @@ use crate::update::{AvailableIds, UpdateIndexingStep};
use crate::vector::parsed_vectors::{ExplicitVectors, VectorOrArrayOfVectors};
use crate::vector::settings::WriteBackToDocuments;
use crate::vector::ArroyWrapper;
use crate::{
is_faceted_by, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
};
use crate::{FieldDistribution, FieldId, FieldIdMapMissingEntry, Index, Result};

pub struct TransformOutput {
pub primary_key: String,
@@ -52,7 +52,7 @@ pub struct TransformOutput {
/// containing all those documents.
pub struct Transform<'a, 'i> {
pub index: &'i Index,
fields_ids_map: FieldsIdsMap,
fields_ids_map: FieldIdMapWithMetadata,

indexer_settings: &'a IndexerConfig,
pub index_documents_method: IndexDocumentsMethod,
@@ -84,7 +84,7 @@ pub enum Operation {
///
/// If new fields are present in the addition, they are added to the index field ids map.
fn create_fields_mapping(
index_field_map: &mut FieldsIdsMap,
index_field_map: &mut FieldIdMapWithMetadata,
batch_field_map: &DocumentsBatchIndex,
) -> Result<HashMap<FieldId, FieldId>> {
batch_field_map
@@ -141,10 +141,13 @@ impl<'a, 'i> Transform<'a, 'i> {
true,
);
let documents_ids = index.documents_ids(wtxn)?;
let fields_ids_map = index.fields_ids_map(wtxn)?;
let builder = MetadataBuilder::from_index(index, wtxn)?;
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);

Ok(Transform {
index,
fields_ids_map: index.fields_ids_map(wtxn)?,
fields_ids_map,
indexer_settings,
available_documents_ids: AvailableIds::new(&documents_ids),
original_sorter,
@@ -354,7 +357,7 @@
documents_seen: documents_count,
});

self.index.put_fields_ids_map(wtxn, &self.fields_ids_map)?;
self.index.put_fields_ids_map(wtxn, self.fields_ids_map.as_fields_ids_map())?;
self.index.put_primary_key(wtxn, &primary_key)?;
self.documents_count += documents_count;
// Now that we have a valid sorter that contains the user id and the obkv we
@@ -371,7 +374,7 @@
)]
fn flatten_from_fields_ids_map(
obkv: &KvReader<FieldId>,
fields_ids_map: &mut FieldsIdsMap,
fields_ids_map: &mut FieldIdMapWithMetadata,
) -> Result<Option<Vec<u8>>> {
if obkv
.iter()
@@ -657,7 +660,6 @@
fn rebind_existing_document(
old_obkv: &KvReader<FieldId>,
settings_diff: &InnerIndexSettingsDiff,
modified_faceted_fields: &HashSet<String>,
mut injected_vectors: serde_json::Map<String, serde_json::Value>,
old_vectors_fid: Option<FieldId>,
original_obkv_buffer: Option<&mut Vec<u8>>,
@@ -667,23 +669,26 @@
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };

// If only a faceted field has been added, keep only this field.
let global_facet_settings_changed = settings_diff.global_facet_settings_changed();
let facet_fids_changed = settings_diff.facet_fids_changed();
let necessary_faceted_field =
|id: FieldId| -> bool {

let necessary_faceted_field = |id: FieldId| -> Option<DelAddOperation> {
if facet_fids_changed {
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
if global_facet_settings_changed {
settings_diff.new.user_defined_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else if facet_fids_changed {
modified_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else {
false
// if the faceted fields changed, we need to keep all the field that are
// faceted in the old or new settings.
match (
settings_diff.old.match_faceted_field(field_name),
settings_diff.new.match_faceted_field(field_name),
) {
(PatternMatch::NoMatch, PatternMatch::NoMatch) => None,
(PatternMatch::NoMatch, _) => Some(DelAddOperation::Addition),
(_, PatternMatch::NoMatch) => Some(DelAddOperation::Deletion),
(_, _) => Some(DelAddOperation::DeletionAndAddition),
}
};
} else {
None
}
};

// Always provide all fields when vectors are involved because
// we need the fields for the prompt/templating.
@@ -734,12 +739,24 @@
}
}

if is_primary_key(id) || necessary_faceted_field(id) || reindex_vectors {
if is_primary_key(id) || reindex_vectors {
operations.insert(id, DelAddOperation::DeletionAndAddition);
obkv_writer.insert(id, val)?;
} else if let Some(operation) = settings_diff.reindex_searchable_id(id) {
operations.insert(id, operation);
obkv_writer.insert(id, val)?;
} else {
let facet_operation = necessary_faceted_field(id);
let searchable_operation = settings_diff.reindex_searchable_id(id);
let operation = match (facet_operation, searchable_operation) {
(Some(facet_operation), Some(searchable_operation)) => {
Some(facet_operation.merge(searchable_operation))
}
(Some(operation), None) | (None, Some(operation)) => Some(operation),
(None, None) => None,
};

if let Some(operation) = operation {
operations.insert(id, operation);
obkv_writer.insert(id, val)?;
}
}
}
if !injected_vectors.is_empty() {
@@ -856,7 +873,6 @@
};

if original_sorter.is_some() || flattened_sorter.is_some() {
let modified_faceted_fields = settings_diff.modified_faceted_fields();
let mut original_obkv_buffer = Vec::new();
let mut flattened_obkv_buffer = Vec::new();
let mut document_sorter_key_buffer = Vec::new();
@@ -897,7 +913,6 @@
Self::rebind_existing_document(
old_obkv,
&settings_diff,
&modified_faceted_fields,
injected_vectors,
old_vectors_fid,
Some(&mut original_obkv_buffer).filter(|_| original_sorter.is_some()),
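The new `necessary_faceted_field` closure above boils down to a four-case decision table over old and new pattern matches. A self-contained model of it; `PatternMatch` is reduced to two variants here, while the real type in `crate::attribute_patterns` may carry more.

```rust
#[derive(Clone, Copy, PartialEq)]
enum PatternMatch { Match, NoMatch }

#[derive(Debug, PartialEq)]
enum DelAddOperation { Deletion, Addition, DeletionAndAddition }

// Compares how a field matches the old vs. new faceted-field rules and yields
// the reindex operation required while rebinding an existing document.
fn facet_operation(old: PatternMatch, new: PatternMatch) -> Option<DelAddOperation> {
    match (old, new) {
        (PatternMatch::NoMatch, PatternMatch::NoMatch) => None,
        (PatternMatch::NoMatch, _) => Some(DelAddOperation::Addition),
        (_, PatternMatch::NoMatch) => Some(DelAddOperation::Deletion),
        (_, _) => Some(DelAddOperation::DeletionAndAddition),
    }
}

fn main() {
    // A field that stops being faceted only needs its facet entries deleted.
    assert_eq!(
        facet_operation(PatternMatch::Match, PatternMatch::NoMatch),
        Some(DelAddOperation::Deletion)
    );
}
```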
@@ -365,7 +365,7 @@ pub(crate) fn write_typed_chunk_into_index(
let merger = builder.build();

let indexer = FacetsUpdate::new(index, FacetType::Number, merger, None, data_size);
indexer.execute(wtxn)?;
indexer.execute(wtxn, &settings_diff.new)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetStringDocids(_) => {
@@ -401,7 +401,7 @@
Some(normalized_facet_id_string_merger),
data_size,
);
indexer.execute(wtxn)?;
indexer.execute(wtxn, &settings_diff.new)?;
is_merged_database = true;
}
TypedChunk::FieldIdFacetExistsDocids(_) => {
@@ -4,10 +4,10 @@ use heed::RoTxn;
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
};
use super::extract::perm_json_p;
use super::vector_document::{
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
};
use crate::attribute_patterns::PatternMatch;
use crate::documents::FieldIdMapper;
use crate::vector::EmbeddingConfigs;
use crate::{DocumentId, Index, Result};
@@ -167,13 +167,15 @@ impl<'doc> Update<'doc> {
}
}

/// Returns whether the updated version of the document is different from the current version for the passed subset of fields.
/// Returns whether the updated version of the document is different from the current version for the subset of fields selected by `selector`.
///
/// `true` if at least one top-level-field that is a exactly a member of field or a parent of a member of field changed.
/// `true` if at least one top-level-field that is exactly a selected field or a parent of a selected field changed.
/// Otherwise `false`.
///
/// - Note: `_geo` and `_vectors` are not taken into account by this function.
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
&self,
fields: Option<&[&str]>,
selector: &mut impl FnMut(&str) -> PatternMatch,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
@@ -185,7 +187,7 @@
for entry in self.only_changed_fields().iter_top_level_fields() {
let (key, updated_value) = entry?;

if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
if selector(key) == PatternMatch::NoMatch {
continue;
}

@@ -229,7 +231,7 @@
for entry in current.iter_top_level_fields() {
let (key, _) = entry?;

if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
if selector(key) == PatternMatch::NoMatch {
continue;
}
current_selected_field_count += 1;
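`has_changed_for_fields` now takes a selector closure instead of a fixed field list, so callers can plug in pattern matching (the faceted extractor below passes `match_faceted_field`). A simplified, self-contained model of the selection loop; the real method also counts selected fields on both versions so that field deletions are detected.

```rust
#[derive(Clone, Copy, PartialEq)]
enum PatternMatch { Match, NoMatch }

// Compares only the top-level fields the selector matches, like the loops above.
fn has_changed_for_fields(
    current: &[(&str, &str)],
    updated: &[(&str, &str)],
    selector: &mut impl FnMut(&str) -> PatternMatch,
) -> bool {
    for &(key, new_value) in updated {
        if selector(key) == PatternMatch::NoMatch {
            continue;
        }
        match current.iter().find(|&&(k, _)| k == key) {
            Some(&(_, old_value)) if old_value == new_value => {}
            _ => return true,
        }
    }
    false
}

fn main() {
    let current = [("title", "a"), ("price", "1")];
    let updated = [("title", "a"), ("price", "2")];
    let mut only_title =
        |k: &str| if k == "title" { PatternMatch::Match } else { PatternMatch::NoMatch };
    // `price` changed but is not selected, so no reindex is triggered.
    assert!(!has_changed_for_fields(&current, &updated, &mut only_title));
}
```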
@@ -5,12 +5,13 @@ use std::ops::DerefMut as _;
use bumpalo::collections::Vec as BVec;
use bumpalo::Bump;
use hashbrown::HashMap;
use heed::RoTxn;
use serde_json::Value;

use super::super::cache::BalancedCaches;
use super::facet_document::extract_document_facets;
use super::FacetKind;
use crate::fields_ids_map::metadata::Metadata;
use crate::filterable_attributes_rules::match_faceted_field;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
@@ -23,13 +24,17 @@ use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
use crate::{DocumentId, FieldId, FilterableAttributesRule, Result, MAX_FACET_VALUE_LENGTH};

pub struct FacetedExtractorData<'a, 'b> {
attributes_to_extract: &'a [&'a str],
sender: &'a FieldIdDocidFacetSender<'a, 'b>,
grenad_parameters: &'a GrenadParameters,
buckets: usize,
filterable_attributes: &'a [FilterableAttributesRule],
sortable_fields: &'a HashSet<String>,
asc_desc_fields: &'a HashSet<String>,
distinct_field: &'a Option<String>,
is_geo_enabled: bool,
}

impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
@@ -52,7 +57,11 @@ impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b>
let change = change?;
FacetedDocidsExtractor::extract_document_change(
context,
self.attributes_to_extract,
self.filterable_attributes,
self.sortable_fields,
self.asc_desc_fields,
self.distinct_field,
self.is_geo_enabled,
change,
self.sender,
)?
@@ -64,13 +73,18 @@
pub struct FacetedDocidsExtractor;

impl FacetedDocidsExtractor {
#[allow(clippy::too_many_arguments)]
fn extract_document_change(
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
attributes_to_extract: &[&str],
filterable_attributes: &[FilterableAttributesRule],
sortable_fields: &HashSet<String>,
asc_desc_fields: &HashSet<String>,
distinct_field: &Option<String>,
is_geo_enabled: bool,
document_change: DocumentChange,
sender: &FieldIdDocidFacetSender,
) -> Result<()> {
let index = &context.index;
let index = context.index;
let rtxn = &context.rtxn;
let mut new_fields_ids_map = context.new_fields_ids_map.borrow_mut_or_yield();
let mut cached_sorter = context.data.borrow_mut_or_yield();
@@ -78,11 +92,15 @@
let docid = document_change.docid();
let res = match document_change {
DocumentChange::Deletion(inner) => extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
is_geo_enabled,
&mut |fid, meta, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@@ -91,6 +109,8 @@
DelAddFacetValue::insert_del,
docid,
fid,
meta,
filterable_attributes,
depth,
value,
)
@@ -98,7 +118,15 @@
),
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
Some(attributes_to_extract),
&mut |field_name| {
match_faceted_field(
field_name,
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
)
},
rtxn,
index,
context.db_fields_ids_map,
@@ -107,11 +135,15 @@
}

extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
is_geo_enabled,
&mut |fid, meta, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@@ -120,6 +152,8 @@
DelAddFacetValue::insert_del,
docid,
fid,
meta,
filterable_attributes,
depth,
value,
)
@@ -127,11 +161,15 @@
)?;

extract_document_facets(
attributes_to_extract,
inner.merged(rtxn, index, context.db_fields_ids_map)?,
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
is_geo_enabled,
&mut |fid, meta, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@@ -140,6 +178,8 @@
DelAddFacetValue::insert_add,
docid,
fid,
meta,
filterable_attributes,
depth,
value,
)
@@ -147,11 +187,15 @@
)
}
DocumentChange::Insertion(inner) => extract_document_facets(
attributes_to_extract,
inner.inserted(),
inner.external_document_id(),
new_fields_ids_map.deref_mut(),
&mut |fid, depth, value| {
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
is_geo_enabled,
&mut |fid, meta, depth, value| {
Self::facet_fn_with_options(
&context.doc_alloc,
cached_sorter.deref_mut(),
@@ -160,6 +204,8 @@
DelAddFacetValue::insert_add,
docid,
fid,
meta,
filterable_attributes,
depth,
value,
)
@ -180,9 +226,18 @@ impl FacetedDocidsExtractor {
|
||||
facet_fn: impl Fn(&mut DelAddFacetValue<'doc>, FieldId, BVec<'doc, u8>, FacetKind),
|
||||
docid: DocumentId,
|
||||
fid: FieldId,
|
||||
meta: Metadata,
|
||||
filterable_attributes: &[FilterableAttributesRule],
|
||||
depth: perm_json_p::Depth,
|
||||
value: &Value,
|
||||
) -> Result<()> {
|
||||
// if the field is not faceted, do nothing
|
||||
if !meta.is_faceted(filterable_attributes) {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let features = meta.filterable_attributes_features(filterable_attributes);
|
||||
|
||||
let mut buffer = BVec::new_in(doc_alloc);
|
||||
// Exists
|
||||
// key: fid
|
||||
@ -246,7 +301,9 @@ impl FacetedDocidsExtractor {
|
||||
}
|
||||
// Null
|
||||
// key: fid
|
||||
Value::Null if depth == perm_json_p::Depth::OnBaseKey => {
|
||||
Value::Null
|
||||
if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_null() =>
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Null as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
@ -254,19 +311,29 @@ impl FacetedDocidsExtractor {
|
||||
}
|
||||
// Empty
|
||||
// key: fid
|
||||
Value::Array(a) if a.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
|
||||
Value::Array(a)
|
||||
if a.is_empty()
|
||||
&& depth == perm_json_p::Depth::OnBaseKey
|
||||
&& features.is_filterable_empty() =>
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Empty as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
cache_fn(cached_sorter, &buffer, docid)
|
||||
}
|
||||
Value::String(_) if depth == perm_json_p::Depth::OnBaseKey => {
|
||||
Value::String(_)
|
||||
if depth == perm_json_p::Depth::OnBaseKey && features.is_filterable_empty() =>
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Empty as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
cache_fn(cached_sorter, &buffer, docid)
|
||||
}
|
||||
Value::Object(o) if o.is_empty() && depth == perm_json_p::Depth::OnBaseKey => {
|
||||
Value::Object(o)
|
||||
if o.is_empty()
|
||||
&& depth == perm_json_p::Depth::OnBaseKey
|
||||
&& features.is_filterable_empty() =>
|
||||
{
|
||||
buffer.clear();
|
||||
buffer.push(FacetKind::Empty as u8);
|
||||
buffer.extend_from_slice(&fid.to_be_bytes());
|
||||
@ -276,10 +343,6 @@ impl FacetedDocidsExtractor {
|
||||
_ => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<HashSet<String>> {
|
||||
index.user_defined_faceted_fields(rtxn)
|
||||
}
|
||||
}
|
||||
|
||||
struct DelAddFacetValue<'doc> {
|
||||
@ -399,9 +462,11 @@ impl FacetedDocidsExtractor {
|
||||
{
|
||||
let index = indexing_context.index;
|
||||
let rtxn = index.read_txn()?;
|
||||
let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
|
||||
let attributes_to_extract: Vec<_> =
|
||||
attributes_to_extract.iter().map(|s| s.as_ref()).collect();
|
||||
let filterable_attributes = index.filterable_attributes_rules(&rtxn)?;
|
||||
let sortable_fields = index.sortable_fields(&rtxn)?;
|
||||
let asc_desc_fields = index.asc_desc_fields(&rtxn)?;
|
||||
let distinct_field = index.distinct_field(&rtxn)?.map(|s| s.to_string());
|
||||
let is_geo_enabled = index.is_geo_enabled(&rtxn)?;
|
||||
let datastore = ThreadLocal::new();
|
||||
|
||||
{
|
||||
@ -410,10 +475,14 @@ impl FacetedDocidsExtractor {
|
||||
let _entered = span.enter();
|
||||
|
||||
let extractor = FacetedExtractorData {
|
||||
attributes_to_extract: &attributes_to_extract,
|
||||
grenad_parameters: indexing_context.grenad_parameters,
|
||||
buckets: rayon::current_num_threads(),
|
||||
sender,
|
||||
filterable_attributes: &filterable_attributes,
|
||||
sortable_fields: &sortable_fields,
|
||||
asc_desc_fields: &asc_desc_fields,
|
||||
distinct_field: &distinct_field,
|
||||
is_geo_enabled,
|
||||
};
|
||||
extract(
|
||||
document_changes,
|
||||
|
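An aside on the gating that `facet_fn_with_options` introduces above: a field is now skipped outright unless some rule marks it as faceted, and the matched rule's features are consulted before any `Null`/`Empty` facet entry is emitted. A minimal standalone sketch of that decision logic, using simplified hypothetical types (`Features`, `FacetEntry`, `classify`) rather than milli's actual API:

```rust
use serde_json::Value;

#[derive(Debug, PartialEq)]
enum FacetEntry {
    Null,
    Empty,
    Skip,
}

// hypothetical stand-in for milli's per-field filterable features
struct Features {
    filterable_null: bool,
    filterable_empty: bool,
}

// mirrors the Null/Empty match arms above: a value only produces a facet
// entry when the corresponding feature is enabled for that field
fn classify(value: &Value, features: &Features) -> FacetEntry {
    match value {
        Value::Null if features.filterable_null => FacetEntry::Null,
        Value::Array(a) if a.is_empty() && features.filterable_empty => FacetEntry::Empty,
        Value::Object(o) if o.is_empty() && features.filterable_empty => FacetEntry::Empty,
        _ => FacetEntry::Skip,
    }
}

fn main() {
    let disabled = Features { filterable_null: false, filterable_empty: false };
    let enabled = Features { filterable_null: true, filterable_empty: true };
    assert_eq!(classify(&Value::Null, &enabled), FacetEntry::Null);
    assert_eq!(classify(&Value::Null, &disabled), FacetEntry::Skip);
}
```

Values whose features are disabled never reach the sorter at all, which is where the indexing-time and disk-size savings come from.
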
@@ -1,46 +1,80 @@
use std::collections::HashSet;

use serde_json::Value;

use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::attribute_patterns::PatternMatch;
use crate::fields_ids_map::metadata::Metadata;
use crate::update::new::document::Document;
use crate::update::new::extract::geo::extract_geo_coordinates;
use crate::update::new::extract::perm_json_p;
use crate::{FieldId, GlobalFieldsIdsMap, InternalError, Result, UserError};
use crate::{
FieldId, FilterableAttributesRule, GlobalFieldsIdsMap, InternalError, Result, UserError,
};

use crate::filterable_attributes_rules::match_faceted_field;

#[allow(clippy::too_many_arguments)]
pub fn extract_document_facets<'doc>(
attributes_to_extract: &[&str],
document: impl Document<'doc>,
external_document_id: &str,
field_id_map: &mut GlobalFieldsIdsMap,
facet_fn: &mut impl FnMut(FieldId, perm_json_p::Depth, &Value) -> Result<()>,
filterable_attributes: &[FilterableAttributesRule],
sortable_fields: &HashSet<String>,
asc_desc_fields: &HashSet<String>,
distinct_field: &Option<String>,
is_geo_enabled: bool,
facet_fn: &mut impl FnMut(FieldId, Metadata, perm_json_p::Depth, &Value) -> Result<()>,
) -> Result<()> {
// return the match result for the given field name.
let match_field = |field_name: &str| -> PatternMatch {
match_faceted_field(
field_name,
filterable_attributes,
sortable_fields,
asc_desc_fields,
distinct_field,
)
};

// extract the field if it is faceted (facet searchable, filterable, sortable)
let mut extract_field = |name: &str, depth: perm_json_p::Depth, value: &Value| -> Result<()> {
match field_id_map.id_with_metadata_or_insert(name) {
Some((field_id, meta)) => {
facet_fn(field_id, meta, depth, value)?;

Ok(())
}
None => Err(UserError::AttributeLimitReached.into()),
}
};

for res in document.iter_top_level_fields() {
let (field_name, value) = res?;
let selection = match_field(field_name);

let mut tokenize_field =
|name: &str, depth: perm_json_p::Depth, value: &Value| match field_id_map
.id_or_insert(name)
{
Some(field_id) => facet_fn(field_id, depth, value),
None => Err(UserError::AttributeLimitReached.into()),
};
// extract the field if it matches a pattern and if it is faceted (facet searchable, filterable, sortable)
let mut match_and_extract = |name: &str, depth: perm_json_p::Depth, value: &Value| {
let selection = match_field(name);
if selection == PatternMatch::Match {
extract_field(name, depth, value)?;
}

// if the current field is searchable or contains a searchable attribute
let selection = perm_json_p::select_field(field_name, Some(attributes_to_extract), &[]);
if selection != perm_json_p::Selection::Skip {
Ok(selection)
};

if selection != PatternMatch::NoMatch {
// parse json.
match serde_json::value::to_value(value).map_err(InternalError::SerdeJson)? {
Value::Object(object) => {
perm_json_p::seek_leaf_values_in_object(
&object,
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
perm_json_p::Depth::OnBaseKey,
&mut tokenize_field,
&mut match_and_extract,
)?;

if selection == perm_json_p::Selection::Select {
tokenize_field(
if selection == PatternMatch::Match {
extract_field(
field_name,
perm_json_p::Depth::OnBaseKey,
&Value::Object(object),
@@ -50,36 +84,34 @@ pub fn extract_document_facets<'doc>(
Value::Array(array) => {
perm_json_p::seek_leaf_values_in_array(
&array,
Some(attributes_to_extract),
&[], // skip no attributes
field_name,
perm_json_p::Depth::OnBaseKey,
&mut tokenize_field,
&mut match_and_extract,
)?;

if selection == perm_json_p::Selection::Select {
tokenize_field(
if selection == PatternMatch::Match {
extract_field(
field_name,
perm_json_p::Depth::OnBaseKey,
&Value::Array(array),
)?;
}
}
value => tokenize_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
value => extract_field(field_name, perm_json_p::Depth::OnBaseKey, &value)?,
}
}
}

if attributes_to_extract.contains(&RESERVED_GEO_FIELD_NAME) {
if is_geo_enabled {
if let Some(geo_value) = document.geo_field()? {
if let Some([lat, lng]) = extract_geo_coordinates(external_document_id, geo_value)? {
let (lat_fid, lng_fid) = field_id_map
.id_or_insert("_geo.lat")
.zip(field_id_map.id_or_insert("_geo.lng"))
let ((lat_fid, lat_meta), (lng_fid, lng_meta)) = field_id_map
.id_with_metadata_or_insert("_geo.lat")
.zip(field_id_map.id_with_metadata_or_insert("_geo.lng"))
.ok_or(UserError::AttributeLimitReached)?;

facet_fn(lat_fid, perm_json_p::Depth::OnBaseKey, &lat.into())?;
facet_fn(lng_fid, perm_json_p::Depth::OnBaseKey, &lng.into())?;
facet_fn(lat_fid, lat_meta, perm_json_p::Depth::OnBaseKey, &lat.into())?;
facet_fn(lng_fid, lng_meta, perm_json_p::Depth::OnBaseKey, &lng.into())?;
}
}
}

@@ -9,7 +9,6 @@ use heed::RoTxn;
use serde_json::value::RawValue;
use serde_json::Value;

use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::GeoError;
use crate::update::new::document::Document;
use crate::update::new::indexer::document_changes::{DocumentChangeContext, Extractor};
@@ -29,9 +28,7 @@ impl GeoExtractor {
index: &Index,
grenad_parameters: GrenadParameters,
) -> Result<Option<Self>> {
let is_sortable = index.sortable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
let is_filterable = index.filterable_fields(rtxn)?.contains(RESERVED_GEO_FIELD_NAME);
if is_sortable || is_filterable {
if index.is_geo_enabled(rtxn)? {
Ok(Some(GeoExtractor { grenad_parameters }))
} else {
Ok(None)

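Side note on the `GeoExtractor` change above: the two hand-rolled checks on the reserved `_geo` field are collapsed into a single `index.is_geo_enabled(rtxn)?` call. Conceptually the predicate is still the disjunction the deleted lines spelled out; a trivial standalone sketch under that assumption (`GeoSettings` is a hypothetical type, not milli's):

```rust
// hypothetical container for the two settings the deleted lines inspected
struct GeoSettings {
    sortable_fields: Vec<String>,
    filterable_fields: Vec<String>,
}

impl GeoSettings {
    // "_geo" is enabled as soon as it is sortable or filterable
    fn is_geo_enabled(&self) -> bool {
        self.sortable_fields.iter().any(|f| f == "_geo")
            || self.filterable_fields.iter().any(|f| f == "_geo")
    }
}

fn main() {
    let settings = GeoSettings {
        sortable_fields: vec!["_geo".to_string()],
        filterable_fields: Vec::new(),
    };
    assert!(settings.is_geo_enabled());
}
```
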
@@ -5,7 +5,6 @@ mod geo;
mod searchable;
mod vectors;

use bumpalo::Bump;
pub use cache::{
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
};
@@ -15,27 +14,11 @@ pub use geo::*;
pub use searchable::*;
pub use vectors::EmbeddingExtractor;

use super::indexer::document_changes::{DocumentChanges, IndexingContext};
use super::steps::IndexingStep;
use super::thread_local::{FullySend, ThreadLocal};
use crate::Result;

pub trait DocidsExtractor {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync;
}

/// TODO move in permissive json pointer
pub mod perm_json_p {
use serde_json::{Map, Value};

use crate::Result;
use crate::{attribute_patterns::PatternMatch, Result};
const SPLIT_SYMBOL: char = '.';

/// Returns `true` if the `selector` match the `key`.
@@ -68,11 +51,9 @@ pub mod perm_json_p {

pub fn seek_leaf_values_in_object(
value: &Map<String, Value>,
selectors: Option<&[&str]>,
skip_selectors: &[&str],
base_key: &str,
base_depth: Depth,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
) -> Result<()> {
if value.is_empty() {
seeker(base_key, base_depth, &Value::Object(Map::with_capacity(0)))?;
@@ -85,40 +66,16 @@ pub mod perm_json_p {
format!("{}{}{}", base_key, SPLIT_SYMBOL, key)
};

// here if the user only specified `doggo` we need to iterate in all the fields of `doggo`
// so we check the contained_in on both side
let selection = select_field(&base_key, selectors, skip_selectors);
if selection != Selection::Skip {
let selection = seeker(&base_key, Depth::OnBaseKey, value)?;
if selection != PatternMatch::NoMatch {
match value {
Value::Object(object) => {
if selection == Selection::Select {
seeker(&base_key, Depth::OnBaseKey, value)?;
}

seek_leaf_values_in_object(
object,
selectors,
skip_selectors,
&base_key,
Depth::OnBaseKey,
seeker,
)
seek_leaf_values_in_object(object, &base_key, Depth::OnBaseKey, seeker)
}
Value::Array(array) => {
if selection == Selection::Select {
seeker(&base_key, Depth::OnBaseKey, value)?;
}

seek_leaf_values_in_array(
array,
selectors,
skip_selectors,
&base_key,
Depth::OnBaseKey,
seeker,
)
seek_leaf_values_in_array(array, &base_key, Depth::OnBaseKey, seeker)
}
value => seeker(&base_key, Depth::OnBaseKey, value),
_ => Ok(()),
}?;
}
}
@@ -128,11 +85,9 @@ pub mod perm_json_p {

pub fn seek_leaf_values_in_array(
values: &[Value],
selectors: Option<&[&str]>,
skip_selectors: &[&str],
base_key: &str,
base_depth: Depth,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<()>,
seeker: &mut impl FnMut(&str, Depth, &Value) -> Result<PatternMatch>,
) -> Result<()> {
if values.is_empty() {
seeker(base_key, base_depth, &Value::Array(vec![]))?;
@@ -140,61 +95,16 @@ pub mod perm_json_p {

for value in values {
match value {
Value::Object(object) => seek_leaf_values_in_object(
object,
selectors,
skip_selectors,
base_key,
Depth::InsideArray,
seeker,
),
Value::Array(array) => seek_leaf_values_in_array(
array,
selectors,
skip_selectors,
base_key,
Depth::InsideArray,
seeker,
),
value => seeker(base_key, Depth::InsideArray, value),
Value::Object(object) => {
seek_leaf_values_in_object(object, base_key, Depth::InsideArray, seeker)
}
Value::Array(array) => {
seek_leaf_values_in_array(array, base_key, Depth::InsideArray, seeker)
}
value => seeker(base_key, Depth::InsideArray, value).map(|_| ()),
}?;
}

Ok(())
}

pub fn select_field(
field_name: &str,
selectors: Option<&[&str]>,
skip_selectors: &[&str],
) -> Selection {
if skip_selectors.iter().any(|skip_selector| {
contained_in(skip_selector, field_name) || contained_in(field_name, skip_selector)
}) {
Selection::Skip
} else if let Some(selectors) = selectors {
let mut selection = Selection::Skip;
for selector in selectors {
if contained_in(field_name, selector) {
selection = Selection::Select;
break;
} else if contained_in(selector, field_name) {
selection = Selection::Parent;
}
}
selection
} else {
Selection::Select
}
}

#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Selection {
/// The field is a parent of the of a nested field that must be selected
Parent,
/// The field must be selected
Select,
/// The field must be skipped
Skip,
}
}

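The `perm_json_p` rework above removes the `selectors`/`skip_selectors` arguments entirely: the seeker callback now returns a `PatternMatch`, and that return value is what decides whether the traversal descends into a nested object or array. A self-contained sketch of the same control flow, simplified to `Match`/`NoMatch` over objects only (`seek_leaves` is a hypothetical name, not milli's function):

```rust
use serde_json::{json, Value};

#[derive(Clone, Copy, PartialEq)]
enum PatternMatch {
    Match,
    NoMatch,
}

// the seeker is called on every key; its return value steers the recursion
fn seek_leaves(
    value: &Value,
    base_key: &str,
    seeker: &mut impl FnMut(&str, &Value) -> PatternMatch,
) {
    if let Value::Object(object) = value {
        for (key, value) in object {
            let key =
                if base_key.is_empty() { key.clone() } else { format!("{base_key}.{key}") };
            if seeker(&key, value) == PatternMatch::Match {
                seek_leaves(value, &key, seeker);
            }
        }
    }
}

fn main() {
    let doc = json!({ "cattos": 2, "doggos": { "age": 4, "name": "bob" } });
    let mut seen = Vec::new();
    seek_leaves(&doc, "", &mut |key, _value| {
        if key.starts_with("doggos") {
            seen.push(key.to_string());
            PatternMatch::Match
        } else {
            PatternMatch::NoMatch // "cattos" is neither kept nor descended into
        }
    });
    assert_eq!(seen, ["doggos", "doggos.age", "doggos.name"]);
}
```

Pushing the decision into the seeker is what lets a single traversal serve both the searchable and the faceted extractors, each with its own matching rules.
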
@@ -5,8 +5,8 @@ use std::ops::DerefMut as _;

use bumpalo::collections::vec::Vec as BumpVec;
use bumpalo::Bump;
use heed::RoTxn;

use super::match_searchable_field;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::extract::perm_json_p::contained_in;
@@ -17,8 +17,7 @@ use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{bucketed_position, DocumentId, FieldId, Index, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::{bucketed_position, DocumentId, FieldId, Result, MAX_POSITION_PER_ATTRIBUTE};

const MAX_COUNTED_WORDS: usize = 30;

@@ -207,9 +206,10 @@ impl<'extractor> WordDocidsCaches<'extractor> {
}

pub struct WordDocidsExtractorData<'a> {
tokenizer: &'a DocumentTokenizer<'a>,
grenad_parameters: &'a GrenadParameters,
tokenizer: DocumentTokenizer<'a>,
max_memory_by_thread: Option<usize>,
buckets: usize,
searchable_attributes: Option<Vec<&'a str>>,
}

impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
@@ -218,7 +218,7 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(Some(WordDocidsBalancedCaches::new_in(
self.buckets,
self.grenad_parameters.max_memory_by_thread(),
self.max_memory_by_thread,
extractor_alloc,
))))
}
@@ -230,7 +230,12 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
) -> Result<()> {
for change in changes {
let change = change?;
WordDocidsExtractors::extract_document_change(context, self.tokenizer, change)?;
WordDocidsExtractors::extract_document_change(
context,
&self.tokenizer,
self.searchable_attributes.as_deref(),
change,
)?;
}
Ok(())
}
@@ -248,52 +253,42 @@ impl WordDocidsExtractors {
where
MSP: Fn() -> bool + Sync,
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;

let stop_words = index.stop_words(&rtxn)?;
let allowed_separators = index.allowed_separators(&rtxn)?;
// Warning: this is duplicated code from extract_word_pair_proximity_docids.rs
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = index.dictionary(&rtxn)?;
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let builder = tokenizer_builder(
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.into_tokenizer();

let attributes_to_extract = Self::attributes_to_extract(&rtxn, index)?;
let attributes_to_skip = Self::attributes_to_skip(&rtxn, index)?;
let tokenizer = builder.build();
let localized_attributes_rules =
index.localized_attributes_rules(&rtxn)?.unwrap_or_default();

indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
attribute_to_extract: attributes_to_extract.as_deref(),
attribute_to_skip: attributes_to_skip.as_slice(),
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};

let extractor_data = WordDocidsExtractorData {
tokenizer: document_tokenizer,
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
buckets: rayon::current_num_threads(),
searchable_attributes: indexing_context.index.user_defined_searchable_fields(&rtxn)?,
};
let datastore = ThreadLocal::new();

{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();

let extractor = WordDocidsExtractorData {
tokenizer: &document_tokenizer,
grenad_parameters: indexing_context.grenad_parameters,
buckets: rayon::current_num_threads(),
};

extract(
document_changes,
&extractor,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
@@ -312,6 +307,7 @@ impl WordDocidsExtractors {
fn extract_document_change(
context: &DocumentChangeContext<RefCell<Option<WordDocidsBalancedCaches>>>,
document_tokenizer: &DocumentTokenizer,
searchable_attributes: Option<&[&str]>,
document_change: DocumentChange,
) -> Result<()> {
let index = &context.index;
@@ -345,7 +341,9 @@ impl WordDocidsExtractors {
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
&mut |field_name: &str| {
match_searchable_field(field_name, searchable_attributes)
},
&context.rtxn,
context.index,
context.db_fields_ids_map,
@@ -408,15 +406,4 @@ impl WordDocidsExtractors {
let mut buffer = BumpVec::with_capacity_in(buffer_size, &context.doc_alloc);
cached_sorter.flush_fid_word_count(&mut buffer)
}

fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}

fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(Vec::new())
}
}

@@ -2,30 +2,114 @@ use std::cell::RefCell;
use std::collections::VecDeque;
use std::rc::Rc;

use heed::RoTxn;
use bumpalo::Bump;

use super::tokenize_document::DocumentTokenizer;
use super::SearchableExtractor;
use super::match_searchable_field;
use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::proximity::{index_proximity, MAX_DISTANCE};
use crate::update::new::document::Document;
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::indexer::document_changes::DocumentChangeContext;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::{FieldId, GlobalFieldsIdsMap, Index, Result};
use crate::{FieldId, GlobalFieldsIdsMap, Result, MAX_POSITION_PER_ATTRIBUTE};

pub struct WordPairProximityDocidsExtractorData<'a> {
tokenizer: DocumentTokenizer<'a>,
searchable_attributes: Option<Vec<&'a str>>,
max_memory_by_thread: Option<usize>,
buckets: usize,
}

impl<'a, 'extractor> Extractor<'extractor> for WordPairProximityDocidsExtractorData<'a> {
type Data = RefCell<BalancedCaches<'extractor>>;

fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(BalancedCaches::new_in(
self.buckets,
self.max_memory_by_thread,
extractor_alloc,
)))
}

fn process<'doc>(
&self,
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
context: &DocumentChangeContext<Self::Data>,
) -> Result<()> {
for change in changes {
let change = change?;
WordPairProximityDocidsExtractor::extract_document_change(
context,
&self.tokenizer,
self.searchable_attributes.as_deref(),
change,
)?;
}
Ok(())
}
}

pub struct WordPairProximityDocidsExtractor;

impl SearchableExtractor for WordPairProximityDocidsExtractor {
fn attributes_to_extract<'a>(
rtxn: &'a RoTxn,
index: &'a Index,
) -> Result<Option<Vec<&'a str>>> {
index.user_defined_searchable_fields(rtxn).map_err(Into::into)
}
impl WordPairProximityDocidsExtractor {
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
{
// Warning: this is duplicated code from extract_word_docids.rs
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};
let extractor_data = WordPairProximityDocidsExtractorData {
tokenizer: document_tokenizer,
searchable_attributes: indexing_context.index.user_defined_searchable_fields(&rtxn)?,
max_memory_by_thread: indexing_context.grenad_parameters.max_memory_by_thread(),
buckets: rayon::current_num_threads(),
};
let datastore = ThreadLocal::new();
{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
extract(
document_changes,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}

fn attributes_to_skip<'a>(_rtxn: &'a RoTxn, _index: &'a Index) -> Result<Vec<&'a str>> {
Ok(Vec::new())
Ok(datastore.into_iter().map(RefCell::into_inner).collect())
}

// This method is reimplemented to count the number of words in the document in each field
@@ -34,6 +118,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
fn extract_document_change(
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
document_tokenizer: &DocumentTokenizer,
searchable_attributes: Option<&[&str]>,
document_change: DocumentChange,
) -> Result<()> {
let doc_alloc = &context.doc_alloc;
@@ -71,7 +156,9 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
&mut |field_name: &str| {
match_searchable_field(field_name, searchable_attributes)
},
rtxn,
index,
context.db_fields_ids_map,

@@ -2,145 +2,28 @@ mod extract_word_docids;
mod extract_word_pair_proximity_docids;
mod tokenize_document;

use std::cell::RefCell;
use std::marker::PhantomData;

use bumpalo::Bump;
pub use extract_word_docids::{WordDocidsCaches, WordDocidsExtractors};
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
use heed::RoTxn;
use tokenize_document::{tokenizer_builder, DocumentTokenizer};

use super::cache::BalancedCaches;
use super::DocidsExtractor;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{Index, Result, MAX_POSITION_PER_ATTRIBUTE};
use crate::attribute_patterns::{match_field_legacy, PatternMatch};

pub struct SearchableExtractorData<'a, EX: SearchableExtractor> {
tokenizer: &'a DocumentTokenizer<'a>,
grenad_parameters: &'a GrenadParameters,
buckets: usize,
_ex: PhantomData<EX>,
}
pub fn match_searchable_field(
field_name: &str,
searchable_fields: Option<&[&str]>,
) -> PatternMatch {
let Some(searchable_fields) = searchable_fields else {
// If no searchable fields are provided, consider all fields as searchable
return PatternMatch::Match;
};

impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
for SearchableExtractorData<'a, EX>
{
type Data = RefCell<BalancedCaches<'extractor>>;

fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
Ok(RefCell::new(BalancedCaches::new_in(
self.buckets,
self.grenad_parameters.max_memory_by_thread(),
extractor_alloc,
)))
}

fn process<'doc>(
&self,
changes: impl Iterator<Item = Result<DocumentChange<'doc>>>,
context: &DocumentChangeContext<Self::Data>,
) -> Result<()> {
for change in changes {
let change = change?;
EX::extract_document_change(context, self.tokenizer, change)?;
let mut selection = PatternMatch::NoMatch;
for pattern in searchable_fields {
match match_field_legacy(pattern, field_name) {
PatternMatch::Match => return PatternMatch::Match,
PatternMatch::Parent => selection = PatternMatch::Parent,
PatternMatch::NoMatch => (),
}
Ok(())
}
}

pub trait SearchableExtractor: Sized + Sync {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
{
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
let allowed_separators = indexing_context.index.allowed_separators(&rtxn)?;
let allowed_separators: Option<Vec<_>> =
allowed_separators.as_ref().map(|s| s.iter().map(String::as_str).collect());
let dictionary = indexing_context.index.dictionary(&rtxn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let mut builder = tokenizer_builder(
stop_words.as_ref(),
allowed_separators.as_deref(),
dictionary.as_deref(),
);
let tokenizer = builder.build();

let attributes_to_extract = Self::attributes_to_extract(&rtxn, indexing_context.index)?;
let attributes_to_skip = Self::attributes_to_skip(&rtxn, indexing_context.index)?;
let localized_attributes_rules =
indexing_context.index.localized_attributes_rules(&rtxn)?.unwrap_or_default();

let document_tokenizer = DocumentTokenizer {
tokenizer: &tokenizer,
attribute_to_extract: attributes_to_extract.as_deref(),
attribute_to_skip: attributes_to_skip.as_slice(),
localized_attributes_rules: &localized_attributes_rules,
max_positions_per_attributes: MAX_POSITION_PER_ATTRIBUTE,
};

let extractor_data: SearchableExtractorData<Self> = SearchableExtractorData {
tokenizer: &document_tokenizer,
grenad_parameters: indexing_context.grenad_parameters,
buckets: rayon::current_num_threads(),
_ex: PhantomData,
};

let datastore = ThreadLocal::new();

{
let span =
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
let _entered = span.enter();
extract(
document_changes,
&extractor_data,
indexing_context,
extractor_allocs,
&datastore,
step,
)?;
}

Ok(datastore.into_iter().map(RefCell::into_inner).collect())
}

fn extract_document_change(
context: &DocumentChangeContext<RefCell<BalancedCaches>>,
document_tokenizer: &DocumentTokenizer,
document_change: DocumentChange,
) -> Result<()>;

fn attributes_to_extract<'a>(rtxn: &'a RoTxn, index: &'a Index)
-> Result<Option<Vec<&'a str>>>;

fn attributes_to_skip<'a>(rtxn: &'a RoTxn, index: &'a Index) -> Result<Vec<&'a str>>;
}

impl<T: SearchableExtractor> DocidsExtractor for T {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
{
Self::run_extraction(document_changes, indexing_context, extractor_allocs, step)
}
selection
}

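`match_searchable_field` above replaces the old `SearchableExtractor` trait plumbing with a plain fold over the user-defined patterns: `None` means every field is searchable, an exact `Match` wins immediately, and a `Parent` hit is remembered so callers can keep descending into nested fields. A standalone sketch of that aggregation, with a naive dot-path matcher standing in for `match_field_legacy` (the matcher is a hypothetical simplification, not milli's implementation):

```rust
#[derive(Clone, Copy, Debug, PartialEq)]
enum PatternMatch {
    Match,
    Parent,
    NoMatch,
}

// naive stand-in: a pattern matches the field itself and any nested sub-field;
// a field that is a strict prefix of the pattern is a Parent that may still
// contain matching children
fn match_pattern(pattern: &str, field: &str) -> PatternMatch {
    if field == pattern || field.starts_with(&format!("{pattern}.")) {
        PatternMatch::Match
    } else if pattern.starts_with(&format!("{field}.")) {
        PatternMatch::Parent
    } else {
        PatternMatch::NoMatch
    }
}

fn match_searchable_field(field: &str, searchable: Option<&[&str]>) -> PatternMatch {
    // no user-defined list: every field is searchable
    let Some(patterns) = searchable else { return PatternMatch::Match };
    let mut selection = PatternMatch::NoMatch;
    for pattern in patterns {
        match match_pattern(pattern, field) {
            // an exact match wins immediately
            PatternMatch::Match => return PatternMatch::Match,
            // remember that the field is a parent of some pattern
            PatternMatch::Parent => selection = PatternMatch::Parent,
            PatternMatch::NoMatch => (),
        }
    }
    selection
}

fn main() {
    assert_eq!(match_searchable_field("doggos.age", Some(&["doggos"])), PatternMatch::Match);
    assert_eq!(match_searchable_field("doggos", Some(&["doggos.age"])), PatternMatch::Parent);
    assert_eq!(match_searchable_field("cattos", Some(&["doggos"])), PatternMatch::NoMatch);
    assert_eq!(match_searchable_field("anything", None), PatternMatch::Match);
}
```
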
@@ -3,9 +3,10 @@ use std::collections::HashMap;
use charabia::{SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
use serde_json::Value;

use crate::attribute_patterns::PatternMatch;
use crate::update::new::document::Document;
use crate::update::new::extract::perm_json_p::{
seek_leaf_values_in_array, seek_leaf_values_in_object, select_field, Depth, Selection,
seek_leaf_values_in_array, seek_leaf_values_in_object, Depth,
};
use crate::{
FieldId, GlobalFieldsIdsMap, InternalError, LocalizedAttributesRule, Result, UserError,
@@ -17,8 +18,6 @@ const MAX_DISTANCE: u32 = 8;

pub struct DocumentTokenizer<'a> {
pub tokenizer: &'a Tokenizer<'a>,
pub attribute_to_extract: Option<&'a [&'a str]>,
pub attribute_to_skip: &'a [&'a str],
pub localized_attributes_rules: &'a [LocalizedAttributesRule],
pub max_positions_per_attributes: u32,
}
@@ -31,87 +30,94 @@ impl<'a> DocumentTokenizer<'a> {
token_fn: &mut impl FnMut(&str, FieldId, u16, &str) -> Result<()>,
) -> Result<()> {
let mut field_position = HashMap::new();
let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
let Some((field_id, meta)) = field_id_map.id_with_metadata_or_insert(field_name) else {
return Err(UserError::AttributeLimitReached.into());
};

if meta.is_searchable() {
self.tokenize_field(field_id, field_name, value, token_fn, &mut field_position)?;
}

// todo: should be a match on the field_name using `match_field_legacy` function,
// but for legacy reasons we iterate over all the fields to fill the field_id_map.
Ok(PatternMatch::Match)
};

for entry in document.iter_top_level_fields() {
let (field_name, value) = entry?;

let mut tokenize_field = |field_name: &str, _depth, value: &Value| {
let Some(field_id) = field_id_map.id_or_insert(field_name) else {
return Err(UserError::AttributeLimitReached.into());
};

if select_field(field_name, self.attribute_to_extract, self.attribute_to_skip)
!= Selection::Select
{
return Ok(());
}

let position = field_position
.entry(field_id)
.and_modify(|counter| *counter += MAX_DISTANCE)
.or_insert(0);
if *position >= self.max_positions_per_attributes {
return Ok(());
}

let text;
let tokens = match value {
Value::Number(n) => {
text = n.to_string();
self.tokenizer.tokenize(text.as_str())
}
Value::Bool(b) => {
text = b.to_string();
self.tokenizer.tokenize(text.as_str())
}
Value::String(text) => {
let locales = self
.localized_attributes_rules
.iter()
.find(|rule| rule.match_str(field_name))
.map(|rule| rule.locales());
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
}
_ => return Ok(()),
};

// create an iterator of token with their positions.
let tokens = process_tokens(*position, tokens)
.take_while(|(p, _)| *p < self.max_positions_per_attributes);

for (index, token) in tokens {
// keep a word only if it is not empty and fit in a LMDB key.
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
*position = index;
if let Ok(position) = (*position).try_into() {
token_fn(field_name, field_id, position, token)?;
}
}
}

Ok(())
};

// parse json.
match serde_json::to_value(value).map_err(InternalError::SerdeJson)? {
Value::Object(object) => seek_leaf_values_in_object(
&object,
None,
&[],
field_name,
Depth::OnBaseKey,
&mut tokenize_field,
)?,
Value::Array(array) => seek_leaf_values_in_array(
&array,
None,
&[],
field_name,
Depth::OnBaseKey,
&mut tokenize_field,
)?,
value => tokenize_field(field_name, Depth::OnBaseKey, &value)?,
value => {
tokenize_field(field_name, Depth::OnBaseKey, &value)?;
}
}
}

Ok(())
}

fn tokenize_field(
&self,
field_id: FieldId,
field_name: &str,
value: &Value,
token_fn: &mut impl FnMut(&str, u16, u16, &str) -> std::result::Result<(), crate::Error>,
field_position: &mut HashMap<u16, u32>,
) -> Result<()> {
let position = field_position
.entry(field_id)
.and_modify(|counter| *counter += MAX_DISTANCE)
.or_insert(0);
if *position >= self.max_positions_per_attributes {
return Ok(());
}

let text;
let tokens = match value {
Value::Number(n) => {
text = n.to_string();
self.tokenizer.tokenize(text.as_str())
}
Value::Bool(b) => {
text = b.to_string();
self.tokenizer.tokenize(text.as_str())
}
Value::String(text) => {
let locales = self
.localized_attributes_rules
.iter()
.find(|rule| rule.match_str(field_name) == PatternMatch::Match)
.map(|rule| rule.locales());
self.tokenizer.tokenize_with_allow_list(text.as_str(), locales)
}
_ => return Ok(()),
};

// create an iterator of token with their positions.
let tokens = process_tokens(*position, tokens)
.take_while(|(p, _)| *p < self.max_positions_per_attributes);

for (index, token) in tokens {
// keep a word only if it is not empty and fit in a LMDB key.
let token = token.lemma().trim();
if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
*position = index;
if let Ok(position) = (*position).try_into() {
token_fn(field_name, field_id, position, token)?;
}
}
}

@@ -215,15 +221,20 @@ mod test {
let mut tb = TokenizerBuilder::default();
let document_tokenizer = DocumentTokenizer {
tokenizer: &tb.build(),
attribute_to_extract: None,
attribute_to_skip: &["not-me", "me-nether.nope"],
localized_attributes_rules: &[],
max_positions_per_attributes: 1000,
};

let fields_ids_map = FieldIdMapWithMetadata::new(
fields_ids_map,
MetadataBuilder::new(Default::default(), Default::default(), Default::default(), None),
MetadataBuilder::new(
Default::default(),
Default::default(),
Default::default(),
None,
None,
Default::default(),
),
);

let fields_ids_map_lock = std::sync::RwLock::new(fields_ids_map);
@@ -265,6 +276,10 @@ mod test {
2,
16,
]: "catto",
[
3,
0,
]: "unsearchable",
[
5,
0,
@@ -277,6 +292,10 @@ mod test {
8,
0,
]: "23",
[
9,
0,
]: "unsearchable",
}
"###);
}

@@ -9,12 +9,14 @@ use heed::{BytesDecode, BytesEncode, RoTxn, RwTxn};

use super::fst_merger_builder::FstMergerBuilder;
use super::KvReaderDelAdd;
use crate::attribute_patterns::PatternMatch;
use crate::heed_codec::facet::FacetGroupKey;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::{create_sorter, MergeDeladdBtreesetString};
use crate::{
BEU16StrCodec, FieldId, GlobalFieldsIdsMap, Index, LocalizedAttributesRule, Result,
MAX_FACET_VALUE_LENGTH,
BEU16StrCodec, FieldId, FieldIdMapMissingEntry, FilterableAttributesFeatures,
FilterableAttributesRule, GlobalFieldsIdsMap, Index, InternalError, LocalizedAttributesRule,
Result, MAX_FACET_VALUE_LENGTH,
};

pub struct FacetSearchBuilder<'indexer> {
@@ -22,6 +24,7 @@ pub struct FacetSearchBuilder<'indexer> {
normalized_facet_string_docids_sorter: Sorter<MergeDeladdBtreesetString>,
global_fields_ids_map: GlobalFieldsIdsMap<'indexer>,
localized_attributes_rules: Vec<LocalizedAttributesRule>,
filterable_attributes_rules: Vec<FilterableAttributesRule>,
// Buffered data below
buffer: Vec<u8>,
localized_field_ids: HashMap<FieldId, Option<Vec<Language>>>,
@@ -31,6 +34,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
pub fn new(
global_fields_ids_map: GlobalFieldsIdsMap<'indexer>,
localized_attributes_rules: Vec<LocalizedAttributesRule>,
filterable_attributes_rules: Vec<FilterableAttributesRule>,
) -> Self {
let registered_facets = HashMap::new();
let normalized_facet_string_docids_sorter = create_sorter(
@@ -49,6 +53,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
buffer: Vec::new(),
global_fields_ids_map,
localized_attributes_rules,
filterable_attributes_rules,
localized_field_ids: HashMap::new(),
}
}
@@ -60,6 +65,13 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
) -> Result<()> {
let FacetGroupKey { field_id, level: _level, left_bound } = facet_key;

let filterable_attributes_features = self.filterable_attributes_features(field_id)?;

// if facet search is disabled, we don't need to register the facet
if !filterable_attributes_features.is_facet_searchable() {
return Ok(());
};

if deladd == DelAdd::Addition {
self.registered_facets.entry(field_id).and_modify(|count| *count += 1).or_insert(1);
}
@@ -83,6 +95,24 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
Ok(())
}

fn filterable_attributes_features(
&mut self,
field_id: u16,
) -> Result<FilterableAttributesFeatures> {
let Some(filterable_attributes_features) =
self.global_fields_ids_map.metadata(field_id).map(|metadata| {
metadata.filterable_attributes_features(&self.filterable_attributes_rules)
})
else {
return Err(InternalError::FieldIdMapMissingEntry(FieldIdMapMissingEntry::FieldId {
field_id,
process: "facet_search_builder::register_from_key",
})
.into());
};
Ok(filterable_attributes_features)
}

fn locales(&mut self, field_id: FieldId) -> Option<&[Language]> {
if let Entry::Vacant(e) = self.localized_field_ids.entry(field_id) {
let Some(field_name) = self.global_fields_ids_map.name(field_id) else {
@@ -92,7 +122,7 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
let locales = self
.localized_attributes_rules
.iter()
.find(|rule| rule.match_str(field_name))
.find(|rule| rule.match_str(field_name) == PatternMatch::Match)
.map(|rule| rule.locales.clone());

e.insert(locales);

@@ -199,7 +199,7 @@ where
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();

<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
WordPairProximityDocidsExtractor::run_extraction(
document_changes,
indexing_context,
extractor_allocs,

@@ -25,7 +25,7 @@ use crate::{GlobalFieldsIdsMap, Index, Result};
pub(super) fn post_process<MSP>(
indexing_context: IndexingContext<MSP>,
wtxn: &mut RwTxn<'_>,
global_fields_ids_map: GlobalFieldsIdsMap<'_>,
mut global_fields_ids_map: GlobalFieldsIdsMap<'_>,
facet_field_ids_delta: FacetFieldIdsDelta,
) -> Result<()>
where
@@ -33,10 +33,8 @@ where
{
let index = indexing_context.index;
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
if index.facet_search(wtxn)? {
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
}
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
compute_facet_level_database(index, wtxn, facet_field_ids_delta, &mut global_fields_ids_map)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?;
@@ -116,10 +114,18 @@ fn compute_facet_search_database(
global_fields_ids_map: GlobalFieldsIdsMap,
) -> Result<()> {
let rtxn = index.read_txn()?;

// if the facet search is not enabled, we can skip the rest of the function
if !index.facet_search(wtxn)? {
return Ok(());
}

let localized_attributes_rules = index.localized_attributes_rules(&rtxn)?;
let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?;
let mut facet_search_builder = FacetSearchBuilder::new(
global_fields_ids_map,
localized_attributes_rules.unwrap_or_default(),
filterable_attributes_rules,
);

let previous_facet_id_string_docids = index
@@ -164,8 +170,19 @@ fn compute_facet_level_database(
index: &Index,
wtxn: &mut RwTxn,
mut facet_field_ids_delta: FacetFieldIdsDelta,
global_fields_ids_map: &mut GlobalFieldsIdsMap,
) -> Result<()> {
let rtxn = index.read_txn()?;
let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?;
for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() {
// skip field ids that should not be facet leveled
let Some(metadata) = global_fields_ids_map.metadata(fid) else {
continue;
};
if !metadata.require_facet_level_database(&filterable_attributes_rules) {
continue;
}

let span = tracing::trace_span!(target: "indexing::facet_field_ids", "string");
let _entered = span.enter();
match delta {

@@ -137,7 +137,6 @@ pub(super) fn update_index(
index.put_primary_key(wtxn, new_primary_key.name())?;
}
let mut inner_index_settings = InnerIndexSettings::from_index(index, wtxn, Some(embedders))?;
inner_index_settings.recompute_facets(wtxn, index)?;
inner_index_settings.recompute_searchables(wtxn, index)?;
index.put_field_distribution(wtxn, &field_distribution)?;
index.put_documents_ids(wtxn, &document_ids)?;

@ -6,17 +6,20 @@ use std::sync::Arc;
|
||||
|
||||
use charabia::{Normalize, Tokenizer, TokenizerBuilder};
|
||||
use deserr::{DeserializeError, Deserr};
|
||||
use itertools::{EitherOrBoth, Itertools};
|
||||
use itertools::{merge_join_by, EitherOrBoth, Itertools};
|
||||
use roaring::RoaringBitmap;
|
||||
use serde::{Deserialize, Deserializer, Serialize, Serializer};
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use super::del_add::DelAddOperation;
|
||||
use super::del_add::{DelAdd, DelAddOperation};
|
||||
use super::index_documents::{IndexDocumentsConfig, Transform};
|
||||
use super::IndexerConfig;
|
||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::attribute_patterns::PatternMatch;
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::criterion::Criterion;
|
||||
use crate::error::UserError;
|
||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||
use crate::filterable_attributes_rules::match_faceted_field;
|
||||
use crate::index::{
|
||||
IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO,
|
||||
DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
|
||||
@ -31,7 +34,7 @@ use crate::vector::settings::{
|
||||
SubEmbeddingSettings, WriteBackToDocuments,
|
||||
};
|
||||
use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
|
||||
use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result};
|
||||
use crate::{FieldId, FilterableAttributesRule, Index, LocalizedAttributesRule, Result};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||
pub enum Setting<T> {
|
||||
@ -155,7 +158,7 @@ pub struct Settings<'a, 't, 'i> {
|
||||
|
||||
searchable_fields: Setting<Vec<String>>,
|
||||
displayed_fields: Setting<Vec<String>>,
|
||||
filterable_fields: Setting<HashSet<String>>,
|
||||
filterable_fields: Setting<Vec<FilterableAttributesRule>>,
|
||||
sortable_fields: Setting<HashSet<String>>,
|
||||
criteria: Setting<Vec<Criterion>>,
|
||||
stop_words: Setting<BTreeSet<String>>,
|
||||
@ -241,8 +244,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.filterable_fields = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_filterable_fields(&mut self, names: HashSet<String>) {
|
||||
self.filterable_fields = Setting::Set(names);
|
||||
pub fn set_filterable_fields(&mut self, rules: Vec<FilterableAttributesRule>) {
|
||||
self.filterable_fields = Setting::Set(rules);
|
||||
}
|
||||
|
||||
pub fn set_sortable_fields(&mut self, names: HashSet<String>) {
|
||||
@ -516,7 +519,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
}
|
||||
|
||||
/// Updates the index's searchable attributes.
|
||||
fn update_searchable(&mut self) -> Result<bool> {
|
||||
fn update_user_defined_searchable_attributes(&mut self) -> Result<bool> {
|
||||
match self.searchable_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
// Check to see if the searchable fields changed before doing anything else
|
||||
@ -529,26 +532,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
return Ok(false);
|
||||
}
|
||||
|
||||
// Since we're updating the settings we can only add new fields at the end of the field id map
|
||||
let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
|
||||
// fields are deduplicated, only the first occurrence is taken into account
|
||||
let names = fields.iter().unique().map(String::as_str).collect::<Vec<_>>();
|
||||
|
||||
// Add all the searchable attributes to the field map, and then add the
|
||||
// remaining fields from the old field map to the new one
|
||||
for name in names.iter() {
|
||||
// The fields ids map won't change the field id of already present elements thus only the
|
||||
// new fields will be inserted.
|
||||
fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?;
|
||||
}
|
||||
|
||||
self.index.put_all_searchable_fields_from_fields_ids_map(
|
||||
self.wtxn,
|
||||
&names,
|
||||
&fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME),
|
||||
&fields_ids_map,
|
||||
)?;
|
||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||
self.index.put_user_defined_searchable_fields(self.wtxn, &names)?;
|
||||
Ok(true)
|
||||
}
|
||||
Setting::Reset => Ok(self.index.delete_all_searchable_fields(self.wtxn)?),
|
||||
@ -760,14 +747,10 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
fn update_filterable(&mut self) -> Result<()> {
|
||||
match self.filterable_fields {
|
||||
Setting::Set(ref fields) => {
|
||||
let mut new_facets = HashSet::new();
|
||||
for name in fields {
|
||||
new_facets.insert(name.clone());
|
||||
}
|
||||
self.index.put_filterable_fields(self.wtxn, &new_facets)?;
|
||||
self.index.put_filterable_attributes_rules(self.wtxn, fields)?;
|
||||
}
|
||||
Setting::Reset => {
|
||||
self.index.delete_filterable_fields(self.wtxn)?;
|
||||
self.index.delete_filterable_attributes_rules(self.wtxn)?;
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
@ -1257,7 +1240,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
self.update_separator_tokens()?;
|
||||
self.update_dictionary()?;
|
||||
self.update_synonyms()?;
|
||||
self.update_searchable()?;
|
||||
self.update_user_defined_searchable_attributes()?;
|
||||
self.update_exact_attributes()?;
|
||||
self.update_proximity_precision()?;
|
||||
self.update_prefix_search()?;
|
||||
@ -1267,7 +1250,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
let embedding_config_updates = self.update_embedding_configs()?;
|
||||
|
||||
let mut new_inner_settings = InnerIndexSettings::from_index(self.index, self.wtxn, None)?;
|
||||
new_inner_settings.recompute_facets(self.wtxn, self.index)?;
|
||||
new_inner_settings.recompute_searchables(self.wtxn, self.index)?;
|
||||
|
||||
let primary_key_id = self
|
||||
.index
|
||||
@ -1319,8 +1302,8 @@ impl InnerIndexSettingsDiff {
|
||||
settings_update_only: bool,
|
||||
) -> Self {
|
||||
let only_additional_fields = match (
|
||||
&old_settings.user_defined_searchable_fields,
|
||||
&new_settings.user_defined_searchable_fields,
|
||||
&old_settings.user_defined_searchable_attributes,
|
||||
&new_settings.user_defined_searchable_attributes,
|
||||
) {
|
||||
(None, None) | (Some(_), None) | (None, Some(_)) => None, // None means *
|
||||
(Some(old), Some(new)) => {
|
||||
@ -1342,14 +1325,14 @@ impl InnerIndexSettingsDiff {
                || old_settings.dictionary != new_settings.dictionary
                || old_settings.proximity_precision != new_settings.proximity_precision
                || old_settings.prefix_search != new_settings.prefix_search
                || old_settings.localized_searchable_fields_ids
                    != new_settings.localized_searchable_fields_ids
                || old_settings.localized_attributes_rules
                    != new_settings.localized_attributes_rules
        };

        let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;

        let cache_user_defined_searchables = old_settings.user_defined_searchable_fields
            != new_settings.user_defined_searchable_fields;
        let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes
            != new_settings.user_defined_searchable_attributes;

        // if the user-defined searchables changed, then we need to reindex prompts.
        if cache_user_defined_searchables {
@ -1432,30 +1415,70 @@ impl InnerIndexSettingsDiff {
        }
    }

    /// List the faceted fields from the inner fid map.
    /// This is used to list the faceted fields when we are reindexing,
    /// but it can't be used in document addition because the field id map must be exhaustive.
    pub fn list_faceted_fields_from_fid_map(&self, del_add: DelAdd) -> BTreeSet<FieldId> {
        let settings = match del_add {
            DelAdd::Deletion => &self.old,
            DelAdd::Addition => &self.new,
        };

        settings
            .fields_ids_map
            .iter_id_metadata()
            .filter(|(_, metadata)| metadata.is_faceted(&settings.filterable_attributes_rules))
            .map(|(id, _)| id)
            .collect()
    }

    pub fn facet_fids_changed(&self) -> bool {
        let existing_fields = &self.new.existing_fields;
        if existing_fields.iter().any(|field| field.contains('.')) {
            return true;
        for eob in merge_join_by(
            self.old.fields_ids_map.iter().filter(|(_, _, metadata)| {
                metadata.is_faceted(&self.old.filterable_attributes_rules)
            }),
            self.new.fields_ids_map.iter().filter(|(_, _, metadata)| {
                metadata.is_faceted(&self.new.filterable_attributes_rules)
            }),
            |(old_fid, _, _), (new_fid, _, _)| old_fid.cmp(new_fid),
        ) {
            match eob {
                // If there is a difference, we need to reindex facet databases.
                EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => return true,
                // If the field is faceted in both old and new settings, we check the facet-search and facet level databases.
                EitherOrBoth::Both((_, _, old_metadata), (_, _, new_metadata)) => {
                    // Check if the field is facet-searchable in the old and new settings.
                    // If there is a difference, we need to reindex the facet-search database.
                    let old_filterable_features = old_metadata
                        .filterable_attributes_features(&self.old.filterable_attributes_rules);
                    let new_filterable_features = new_metadata
                        .filterable_attributes_features(&self.new.filterable_attributes_rules);
                    let is_old_facet_searchable =
                        old_filterable_features.is_facet_searchable() && self.old.facet_search;
                    let is_new_facet_searchable =
                        new_filterable_features.is_facet_searchable() && self.new.facet_search;
                    if is_old_facet_searchable != is_new_facet_searchable {
                        return true;
                    }

                    // Check if the field needs a facet level database in the old and new settings.
                    // If there is a difference, we need to reindex the facet level databases.
                    let old_facet_level_database = old_metadata
                        .require_facet_level_database(&self.old.filterable_attributes_rules);
                    let new_facet_level_database = new_metadata
                        .require_facet_level_database(&self.new.filterable_attributes_rules);
                    if old_facet_level_database != new_facet_level_database {
                        return true;
                    }
                }
            }
        }

        let old_faceted_fields = &self.old.user_defined_faceted_fields;
        if old_faceted_fields.iter().any(|field| field.contains('.')) {
            return true;
        }

        // If there are new faceted fields, we indicate that we must reindex, as we must
        // index the new fields as facets. It means that the distinct attribute,
        // an Asc/Desc criterion, or a filterable attribute has been added or removed.
        let new_faceted_fields = &self.new.user_defined_faceted_fields;
        if new_faceted_fields.iter().any(|field| field.contains('.')) {
            return true;
        }

        (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
        false
    }

    pub fn global_facet_settings_changed(&self) -> bool {
        self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
        self.old.localized_attributes_rules != self.new.localized_attributes_rules
            || self.old.facet_search != self.new.facet_search
    }
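The rewritten `facet_fids_changed` walks both field lists in lockstep instead of diffing name sets: `itertools::merge_join_by` merges two sorted iterators, surfacing ids present on only one side (an added or removed facet) and pairing ids present on both so their features can be compared. A self-contained sketch of that detection pattern with simplified `(field_id, is_faceted)` metadata:

```rust
use itertools::{merge_join_by, EitherOrBoth};

/// Returns true when the sorted `(field_id, is_faceted)` lists disagree on
/// which fields are faceted, i.e. when the real code would reindex facets.
fn facet_fids_changed(old: &[(u16, bool)], new: &[(u16, bool)]) -> bool {
    for eob in merge_join_by(
        old.iter().filter(|(_, faceted)| *faceted),
        new.iter().filter(|(_, faceted)| *faceted),
        |(old_fid, _), (new_fid, _)| old_fid.cmp(new_fid),
    ) {
        match eob {
            // A field id is faceted on one side only: a facet was added or removed.
            EitherOrBoth::Left(_) | EitherOrBoth::Right(_) => return true,
            // Faceted on both sides: the real code compares facet features here.
            EitherOrBoth::Both(_, _) => {}
        }
    }
    false
}

fn main() {
    assert!(!facet_fids_changed(&[(0, true), (1, false)], &[(0, true), (1, false)]));
    assert!(facet_fids_changed(&[(0, true)], &[(0, true), (2, true)]));
}
```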
@ -1475,10 +1498,6 @@ impl InnerIndexSettingsDiff {
        self.old.geo_fields_ids != self.new.geo_fields_ids
            || (!self.settings_update_only && self.new.geo_fields_ids.is_some())
    }

    pub fn modified_faceted_fields(&self) -> HashSet<String> {
        &self.old.user_defined_faceted_fields ^ &self.new.user_defined_faceted_fields
    }
}
#[derive(Clone)]
@ -1486,20 +1505,17 @@ pub(crate) struct InnerIndexSettings {
    pub stop_words: Option<fst::Set<Vec<u8>>>,
    pub allowed_separators: Option<BTreeSet<String>>,
    pub dictionary: Option<BTreeSet<String>>,
    pub fields_ids_map: FieldsIdsMap,
    pub user_defined_faceted_fields: HashSet<String>,
    pub user_defined_searchable_fields: Option<Vec<String>>,
    pub faceted_fields_ids: HashSet<FieldId>,
    pub searchable_fields_ids: Vec<FieldId>,
    pub fields_ids_map: FieldIdMapWithMetadata,
    pub localized_attributes_rules: Vec<LocalizedAttributesRule>,
    pub filterable_attributes_rules: Vec<FilterableAttributesRule>,
    pub asc_desc_fields: HashSet<String>,
    pub distinct_field: Option<String>,
    pub user_defined_searchable_attributes: Option<Vec<String>>,
    pub sortable_fields: HashSet<String>,
    pub exact_attributes: HashSet<FieldId>,
    pub proximity_precision: ProximityPrecision,
    pub embedding_configs: EmbeddingConfigs,
    pub existing_fields: HashSet<String>,
    pub geo_fields_ids: Option<(FieldId, FieldId)>,
    pub non_searchable_fields_ids: Vec<FieldId>,
    pub non_faceted_fields_ids: Vec<FieldId>,
    pub localized_searchable_fields_ids: LocalizedFieldIds,
    pub localized_faceted_fields_ids: LocalizedFieldIds,
    pub prefix_search: PrefixSearch,
    pub facet_search: bool,
}
@ -1515,12 +1531,6 @@ impl InnerIndexSettings {
        let allowed_separators = index.allowed_separators(rtxn)?;
        let dictionary = index.dictionary(rtxn)?;
        let mut fields_ids_map = index.fields_ids_map(rtxn)?;
        let user_defined_searchable_fields = index.user_defined_searchable_fields(rtxn)?;
        let user_defined_searchable_fields =
            user_defined_searchable_fields.map(|sf| sf.into_iter().map(String::from).collect());
        let user_defined_faceted_fields = index.user_defined_faceted_fields(rtxn)?;
        let mut searchable_fields_ids = index.searchable_fields_ids(rtxn)?;
        let mut faceted_fields_ids = index.faceted_fields_ids(rtxn)?;
        let exact_attributes = index.exact_attributes_ids(rtxn)?;
        let proximity_precision = index.proximity_precision(rtxn)?.unwrap_or_default();
        let embedding_configs = match embedding_configs {
@ -1529,87 +1539,57 @@ impl InnerIndexSettings {
        };
        let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default();
        let facet_search = index.facet_search(rtxn)?;
        let existing_fields: HashSet<_> = index
            .field_distribution(rtxn)?
            .into_iter()
            .filter_map(|(field, count)| (count != 0).then_some(field))
            .collect();
        // index.fields_ids_map($a)? ==>> fields_ids_map
        let geo_fields_ids = match fields_ids_map.id(RESERVED_GEO_FIELD_NAME) {
            Some(gfid) => {
                let is_sortable = index.sortable_fields_ids(rtxn)?.contains(&gfid);
                let is_filterable = index.filterable_fields_ids(rtxn)?.contains(&gfid);
            Some(_) if index.is_geo_enabled(rtxn)? => {
                // if `_geo` is faceted then we get the `lat` and `lng`
                if is_sortable || is_filterable {
                    let field_ids = fields_ids_map
                        .insert("_geo.lat")
                        .zip(fields_ids_map.insert("_geo.lng"))
                        .ok_or(UserError::AttributeLimitReached)?;
                    Some(field_ids)
                } else {
                    None
                }
                let field_ids = fields_ids_map
                    .insert("_geo.lat")
                    .zip(fields_ids_map.insert("_geo.lng"))
                    .ok_or(UserError::AttributeLimitReached)?;
                Some(field_ids)
            }
            None => None,
            _ => None,
        };
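In the `_geo` branch above, the two coordinate insertions are folded into one `Option` with `Option::zip`, so a single `ok_or` reports `AttributeLimitReached` when either insertion fails. The std-only pattern in isolation:

```rust
fn main() {
    // Stand-ins for fields_ids_map.insert("_geo.lat") / insert("_geo.lng"),
    // which return None once the field-id limit is reached.
    let lat: Option<u16> = Some(10);
    let lng: Option<u16> = Some(11);

    // `zip` is Some((lat, lng)) only when both inserts succeeded,
    // so one `ok_or` replaces two separate error checks.
    let field_ids: Result<(u16, u16), &str> = lat.zip(lng).ok_or("attribute limit reached");

    assert_eq!(field_ids, Ok((10, 11)));
}
```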
        let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
        let localized_searchable_fields_ids = LocalizedFieldIds::new(
            &localized_attributes_rules,
            &fields_ids_map,
            searchable_fields_ids.iter().cloned(),
        );
        let localized_faceted_fields_ids = LocalizedFieldIds::new(
            &localized_attributes_rules,
            &fields_ids_map,
            faceted_fields_ids.iter().cloned(),
        );

        let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME);
        searchable_fields_ids.retain(|id| !vectors_fids.contains(id));
        faceted_fields_ids.retain(|id| !vectors_fids.contains(id));
        let localized_attributes_rules =
            index.localized_attributes_rules(rtxn)?.unwrap_or_default();
        let filterable_attributes_rules = index.filterable_attributes_rules(rtxn)?;
        let sortable_fields = index.sortable_fields(rtxn)?;
        let asc_desc_fields = index.asc_desc_fields(rtxn)?;
        let distinct_field = index.distinct_field(rtxn)?.map(|f| f.to_string());
        let user_defined_searchable_attributes = index
            .user_defined_searchable_fields(rtxn)?
            .map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
        let builder = MetadataBuilder::from_index(index, rtxn)?;
        let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
        Ok(Self {
            stop_words,
            allowed_separators,
            dictionary,
            fields_ids_map,
            user_defined_faceted_fields,
            user_defined_searchable_fields,
            faceted_fields_ids,
            searchable_fields_ids,
            localized_attributes_rules,
            filterable_attributes_rules,
            asc_desc_fields,
            distinct_field,
            user_defined_searchable_attributes,
            sortable_fields,
            exact_attributes,
            proximity_precision,
            embedding_configs,
            existing_fields,
            geo_fields_ids,
            non_searchable_fields_ids: vectors_fids.clone(),
            non_faceted_fields_ids: vectors_fids.clone(),
            localized_searchable_fields_ids,
            localized_faceted_fields_ids,
            prefix_search,
            facet_search,
        })
    }

    // find and insert the new field ids
    pub fn recompute_facets(&mut self, wtxn: &mut heed::RwTxn<'_>, index: &Index) -> Result<()> {
        let new_facets = self
            .fields_ids_map
            .iter()
            .filter(|(fid, _field)| !self.non_faceted_fields_ids.contains(fid))
            .filter(|(_fid, field)| crate::is_faceted(field, &self.user_defined_faceted_fields))
            .map(|(_fid, field)| field.to_string())
            .collect();
        index.put_faceted_fields(wtxn, &new_facets)?;

        self.faceted_fields_ids = index.faceted_fields_ids(wtxn)?;
        let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
        self.localized_faceted_fields_ids = LocalizedFieldIds::new(
            &localized_attributes_rules,
            &self.fields_ids_map,
            self.faceted_fields_ids.iter().cloned(),
        );
        Ok(())
    pub fn match_faceted_field(&self, field: &str) -> PatternMatch {
        match_faceted_field(
            field,
            &self.filterable_attributes_rules,
            &self.sortable_fields,
            &self.asc_desc_fields,
            &self.distinct_field,
        )
    }

    // find and insert the new field ids
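The new `match_faceted_field` centralizes the question 'does this field need facet data?' by consulting every facet consumer at once: the filterable rules, the sortable fields, the Asc/Desc criteria fields, and the distinct attribute. A deliberately simplified illustration of that union check, with stand-in types rather than milli's actual `PatternMatch` logic (exact-name matching only; the real rules also support patterns like `doggos.age`):

```rust
use std::collections::HashSet;

// Simplified: milli matches attribute patterns, this sketch only
// checks exact field names.
fn is_faceted(
    field: &str,
    filterable: &HashSet<String>,
    sortable: &HashSet<String>,
    asc_desc: &HashSet<String>,
    distinct: &Option<String>,
) -> bool {
    filterable.contains(field)
        || sortable.contains(field)
        || asc_desc.contains(field)
        || distinct.as_deref() == Some(field)
}

fn main() {
    let filterable = HashSet::from([String::from("genres")]);
    let sortable = HashSet::from([String::from("release_date")]);
    let asc_desc = HashSet::new();
    let distinct = Some(String::from("isbn"));

    assert!(is_faceted("genres", &filterable, &sortable, &asc_desc, &distinct));
    assert!(is_faceted("isbn", &filterable, &sortable, &asc_desc, &distinct));
    assert!(!is_faceted("title", &filterable, &sortable, &asc_desc, &distinct));
}
```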
@ -1619,7 +1599,7 @@ impl InnerIndexSettings {
        index: &Index,
    ) -> Result<()> {
        let searchable_fields = self
            .user_defined_searchable_fields
            .user_defined_searchable_attributes
            .as_ref()
            .map(|searchable| searchable.iter().map(|s| s.as_str()).collect::<Vec<_>>());

@ -1628,17 +1608,9 @@ impl InnerIndexSettings {
            index.put_all_searchable_fields_from_fields_ids_map(
                wtxn,
                &searchable_fields,
                &self.non_searchable_fields_ids,
                &self.fields_ids_map,
            )?;
        }
        self.searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
        let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
        self.localized_searchable_fields_ids = LocalizedFieldIds::new(
            &localized_attributes_rules,
            &self.fields_ids_map,
            self.searchable_fields_ids.iter().cloned(),
        );

        Ok(())
    }
@ -1,6 +1,6 @@
use big_s::S;
use heed::types::Bytes;
use maplit::{btreemap, btreeset, hashset};
use maplit::{btreemap, btreeset};
use meili_snap::snapshot;

use super::*;
@ -210,7 +210,7 @@ fn set_filterable_fields() {
    // Set the filterable fields to be the age.
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset! { S("age") });
            settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("age"))]);
        })
        .unwrap();
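Since `index.filterable_fields(&rtxn)` disappears with this change, a test that wants to read the setting back would go through the `filterable_attributes_rules` getter that the new code uses internally. A hedged sketch of such an assertion (assumes `FilterableAttributesRule` implements `PartialEq`, and the same scaffolding as this test):

```rust
use big_s::S;
use milli::{FilterableAttributesRule, Index};

// Hedged sketch: reads the rules back with the new getter seen elsewhere in this diff.
fn assert_age_is_filterable(index: &Index) {
    let rtxn = index.read_txn().unwrap();
    let rules = index.filterable_attributes_rules(&rtxn).unwrap();
    assert_eq!(rules, vec![FilterableAttributesRule::Field(S("age"))]);
}
```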
@ -225,8 +225,6 @@ fn set_filterable_fields() {

    // Check that the displayed fields are correctly set.
    let rtxn = index.read_txn().unwrap();
    let fields_ids = index.filterable_fields(&rtxn).unwrap();
    assert_eq!(fields_ids, hashset! { S("age") });
    // Only count the field_id 0 and level 0 facet values.
    // TODO we must support typed CSVs for numbers to be understood.
    let fidmap = index.fields_ids_map(&rtxn).unwrap();
@ -268,15 +266,13 @@ fn set_filterable_fields() {
    // Set the filterable fields to be the age and the name.
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset! { S("age"), S("name") });
            settings.set_filterable_fields(vec![
                FilterableAttributesRule::Field(S("age")),
                FilterableAttributesRule::Field(S("name")),
            ]);
        })
        .unwrap();

    // Check that the displayed fields are correctly set.
    let rtxn = index.read_txn().unwrap();
    let fields_ids = index.filterable_fields(&rtxn).unwrap();
    assert_eq!(fields_ids, hashset! { S("age"), S("name") });

    let rtxn = index.read_txn().unwrap();
    // Only count the field_id 2 and level 0 facet values.
    let count = index
@ -300,15 +296,10 @@ fn set_filterable_fields() {
    // Remove the age from the filterable fields.
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset! { S("name") });
            settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("name"))]);
        })
        .unwrap();

    // Check that the displayed fields are correctly set.
    let rtxn = index.read_txn().unwrap();
    let fields_ids = index.filterable_fields(&rtxn).unwrap();
    assert_eq!(fields_ids, hashset! { S("name") });

    let rtxn = index.read_txn().unwrap();
    // Only count the field_id 2 and level 0 facet values.
    let count = index
@ -637,7 +628,10 @@ fn setting_searchable_recomputes_other_settings() {
    index
        .update_settings(|settings| {
            settings.set_displayed_fields(vec!["hello".to_string()]);
            settings.set_filterable_fields(hashset! { S("age"), S("toto") });
            settings.set_filterable_fields(vec![
                FilterableAttributesRule::Field(S("age")),
                FilterableAttributesRule::Field(S("toto")),
            ]);
            settings.set_criteria(vec![Criterion::Asc(S("toto"))]);
        })
        .unwrap();
@ -754,7 +748,7 @@ fn setting_impact_relevancy() {
    // Set the genres setting
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset! { S("genres") });
            settings.set_filterable_fields(vec![FilterableAttributesRule::Field(S("genres"))]);
        })
        .unwrap();
@ -1,13 +1,12 @@
use big_s::S;
use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::hashset;
use milli::documents::mmap_from_objects;
use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{FacetDistribution, Index, Object, OrderBy};
use milli::{FacetDistribution, FilterableAttributesRule, Index, Object, OrderBy};
use serde_json::{from_value, json};

#[test]
@ -21,10 +20,10 @@ fn test_facet_distribution_with_no_facet_values() {
    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);

    builder.set_filterable_fields(hashset! {
        S("genres"),
        S("tags"),
    });
    builder.set_filterable_fields(vec![
        FilterableAttributesRule::Field(S("genres")),
        FilterableAttributesRule::Field(S("tags")),
    ]);
    builder.execute(|_| (), || false).unwrap();
    wtxn.commit().unwrap();
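For context, once `genres` and `tags` are declared filterable the test queries their distribution through the `FacetDistribution` type imported above. A sketch of such a follow-up query; the builder methods (`facets`, `execute`) and the `OrderBy` default are assumed from milli's test suite, not shown in this diff:

```rust
use milli::{FacetDistribution, Index, OrderBy};

// Hedged sketch: exact method signatures are assumptions.
fn genres_distribution(index: &Index) {
    let rtxn = index.read_txn().unwrap();
    let distribution = FacetDistribution::new(&rtxn, index)
        .facets(vec![("genres", OrderBy::default())])
        .execute()
        .unwrap();
    // A filterable field with no facet values yields an empty map, not an error.
    assert!(distribution.get("genres").map_or(true, |values| values.is_empty()));
}
```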
@ -11,7 +11,9 @@ use milli::progress::Progress;
use milli::update::new::indexer;
use milli::update::{IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs;
use milli::{AscDesc, Criterion, DocumentId, Index, Member, TermsMatchingStrategy};
use milli::{
    AscDesc, Criterion, DocumentId, FilterableAttributesRule, Index, Member, TermsMatchingStrategy,
};
use serde::{Deserialize, Deserializer};
use slice_group_by::GroupBy;
@ -42,14 +44,14 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
    let mut builder = Settings::new(&mut wtxn, &index, &config);

    builder.set_criteria(criteria.to_vec());
    builder.set_filterable_fields(hashset! {
        S("tag"),
        S("asc_desc_rank"),
        S("_geo"),
        S("opt1"),
        S("opt1.opt2"),
        S("tag_in")
    });
    builder.set_filterable_fields(vec![
        FilterableAttributesRule::Field(S("tag")),
        FilterableAttributesRule::Field(S("asc_desc_rank")),
        FilterableAttributesRule::Field(S("_geo")),
        FilterableAttributesRule::Field(S("opt1")),
        FilterableAttributesRule::Field(S("opt1.opt2")),
        FilterableAttributesRule::Field(S("tag_in")),
    ]);
    builder.set_sortable_fields(hashset! {
        S("tag"),
        S("asc_desc_rank"),
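Note the asymmetry this hunk leaves in place: `set_sortable_fields` still takes a plain `hashset!` of names while `set_filterable_fields` now takes rules, so a helper like this one mixes both styles. A condensed sketch of the resulting call pattern (the `builder` is a milli `Settings` builder as constructed above):

```rust
use big_s::S;
use maplit::hashset;
use milli::update::Settings;
use milli::FilterableAttributesRule;

// Condensed sketch of the mixed settings styles after this change.
fn configure(builder: &mut Settings<'_, '_, '_>) {
    builder.set_filterable_fields(vec![FilterableAttributesRule::Field(S("tag"))]);
    builder.set_sortable_fields(hashset! { S("tag"), S("asc_desc_rank") });
}
```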