From 13b607bd68243f81be98ce852b9bf327e08142a1 Mon Sep 17 00:00:00 2001 From: Lucas Black Date: Sun, 18 May 2025 20:24:52 -0700 Subject: [PATCH] Removed matches_wildcard_pattern() and integrated match_pattern() into attributes_to_search_on(), updated test cases --- .../tests/search/restrict_searchable.rs | 57 ++++--------------- crates/milli/src/attribute_patterns.rs | 2 +- crates/milli/src/search/new/mod.rs | 31 +--------- 3 files changed, 13 insertions(+), 77 deletions(-) diff --git a/crates/meilisearch/tests/search/restrict_searchable.rs b/crates/meilisearch/tests/search/restrict_searchable.rs index ffd612557..db1082053 100644 --- a/crates/meilisearch/tests/search/restrict_searchable.rs +++ b/crates/meilisearch/tests/search/restrict_searchable.rs @@ -476,7 +476,7 @@ async fn nested_search_on_title_with_prefix_wildcard() { } #[actix_rt::test] -async fn nested_search_on_title_with_suffix_wildcard() { +async fn nested_search_with_suffix_wildcard() { let server = Server::new().await; let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await; @@ -491,17 +491,11 @@ async fn nested_search_on_title_with_suffix_wildcard() { }, ) .await; -} -#[actix_rt::test] -async fn nested_search_all_details_with_deep_wildcard() { - let server = Server::new().await; - let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await; - - // Deep wildcard should match deeply nested attributes + // Should return 1 document (ids: 1) index .search( - json!({"q": "gold", "attributesToSearchOn": ["details.**"]}), + json!({"q": "gold", "attributesToSearchOn": ["details.*"]}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(response["hits"].as_array().unwrap().len(), @"1"); @@ -512,7 +506,7 @@ async fn nested_search_all_details_with_deep_wildcard() { // Should return 2 documents (ids: 1 and 2) index .search( - json!({"q": "true", "attributesToSearchOn": ["details.**"]}), + json!({"q": "true", "attributesToSearchOn": ["details.*"]}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(response["hits"].as_array().unwrap().len(), @"2"); @@ -522,7 +516,7 @@ async fn nested_search_all_details_with_deep_wildcard() { } #[actix_rt::test] -async fn nested_search_all_details_restricted_set_with_any_wildcard() { +async fn nested_search_on_title_restricted_set_with_suffix_wildcard() { let server = Server::new().await; let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await; let (task, _status_code) = index.update_settings_searchable_attributes(json!(["details.title"])).await; @@ -537,16 +531,6 @@ async fn nested_search_all_details_restricted_set_with_any_wildcard() { }, ) .await; - - index - .search( - json!({"q": "Captain Marvel", "attributesToSearchOn": ["details.**"]}), - |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(response["hits"].as_array().unwrap().len(), @"2"); - }, - ) - .await; } #[actix_rt::test] @@ -577,16 +561,6 @@ async fn nested_search_no_searchable_attribute_set_with_any_wildcard() { ) .await; - index - .search( - json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown.**",]}), - |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(response["hits"].as_array().unwrap().len(), @"0"); - }, - ) - .await; - let (task, _status_code) = index.update_settings_searchable_attributes(json!(["*"])).await; index.wait_task(task.uid()).await.succeeded(); @@ -599,17 +573,6 @@ async fn nested_search_no_searchable_attribute_set_with_any_wildcard() { }, ) .await; - - // We only match deep wild card at the end, otherwise we need to recursively match deep wildcards - index - .search( - json!({"q": "Captain Marvel", "attributesToSearchOn": ["unknown.**", "details.**"]}), - |response, code| { - snapshot!(code, @"200 OK"); - snapshot!(response["hits"].as_array().unwrap().len(), @"3"); - }, - ) - .await; } #[actix_rt::test] @@ -646,14 +609,14 @@ async fn nested_prefix_search_on_details_with_suffix_wildcard() { } #[actix_rt::test] -async fn nested_prefix_search_on_weaknesses_with_deep_wildcard() { +async fn nested_prefix_search_on_weaknesses_with_suffix_wildcard() { let server = Server::new().await; let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await; // Deep wildcard search on nested weaknesses should return 2 documents (ids: 1 and 3) index .search( - json!({"q": "mag", "attributesToSearchOn": ["details.**"]}), + json!({"q": "mag", "attributesToSearchOn": ["details.*"]}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(response["hits"].as_array().unwrap().len(), @"2"); @@ -680,7 +643,7 @@ async fn nested_search_on_title_matching_strategy_all() { } #[actix_rt::test] -async fn nested_attributes_ranking_rule_order_with_wildcard() { +async fn nested_attributes_ranking_rule_order_with_prefix_wildcard() { let server = Server::new().await; let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await; @@ -711,14 +674,14 @@ async fn nested_attributes_ranking_rule_order_with_wildcard() { } #[actix_rt::test] -async fn nested_attributes_ranking_rule_order_with_deep_wildcard() { +async fn nested_attributes_ranking_rule_order_with_suffix_wildcard() { let server = Server::new().await; let index = index_with_documents(&server, &NESTED_SEARCH_DOCUMENTS).await; // Document 3 should appear before documents 1 and 2 index .search( - json!({"q": "Captain Marvel", "attributesToSearchOn": ["details.**"], "attributesToRetrieve": ["id"]}), + json!({"q": "Captain Marvel", "attributesToSearchOn": ["details.*"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(code, @"200 OK"); snapshot!(json_string!(response["hits"]), diff --git a/crates/milli/src/attribute_patterns.rs b/crates/milli/src/attribute_patterns.rs index 00caa2a6d..8da6942a3 100644 --- a/crates/milli/src/attribute_patterns.rs +++ b/crates/milli/src/attribute_patterns.rs @@ -50,7 +50,7 @@ impl AttributePatterns { /// /// * `pattern` - The pattern to match against. /// * `str` - The string to match against the pattern. -fn match_pattern(pattern: &str, str: &str) -> PatternMatch { +pub fn match_pattern(pattern: &str, str: &str) -> PatternMatch { // If the pattern is a wildcard, return Match if pattern == "*" { return PatternMatch::Match; diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index 21002c55a..dfe0ddfc9 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -52,6 +52,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy; use self::graph_based_ranking_rule::Words; use self::interner::Interned; use self::vector_sort::VectorSort; +use crate::attribute_patterns::{match_pattern, PatternMatch}; use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::index::PrefixSearch; use crate::localized_attributes_rules::LocalizedFieldIds; @@ -137,7 +138,7 @@ impl<'ctx> SearchContext<'ctx> { let matching_searchable_weights: Vec<_> = searchable_fields_weights .iter() .filter(|(name, _, _)| { - Self::matches_wildcard_pattern(field_name, name) + match_pattern(field_name, name) == PatternMatch::Match }) .collect(); @@ -190,34 +191,6 @@ impl<'ctx> SearchContext<'ctx> { Ok(()) } - - fn matches_wildcard_pattern(wildcard_pattern: &str, name: &str) -> bool { - let wildcard_subfields: Vec<&str> = wildcard_pattern.split(".").collect(); - let name_subfields: Vec<&str> = name.split(".").collect(); - - // Deep wildcard matches all attributes after ('**') - if !wildcard_subfields.is_empty() && wildcard_subfields.last() == Some(&"**") { - let prefix_len = wildcard_subfields.len() - 1; - if prefix_len > name_subfields.len() { - return false; - } - - return wildcard_subfields[..prefix_len] - .iter() - .zip(name_subfields.iter()) - .all(|(wc, sf)| *wc == "*" || *wc == *sf); - } - - // Using single wildcard ('*') should match length (e.g. 'a.*.c' matches 'a.b.c') - // where '*' can match any single segment - if wildcard_subfields.len() != name_subfields.len() { - return false; - } - - wildcard_subfields.iter() - .zip(name_subfields.iter()) - .all(|(wc, sf)| *wc == "*" || *wc == *sf) - } } #[derive(Debug, Default)]