From 2af9481804d799171ed360cb135c5639f9d79a64 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 17 Jul 2024 11:13:37 +0200 Subject: [PATCH] =?UTF-8?q?Implements=20the=20experimental=20contains=20fi?= =?UTF-8?q?lter=20operator=C2=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Cargo.lock | 1 + filter-parser/src/condition.rs | 18 +++ filter-parser/src/error.rs | 2 +- filter-parser/src/lib.rs | 71 +++++++++--- filter-parser/src/value.rs | 1 + index-scheduler/src/features.rs | 19 +++- meilisearch-types/src/features.rs | 2 + .../src/analytics/segment_analytics.rs | 3 + meilisearch/src/error.rs | 4 +- meilisearch/src/option.rs | 14 +++ meilisearch/src/routes/features.rs | 5 + meilisearch/src/routes/indexes/documents.rs | 36 +++--- .../src/routes/indexes/facet_search.rs | 9 +- meilisearch/src/routes/indexes/search.rs | 4 +- meilisearch/src/routes/indexes/similar.rs | 9 +- meilisearch/src/routes/multi_search.rs | 3 +- meilisearch/src/search/federated.rs | 10 +- meilisearch/src/search/mod.rs | 55 ++++++--- meilisearch/tests/common/mod.rs | 9 ++ meilisearch/tests/documents/errors.rs | 6 +- meilisearch/tests/documents/get_documents.rs | 3 +- meilisearch/tests/dumps/mod.rs | 9 +- meilisearch/tests/features/mod.rs | 20 ++-- meilisearch/tests/search/errors.rs | 107 ++++++++++++++---- meilisearch/tests/search/hybrid.rs | 6 +- meilisearch/tests/search/mod.rs | 30 ++++- meilisearch/tests/settings/get_settings.rs | 3 +- meilisearch/tests/similar/errors.rs | 34 +++--- meilisearch/tests/similar/mod.rs | 12 +- meilisearch/tests/vector/mod.rs | 9 +- meilisearch/tests/vector/settings.rs | 3 +- milli/Cargo.toml | 1 + milli/src/index.rs | 41 +++++++ milli/src/search/facet/filter.rs | 47 +++++++- 34 files changed, 484 insertions(+), 122 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7cdf80b8d..3808a2400 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3542,6 +3542,7 @@ dependencies = [ "maplit", "md5", "meili-snap", + "memchr", "memmap2", "mimalloc", "obkv", diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index 9abe1c6ea..1b15d2daa 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -26,6 +26,7 @@ pub enum Condition<'a> { LowerThan(Token<'a>), LowerThanOrEqual(Token<'a>), Between { from: Token<'a>, to: Token<'a> }, + Contains(Token<'a>), } /// condition = value ("==" | ">" ...) value @@ -92,6 +93,23 @@ pub fn parse_not_exists(input: Span) -> IResult { Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists })))) } +/// contains = value "CONTAINS" value +pub fn parse_contains(input: Span) -> IResult { + let (input, (fid, _, value)) = tuple((parse_value, tag("CONTAINS"), cut(parse_value)))(input)?; + Ok((input, FilterCondition::Condition { fid, op: Contains(value) })) +} + +/// contains = value "NOT" WS+ "CONTAINS" value +pub fn parse_not_contains(input: Span) -> IResult { + let keyword = tuple((tag("NOT"), multispace1, tag("CONTAINS"))); + let (input, (fid, _, value)) = tuple((parse_value, keyword, cut(parse_value)))(input)?; + + Ok(( + input, + FilterCondition::Not(Box::new(FilterCondition::Condition { fid, op: Contains(value) })), + )) +} + /// to = value value "TO" WS+ value pub fn parse_to(input: Span) -> IResult { let (input, (key, from, _, _, to)) = diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index c71a3aaee..f530cc690 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> { } ErrorKind::InvalidPrimary => { let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) }; - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)? } ErrorKind::InvalidEscapedNumber => { writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)? diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 077263e85..12847f52b 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -48,8 +48,8 @@ use std::fmt::Debug; pub use condition::{parse_condition, parse_to, Condition}; use condition::{ - parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null, - parse_not_exists, + parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, + parse_is_null, parse_not_contains, parse_not_exists, }; use error::{cut_with_err, ExpectedValueKind, NomErrorExt}; pub use error::{Error, ErrorKind}; @@ -147,7 +147,37 @@ pub enum FilterCondition<'a> { GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] }, } +pub enum TraversedElement<'a> { + FilterCondition(&'a FilterCondition<'a>), + Condition(&'a Condition<'a>), +} + impl<'a> FilterCondition<'a> { + pub fn use_contains_operator(&self) -> Option<&Token> { + match self { + FilterCondition::Condition { fid: _, op } => match op { + Condition::GreaterThan(_) + | Condition::GreaterThanOrEqual(_) + | Condition::Equal(_) + | Condition::NotEqual(_) + | Condition::Null + | Condition::Empty + | Condition::Exists + | Condition::LowerThan(_) + | Condition::LowerThanOrEqual(_) + | Condition::Between { .. } => None, + Condition::Contains(tok) => Some(tok), + }, + FilterCondition::Not(this) => this.use_contains_operator(), + FilterCondition::Or(seq) | FilterCondition::And(seq) => { + seq.iter().find_map(|filter| filter.use_contains_operator()) + } + FilterCondition::GeoLowerThan { .. } + | FilterCondition::GeoBoundingBox { .. } + | FilterCondition::In { .. } => None, + } + } + /// Returns the first token found at the specified depth, `None` if no token at this depth. pub fn token_at_depth(&self, depth: usize) -> Option<&Token> { match self { @@ -452,6 +482,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult { parse_exists, parse_not_exists, parse_to, + parse_contains, + parse_not_contains, // the next lines are only for error handling and are written at the end to have the less possible performance impact parse_geo, parse_geo_distance, @@ -534,6 +566,7 @@ impl<'a> std::fmt::Display for Condition<'a> { Condition::LowerThan(token) => write!(f, "< {token}"), Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"), Condition::Between { from, to } => write!(f, "{from} TO {to}"), + Condition::Contains(token) => write!(f, "CONTAINS {token}"), } } } @@ -558,6 +591,7 @@ pub mod tests { unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into() } + #[track_caller] fn p(s: &str) -> impl std::fmt::Display + '_ { Fc::parse(s).unwrap().unwrap() } @@ -639,6 +673,13 @@ pub mod tests { insta::assert_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS"); insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)"); + // Test CONTAINS + NOT CONTAINS + insta::assert_snapshot!(p("subscribers CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}"); + insta::assert_snapshot!(p("NOT subscribers CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})"); + insta::assert_snapshot!(p("subscribers NOT CONTAINS hello"), @"NOT ({subscribers} CONTAINS {hello})"); + insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}"); + insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})"); + // Test nested NOT insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}"); insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}"); @@ -710,7 +751,7 @@ pub mod tests { "###); insta::assert_snapshot!(p("'OR'"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. 1:5 'OR' "###); @@ -720,12 +761,12 @@ pub mod tests { "###); insta::assert_snapshot!(p("channel Ponce"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. 1:14 channel Ponce "###); insta::assert_snapshot!(p("channel = Ponce OR"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. 19:19 channel = Ponce OR "###); @@ -810,12 +851,12 @@ pub mod tests { "###); insta::assert_snapshot!(p("colour NOT EXIST"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. 1:17 colour NOT EXIST "###); insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. 1:23 subscribers 100 TO1000 "###); @@ -878,35 +919,35 @@ pub mod tests { "###); insta::assert_snapshot!(p(r#"value NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`. 1:11 value NULL "###); insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`. 1:15 value NOT NULL "###); insta::assert_snapshot!(p(r#"value EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`. 1:12 value EMPTY "###); insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`. 1:16 value NOT EMPTY "###); insta::assert_snapshot!(p(r#"value IS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`. 1:9 value IS "###); insta::assert_snapshot!(p(r#"value IS NOT"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`. 1:13 value IS NOT "###); insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`. 1:16 value IS EXISTS "###); insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`. 1:20 value IS NOT EXISTS "###); } diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index b1ecac55b..06ec1daef 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -211,6 +211,7 @@ fn is_keyword(s: &str) -> bool { | "IS" | "NULL" | "EMPTY" + | "CONTAINS" | "_geoRadius" | "_geoBoundingBox" ) diff --git a/index-scheduler/src/features.rs b/index-scheduler/src/features.rs index fbb273e54..c998ff444 100644 --- a/index-scheduler/src/features.rs +++ b/index-scheduler/src/features.rs @@ -81,6 +81,19 @@ impl RoFeatures { .into()) } } + + pub fn check_contains_filter(&self) -> Result<()> { + if self.runtime.contains_filter { + Ok(()) + } else { + Err(FeatureNotEnabledError { + disabled_action: "Using `CONTAINS` in a filter", + feature: "contains filter", + issue_link: "https://github.com/orgs/meilisearch/discussions/763", + } + .into()) + } + } } impl FeatureData { @@ -92,9 +105,11 @@ impl FeatureData { let txn = env.read_txn()?; let persisted_features: RuntimeTogglableFeatures = runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default(); + let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features; let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures { - metrics: instance_features.metrics || persisted_features.metrics, - logs_route: instance_features.logs_route || persisted_features.logs_route, + metrics: metrics || persisted_features.metrics, + logs_route: logs_route || persisted_features.logs_route, + contains_filter: contains_filter || persisted_features.contains_filter, ..persisted_features })); diff --git a/meilisearch-types/src/features.rs b/meilisearch-types/src/features.rs index f15a1c999..b9805a124 100644 --- a/meilisearch-types/src/features.rs +++ b/meilisearch-types/src/features.rs @@ -7,10 +7,12 @@ pub struct RuntimeTogglableFeatures { pub metrics: bool, pub logs_route: bool, pub edit_documents_by_function: bool, + pub contains_filter: bool, } #[derive(Default, Debug, Clone, Copy)] pub struct InstanceTogglableFeatures { pub metrics: bool, pub logs_route: bool, + pub contains_filter: bool, } diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 81354a139..487eaf003 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -261,6 +261,7 @@ impl super::Analytics for SegmentAnalytics { #[derive(Debug, Clone, Serialize)] struct Infos { env: String, + experimental_contains_filter: bool, experimental_enable_metrics: bool, experimental_search_queue_size: usize, experimental_logs_mode: LogMode, @@ -303,6 +304,7 @@ impl From for Infos { // Thus we must not insert `..` at the end. let Opt { db_path, + experimental_contains_filter, experimental_enable_metrics, experimental_search_queue_size, experimental_logs_mode, @@ -353,6 +355,7 @@ impl From for Infos { // We consider information sensible if it contains a path, an address, or a key. Self { env, + experimental_contains_filter, experimental_enable_metrics, experimental_search_queue_size, experimental_logs_mode, diff --git a/meilisearch/src/error.rs b/meilisearch/src/error.rs index 59b86e774..41473245e 100644 --- a/meilisearch/src/error.rs +++ b/meilisearch/src/error.rs @@ -25,12 +25,12 @@ pub enum MeilisearchHttpError { DocumentNotFound(String), #[error("Sending an empty filter is forbidden.")] EmptyFilter, + #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] + InvalidExpression(&'static [&'static str], Value), #[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")] FederationOptionsInNonFederatedRequest(usize), #[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")] PaginationInFederatedQuery(usize, &'static str), - #[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))] - InvalidExpression(&'static [&'static str], Value), #[error("A {0} payload is missing.")] MissingPayload(PayloadType), #[error("Too many search requests running at the same time: {0}. Retry after 10s.")] diff --git a/meilisearch/src/option.rs b/meilisearch/src/option.rs index af4da1113..27799ca5b 100644 --- a/meilisearch/src/option.rs +++ b/meilisearch/src/option.rs @@ -54,6 +54,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL"; const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE"; const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS"; const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE"; +const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER"; const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS"; const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE"; const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str = @@ -339,6 +340,13 @@ pub struct Opt { #[serde(default)] pub log_level: LogLevel, + /// Experimental contains filter feature. For more information, see: + /// + /// Enables the experimental contains filter operator. + #[clap(long, env = MEILI_EXPERIMENTAL_CONTAINS_FILTER)] + #[serde(default)] + pub experimental_contains_filter: bool, + /// Experimental metrics feature. For more information, see: /// /// Enables the Prometheus metrics on the `GET /metrics` endpoint. @@ -483,6 +491,7 @@ impl Opt { config_file_path: _, #[cfg(feature = "analytics")] no_analytics, + experimental_contains_filter, experimental_enable_metrics, experimental_search_queue_size, experimental_logs_mode, @@ -540,6 +549,10 @@ impl Opt { export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir); export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string()); + export_to_env_if_not_present( + MEILI_EXPERIMENTAL_CONTAINS_FILTER, + experimental_contains_filter.to_string(), + ); export_to_env_if_not_present( MEILI_EXPERIMENTAL_ENABLE_METRICS, experimental_enable_metrics.to_string(), @@ -617,6 +630,7 @@ impl Opt { InstanceTogglableFeatures { metrics: self.experimental_enable_metrics, logs_route: self.experimental_enable_logs_route, + contains_filter: self.experimental_contains_filter, } } } diff --git a/meilisearch/src/routes/features.rs b/meilisearch/src/routes/features.rs index 14aece10f..bc656bdbb 100644 --- a/meilisearch/src/routes/features.rs +++ b/meilisearch/src/routes/features.rs @@ -49,6 +49,8 @@ pub struct RuntimeTogglableFeatures { pub logs_route: Option, #[deserr(default)] pub edit_documents_by_function: Option, + #[deserr(default)] + pub contains_filter: Option, } async fn patch_features( @@ -72,6 +74,7 @@ async fn patch_features( .0 .edit_documents_by_function .unwrap_or(old_features.edit_documents_by_function), + contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter), }; // explicitly destructure for analytics rather than using the `Serialize` implementation, because @@ -82,6 +85,7 @@ async fn patch_features( metrics, logs_route, edit_documents_by_function, + contains_filter, } = new_features; analytics.publish( @@ -91,6 +95,7 @@ async fn patch_features( "metrics": metrics, "logs_route": logs_route, "edit_documents_by_function": edit_documents_by_function, + "contains_filter": contains_filter, }), Some(&req), ); diff --git a/meilisearch/src/routes/indexes/documents.rs b/meilisearch/src/routes/indexes/documents.rs index f1bd2201c..85cf33c54 100644 --- a/meilisearch/src/routes/indexes/documents.rs +++ b/meilisearch/src/routes/indexes/documents.rs @@ -7,7 +7,7 @@ use bstr::ByteSlice as _; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::Deserr; use futures::StreamExt; -use index_scheduler::{IndexScheduler, TaskId}; +use index_scheduler::{IndexScheduler, RoFeatures, TaskId}; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType}; @@ -260,8 +260,15 @@ fn documents_by_query( let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?; let index = index_scheduler.index(&index_uid)?; - let (total, documents) = - retrieve_documents(&index, offset, limit, filter, fields, retrieve_vectors)?; + let (total, documents) = retrieve_documents( + &index, + offset, + limit, + filter, + fields, + retrieve_vectors, + index_scheduler.features(), + )?; let ret = PaginationView::new(offset, limit, total as usize, documents); @@ -565,11 +572,9 @@ pub async fn delete_documents_by_filter( analytics.delete_documents(DocumentDeletionKind::PerFilter, &req); // we ensure the filter is well formed before enqueuing it - || -> Result<_, ResponseError> { - Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?) - }() - // and whatever was the error, the error code should always be an InvalidDocumentFilter - .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?; + crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())? + .ok_or(MeilisearchHttpError::EmptyFilter)?; + let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter }; let uid = get_task_id(&req, &opt)?; @@ -626,11 +631,12 @@ pub async fn edit_documents_by_function( if let Some(ref filter) = filter { // we ensure the filter is well formed before enqueuing it - || -> Result<_, ResponseError> { - Ok(crate::search::parse_filter(filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?) - }() - // and whatever was the error, the error code should always be an InvalidDocumentFilter - .map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?; + crate::search::parse_filter( + filter, + Code::InvalidDocumentFilter, + index_scheduler.features(), + )? + .ok_or(MeilisearchHttpError::EmptyFilter)?; } let task = KindWithContent::DocumentEdition { index_uid, @@ -736,12 +742,12 @@ fn retrieve_documents>( filter: Option, attributes_to_retrieve: Option>, retrieve_vectors: RetrieveVectors, + features: RoFeatures, ) -> Result<(u64, Vec), ResponseError> { let rtxn = index.read_txn()?; let filter = &filter; let filter = if let Some(filter) = filter { - parse_filter(filter) - .map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))? + parse_filter(filter, Code::InvalidDocumentFilter, features)? } else { None }; diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 89d0418b4..ecb7757af 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -79,7 +79,14 @@ pub async fn search( let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?; let _permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { - perform_facet_search(&index, search_query, facet_query, facet_name, search_kind) + perform_facet_search( + &index, + search_query, + facet_query, + facet_name, + search_kind, + index_scheduler.features(), + ) }) .await?; diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 985864ba5..836b96147 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -231,7 +231,7 @@ pub async fn search_with_url_query( let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?; let _permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vector) + perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features()) }) .await?; if let Ok(ref search_result) = search_result { @@ -274,7 +274,7 @@ pub async fn search_with_post( let _permit = search_queue.try_get_search_permit().await?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vectors) + perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features()) }) .await?; if let Ok(ref search_result) = search_result { diff --git a/meilisearch/src/routes/indexes/similar.rs b/meilisearch/src/routes/indexes/similar.rs index 1dd83b09b..5027a473e 100644 --- a/meilisearch/src/routes/indexes/similar.rs +++ b/meilisearch/src/routes/indexes/similar.rs @@ -106,7 +106,14 @@ async fn similar( SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?; tokio::task::spawn_blocking(move || { - perform_similar(&index, query, embedder_name, embedder, retrieve_vectors) + perform_similar( + &index, + query, + embedder_name, + embedder, + retrieve_vectors, + index_scheduler.features(), + ) }) .await? } diff --git a/meilisearch/src/routes/multi_search.rs b/meilisearch/src/routes/multi_search.rs index ae626888d..00431950b 100644 --- a/meilisearch/src/routes/multi_search.rs +++ b/meilisearch/src/routes/multi_search.rs @@ -112,6 +112,7 @@ pub async fn multi_search_with_post( )); } + let features = index_scheduler.features(); let index = index_scheduler .index(&index_uid) .map_err(|err| { @@ -130,7 +131,7 @@ pub async fn multi_search_with_post( .with_index(query_index)?; let search_result = tokio::task::spawn_blocking(move || { - perform_search(&index, query, search_kind, retrieve_vector) + perform_search(&index, query, search_kind, retrieve_vector, features) }) .await .with_index(query_index)?; diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 6d445eb67..323cbd627 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -473,8 +473,14 @@ pub fn perform_federated_search( None => TimeBudget::default(), }; - let (mut search, _is_finite_pagination, _max_total_hits, _offset) = - prepare_search(&index, &rtxn, &query, &search_kind, time_budget)?; + let (mut search, _is_finite_pagination, _max_total_hits, _offset) = prepare_search( + &index, + &rtxn, + &query, + &search_kind, + time_budget, + index_scheduler.features(), + )?; search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed); search.offset(0); diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 2bb1b5774..6624188ce 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -7,6 +7,7 @@ use std::time::{Duration, Instant}; use deserr::Deserr; use either::Either; +use index_scheduler::RoFeatures; use indexmap::IndexMap; use meilisearch_auth::IndexSearchRules; use meilisearch_types::deserr::DeserrJsonError; @@ -761,7 +762,8 @@ fn prepare_search<'t>( query: &'t SearchQuery, search_kind: &SearchKind, time_budget: TimeBudget, -) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { + features: RoFeatures, +) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> { let mut search = index.search(rtxn); search.time_budget(time_budget); if let Some(ranking_score_threshold) = query.ranking_score_threshold { @@ -848,7 +850,7 @@ fn prepare_search<'t>( search.limit(limit); if let Some(ref filter) = query.filter { - if let Some(facets) = parse_filter(filter)? { + if let Some(facets) = parse_filter(filter, Code::InvalidSearchFilter, features)? { search.filter(facets); } } @@ -872,7 +874,8 @@ pub fn perform_search( query: SearchQuery, search_kind: SearchKind, retrieve_vectors: RetrieveVectors, -) -> Result { + features: RoFeatures, +) -> Result { let before_search = Instant::now(); let rtxn = index.read_txn()?; let time_budget = match index.search_cutoff(&rtxn)? { @@ -881,7 +884,7 @@ pub fn perform_search( }; let (search, is_finite_pagination, max_total_hits, offset) = - prepare_search(index, &rtxn, &query, &search_kind, time_budget)?; + prepare_search(index, &rtxn, &query, &search_kind, time_budget, features)?; let ( milli::SearchResult { @@ -1337,7 +1340,8 @@ pub fn perform_facet_search( facet_query: Option, facet_name: String, search_kind: SearchKind, -) -> Result { + features: RoFeatures, +) -> Result { let before_search = Instant::now(); let rtxn = index.read_txn()?; let time_budget = match index.search_cutoff(&rtxn)? { @@ -1345,7 +1349,8 @@ pub fn perform_facet_search( None => TimeBudget::default(), }; - let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, &search_kind, time_budget)?; + let (search, _, _, _) = + prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?; let mut facet_search = SearchForFacetValues::new( facet_name, search, @@ -1371,6 +1376,7 @@ pub fn perform_similar( embedder_name: String, embedder: Arc, retrieve_vectors: RetrieveVectors, + features: RoFeatures, ) -> Result { let before_search = Instant::now(); let rtxn = index.read_txn()?; @@ -1401,10 +1407,7 @@ pub fn perform_similar( milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder); if let Some(ref filter) = query.filter { - if let Some(facets) = parse_filter(filter) - // inject InvalidSimilarFilter code - .map_err(|e| ResponseError::from_msg(e.to_string(), Code::InvalidSimilarFilter))? - { + if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? { similar.filter(facets); } } @@ -1760,15 +1763,33 @@ fn format_value( } } -pub(crate) fn parse_filter(facets: &Value) -> Result, MeilisearchHttpError> { - match facets { - Value::String(expr) => { - let condition = Filter::from_str(expr)?; - Ok(condition) +pub(crate) fn parse_filter( + facets: &Value, + filter_parsing_error_code: Code, + features: RoFeatures, +) -> Result, ResponseError> { + let filter = match facets { + Value::String(expr) => Filter::from_str(expr).map_err(|e| e.into()), + Value::Array(arr) => parse_filter_array(arr).map_err(|e| e.into()), + v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone()).into()), + }; + let filter = filter.map_err(|err: ResponseError| { + ResponseError::from_msg(err.to_string(), filter_parsing_error_code) + })?; + + if let Some(ref filter) = filter { + // If the contains operator is used while the contains filter features is not enabled, errors out + if let Some((token, error)) = + filter.use_contains_operator().zip(features.check_contains_filter().err()) + { + return Err(ResponseError::from_msg( + token.as_external_error(error).to_string(), + Code::FeatureNotEnabled, + )); } - Value::Array(arr) => parse_filter_array(arr), - v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone())), } + + Ok(filter) } fn parse_filter_array(arr: &[Value]) -> Result, MeilisearchHttpError> { diff --git a/meilisearch/tests/common/mod.rs b/meilisearch/tests/common/mod.rs index 1317dbce7..adcef9fe6 100644 --- a/meilisearch/tests/common/mod.rs +++ b/meilisearch/tests/common/mod.rs @@ -26,6 +26,15 @@ impl Value { panic!("Didn't find any task id in: {self}"); } } + + // Panic if the json doesn't contain the `status` field set to "succeeded" + #[track_caller] + pub fn succeeded(&self) -> &Self { + if self["status"] != serde_json::Value::String(String::from("succeeded")) { + panic!("Called succeeded on {}", serde_json::to_string_pretty(&self.0).unwrap()); + } + self + } } impl From for Value { diff --git a/meilisearch/tests/documents/errors.rs b/meilisearch/tests/documents/errors.rs index 055f6512f..8db761cb8 100644 --- a/meilisearch/tests/documents/errors.rs +++ b/meilisearch/tests/documents/errors.rs @@ -168,7 +168,7 @@ async fn get_all_documents_bad_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -569,7 +569,7 @@ async fn delete_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" @@ -776,7 +776,7 @@ async fn fetch_document_by_filter() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo", "code": "invalid_document_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_document_filter" diff --git a/meilisearch/tests/documents/get_documents.rs b/meilisearch/tests/documents/get_documents.rs index f32adf48d..38e2570b3 100644 --- a/meilisearch/tests/documents/get_documents.rs +++ b/meilisearch/tests/documents/get_documents.rs @@ -536,7 +536,8 @@ async fn get_document_with_vectors() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index 8c53e661b..92f72fe78 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -1860,7 +1860,8 @@ async fn import_dump_v6_containing_experimental_features() { "vectorStore": false, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -1954,7 +1955,8 @@ async fn generate_and_import_dump_containing_vectors() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); let index = server.index("pets"); @@ -2025,7 +2027,8 @@ async fn generate_and_import_dump_containing_vectors() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); diff --git a/meilisearch/tests/features/mod.rs b/meilisearch/tests/features/mod.rs index b90e455c5..2473a345a 100644 --- a/meilisearch/tests/features/mod.rs +++ b/meilisearch/tests/features/mod.rs @@ -21,7 +21,8 @@ async fn experimental_features() { "vectorStore": false, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -33,7 +34,8 @@ async fn experimental_features() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -45,7 +47,8 @@ async fn experimental_features() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -58,7 +61,8 @@ async fn experimental_features() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -71,7 +75,8 @@ async fn experimental_features() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); } @@ -91,7 +96,8 @@ async fn experimental_feature_metrics() { "vectorStore": false, "metrics": true, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -146,7 +152,7 @@ async fn errors() { meili_snap::snapshot!(code, @"400 Bad Request"); meili_snap::snapshot!(meili_snap::json_string!(response), @r###" { - "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `editDocumentsByFunction`", + "message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`", "code": "bad_request", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#bad_request" diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index b615902c2..29f5791ee 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -645,19 +645,20 @@ async fn filter_invalid_syntax_object() { index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await; - let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); index .search(json!({"filter": "title & Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); + snapshot!(response, @r###" + { + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); }) .await; } @@ -670,19 +671,20 @@ async fn filter_invalid_syntax_array() { index.update_settings(json!({"filterableAttributes": ["title"]})).await; let documents = DOCUMENTS.clone(); - index.add_documents(documents, None).await; - index.wait_task(1).await; + let (task, _code) = index.add_documents(documents, None).await; + index.wait_task(task.uid()).await; - let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", - "code": "invalid_search_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_search_filter" - }); index .search(json!({"filter": ["title & Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); + snapshot!(response, @r###" + { + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "code": "invalid_search_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); }) .await; } @@ -1203,3 +1205,68 @@ async fn distinct_at_search_time() { } "###); } + +#[actix_rt::test] +async fn search_with_contains_without_enabling_the_feature() { + // Since a filter is deserialized as a json Value it will never fail to deserialize. + // Thus the error message is not generated by deserr but written by us. + let server = Server::new().await; + let index = server.index("doggo"); + // Also, to trigger the error message we need to effectively create the index or else it'll throw an + // index does not exists error. + let (task, _code) = index.create(None).await; + server.wait_task(task.uid()).await.succeeded(); + + index + .search(json!({ "filter": "doggo CONTAINS kefir" }), |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n16:21 doggo CONTAINS kefir", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + }) + .await; + index + .search(json!({ "filter": "doggo != echo AND doggo CONTAINS kefir" }), |response, code| { + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n34:39 doggo != echo AND doggo CONTAINS kefir", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + }) + .await; + + // For the post search we can also use the arrays syntaxes + let (response, code) = + index.search_post(json!({ "filter": ["doggo != echo", "doggo CONTAINS kefir"] })).await; + + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n16:21 doggo CONTAINS kefir", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); + let (response, code) = + index.search_post(json!({ "filter": ["doggo != echo", ["doggo CONTAINS kefir"]] })).await; + + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n16:21 doggo CONTAINS kefir", + "code": "feature_not_enabled", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#feature_not_enabled" + } + "###); +} diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 1a4e8baab..95d3bb933 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -19,7 +19,8 @@ async fn index_with_documents_user_provided<'a>( "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -48,7 +49,8 @@ async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> I "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 181e0937f..7f4648e57 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -13,9 +13,11 @@ mod pagination; mod restrict_searchable; mod search_queue; +use meilisearch::Opt; use once_cell::sync::Lazy; +use tempfile::TempDir; -use crate::common::{Server, Value}; +use crate::common::{default_settings, Server, Value}; use crate::json; static DOCUMENTS: Lazy = Lazy::new(|| { @@ -576,6 +578,32 @@ async fn search_with_filter_array_notation() { assert_eq!(response["hits"].as_array().unwrap().len(), 3); } +#[actix_rt::test] +async fn search_with_contains_filter() { + let temp = TempDir::new().unwrap(); + let server = Server::new_with_options(Opt { + experimental_contains_filter: true, + ..default_settings(temp.path()) + }) + .await + .unwrap(); + let index = server.index("movies"); + + index.update_settings(json!({"filterableAttributes": ["title"]})).await; + + let documents = DOCUMENTS.clone(); + let (request, _code) = index.add_documents(documents, None).await; + index.wait_task(request.uid()).await.succeeded(); + + let (response, code) = index + .search_post(json!({ + "filter": "title CONTAINS cap" + })) + .await; + assert_eq!(code, 200, "{}", response); + assert_eq!(response["hits"].as_array().unwrap().len(), 2); +} + #[actix_rt::test] async fn search_with_sort_on_numbers() { let server = Server::new().await; diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index d1fe71140..5571a1e03 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -99,7 +99,8 @@ async fn secrets_are_hidden_in_settings() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); diff --git a/meilisearch/tests/similar/errors.rs b/meilisearch/tests/similar/errors.rs index a6d7a3da6..d0be6562f 100644 --- a/meilisearch/tests/similar/errors.rs +++ b/meilisearch/tests/similar/errors.rs @@ -360,16 +360,17 @@ async fn filter_invalid_syntax_object() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await; - let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", - "code": "invalid_similar_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" - }); index .similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); + snapshot!(response, @r###" + { + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "code": "invalid_similar_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); }) .await; } @@ -398,16 +399,17 @@ async fn filter_invalid_syntax_array() { snapshot!(code, @"202 Accepted"); index.wait_task(value.uid()).await; - let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", - "code": "invalid_similar_filter", - "type": "invalid_request", - "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" - }); index .similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| { - assert_eq!(response, expected_response); - assert_eq!(code, 400); + snapshot!(response, @r###" + { + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", + "code": "invalid_similar_filter", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_similar_filter" + } + "###); + snapshot!(code, @"400 Bad Request"); }) .await; } diff --git a/meilisearch/tests/similar/mod.rs b/meilisearch/tests/similar/mod.rs index edfecf83c..b4c95b059 100644 --- a/meilisearch/tests/similar/mod.rs +++ b/meilisearch/tests/similar/mod.rs @@ -56,7 +56,8 @@ async fn basic() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -246,7 +247,8 @@ async fn ranking_score_threshold() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -526,7 +528,8 @@ async fn filter() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -654,7 +657,8 @@ async fn limit_and_offset() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 21b643c20..a17ea1d4e 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -17,7 +17,8 @@ async fn add_remove_user_provided() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -161,7 +162,8 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); @@ -519,7 +521,8 @@ async fn add_remove_one_vector_4588() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); diff --git a/meilisearch/tests/vector/settings.rs b/meilisearch/tests/vector/settings.rs index 177e070cd..8fdb858c3 100644 --- a/meilisearch/tests/vector/settings.rs +++ b/meilisearch/tests/vector/settings.rs @@ -15,7 +15,8 @@ async fn update_embedder() { "vectorStore": true, "metrics": false, "logsRoute": false, - "editDocumentsByFunction": false + "editDocumentsByFunction": false, + "containsFilter": false } "###); diff --git a/milli/Cargo.toml b/milli/Cargo.toml index a71d013ff..e635bbcf4 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -38,6 +38,7 @@ heed = { version = "0.20.3", default-features = false, features = [ indexmap = { version = "2.2.6", features = ["serde"] } json-depth-checker = { path = "../json-depth-checker" } levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] } +memchr = "2.5.0" memmap2 = "0.9.4" obkv = "0.2.2" once_cell = "1.19.0" diff --git a/milli/src/index.rs b/milli/src/index.rs index 3886f1857..7d63ba183 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2142,6 +2142,47 @@ pub(crate) mod tests { ); } + #[test] + fn test_contains() { + let index = TempIndex::new(); + + index + .update_settings(|settings| { + settings.set_filterable_fields(hashset! { S("doggo") }); + }) + .unwrap(); + index + .add_documents(documents!([ + { "id": 0, "doggo": "kefir" }, + { "id": 1, "doggo": "kefirounet" }, + { "id": 2, "doggo": "kefkef" }, + { "id": 3, "doggo": "fifir" }, + { "id": 4, "doggo": "boubou" }, + { "id": 5 }, + ])) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + let mut search = index.search(&rtxn); + let search_result = search + .filter(Filter::from_str("doggo CONTAINS kefir").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1]>"); + let mut search = index.search(&rtxn); + let search_result = search + .filter(Filter::from_str("doggo CONTAINS KEF").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1, 2]>"); + let mut search = index.search(&rtxn); + let search_result = search + .filter(Filter::from_str("doggo NOT CONTAINS fir").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[2, 4, 5]>"); + } + #[test] fn replace_documents_external_ids_and_soft_deletion_check() { use big_s::S; diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 2fa3dae98..08f554b80 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -4,6 +4,8 @@ use std::ops::Bound::{self, Excluded, Included}; use either::Either; pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token}; +use heed::types::LazyDecode; +use memchr::memmem::Finder; use roaring::{MultiOps, RoaringBitmap}; use serde_json::Value; @@ -12,7 +14,11 @@ use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec, }; -use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result}; +use crate::index::db_name::FACET_ID_STRING_DOCIDS; +use crate::{ + distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result, + SerializationError, +}; /// The maximum number of filters the filter AST can process. const MAX_FILTER_DEPTH: usize = 2000; @@ -218,6 +224,10 @@ impl<'a> Filter<'a> { Ok(Some(Self { condition })) } + + pub fn use_contains_operator(&self) -> Option<&Token> { + self.condition.use_contains_operator() + } } impl<'a> Filter<'a> { @@ -295,6 +305,41 @@ impl<'a> Filter<'a> { let all_ids = index.documents_ids(rtxn)?; return Ok(all_ids - docids); } + Condition::Contains(val) => { + let value = crate::normalize_facet(val.value()); + let finder = Finder::new(&value); + let base = FacetGroupKey { field_id, level: 0, left_bound: "" }; + let docids = strings_db + .prefix_iter(rtxn, &base)? + .remap_data_type::>() + .filter_map(|result| -> Option> { + match result { + Ok((FacetGroupKey { left_bound, .. }, lazy_group_value)) => { + if finder.find(left_bound.as_bytes()).is_some() { + Some(lazy_group_value.decode().map(|gv| gv.bitmap).map_err( + |_| { + InternalError::from(SerializationError::Decoding { + db_name: Some(FACET_ID_STRING_DOCIDS), + }) + .into() + }, + )) + } else { + None + } + } + Err(_e) => { + Some(Err(InternalError::from(SerializationError::Decoding { + db_name: Some(FACET_ID_STRING_DOCIDS), + }) + .into())) + } + } + }) + .union()?; + + return Ok(docids); + } }; let mut output = RoaringBitmap::new();