4804: Implements the experimental contains filter operator r=irevoire a=irevoire

# Pull Request
Related PRD: (private link) https://www.notion.so/meilisearch/Contains-Like-Filter-Operator-0d8ad53c6761466f913432eb1d843f1e
Public usage page: https://meilisearch.notion.site/Contains-filter-operator-usage-3e7421b0aacf45f48ab09abe259a1de6

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3613

## What does this PR do?
- Extract the contains operator from this PR: https://github.com/meilisearch/meilisearch/pull/3751
- Gate it behind a feature flag
- Add tests


Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-07-17 15:47:11 +00:00 committed by GitHub
commit ea73615abf
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
34 changed files with 487 additions and 121 deletions

1
Cargo.lock generated
View File

@ -3542,6 +3542,7 @@ dependencies = [
"maplit",
"md5",
"meili-snap",
"memchr",
"memmap2",
"mimalloc",
"obkv",

View File

@ -26,6 +26,7 @@ pub enum Condition<'a> {
LowerThan(Token<'a>),
LowerThanOrEqual(Token<'a>),
Between { from: Token<'a>, to: Token<'a> },
Contains { keyword: Token<'a>, word: Token<'a> },
}
/// condition = value ("==" | ">" ...) value
@ -92,6 +93,34 @@ pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
Ok((input, FilterCondition::Not(Box::new(FilterCondition::Condition { fid: key, op: Exists }))))
}
/// contains = value "CONTAINS" value
pub fn parse_contains(input: Span) -> IResult<FilterCondition> {
let (input, (fid, contains, value)) =
tuple((parse_value, tag("CONTAINS"), cut(parse_value)))(input)?;
Ok((
input,
FilterCondition::Condition {
fid,
op: Contains { keyword: Token { span: contains, value: None }, word: value },
},
))
}
/// contains = value "NOT" WS+ "CONTAINS" value
pub fn parse_not_contains(input: Span) -> IResult<FilterCondition> {
let keyword = tuple((tag("NOT"), multispace1, tag("CONTAINS")));
let (input, (fid, (_not, _spaces, contains), value)) =
tuple((parse_value, keyword, cut(parse_value)))(input)?;
Ok((
input,
FilterCondition::Not(Box::new(FilterCondition::Condition {
fid,
op: Contains { keyword: Token { span: contains, value: None }, word: value },
})),
))
}
/// to = value value "TO" WS+ value
pub fn parse_to(input: Span) -> IResult<FilterCondition> {
let (input, (key, from, _, _, to)) =

View File

@ -146,7 +146,7 @@ impl<'a> Display for Error<'a> {
}
ErrorKind::InvalidPrimary => {
let text = if input.trim().is_empty() { "but instead got nothing.".to_string() } else { format!("at `{}`.", escaped_input) };
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` {}", text)?
}
ErrorKind::InvalidEscapedNumber => {
writeln!(f, "Found an invalid escaped sequence number: `{}`.", escaped_input)?

View File

@ -48,8 +48,8 @@ use std::fmt::Debug;
pub use condition::{parse_condition, parse_to, Condition};
use condition::{
parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null, parse_is_null,
parse_not_exists,
parse_contains, parse_exists, parse_is_empty, parse_is_not_empty, parse_is_not_null,
parse_is_null, parse_not_contains, parse_not_exists,
};
use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
pub use error::{Error, ErrorKind};
@ -147,7 +147,37 @@ pub enum FilterCondition<'a> {
GeoBoundingBox { top_right_point: [Token<'a>; 2], bottom_left_point: [Token<'a>; 2] },
}
pub enum TraversedElement<'a> {
FilterCondition(&'a FilterCondition<'a>),
Condition(&'a Condition<'a>),
}
impl<'a> FilterCondition<'a> {
pub fn use_contains_operator(&self) -> Option<&Token> {
match self {
FilterCondition::Condition { fid: _, op } => match op {
Condition::GreaterThan(_)
| Condition::GreaterThanOrEqual(_)
| Condition::Equal(_)
| Condition::NotEqual(_)
| Condition::Null
| Condition::Empty
| Condition::Exists
| Condition::LowerThan(_)
| Condition::LowerThanOrEqual(_)
| Condition::Between { .. } => None,
Condition::Contains { keyword, word: _ } => Some(keyword),
},
FilterCondition::Not(this) => this.use_contains_operator(),
FilterCondition::Or(seq) | FilterCondition::And(seq) => {
seq.iter().find_map(|filter| filter.use_contains_operator())
}
FilterCondition::GeoLowerThan { .. }
| FilterCondition::GeoBoundingBox { .. }
| FilterCondition::In { .. } => None,
}
}
/// Returns the first token found at the specified depth, `None` if no token at this depth.
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
match self {
@ -452,6 +482,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
parse_exists,
parse_not_exists,
parse_to,
parse_contains,
parse_not_contains,
// the next lines are only for error handling and are written at the end to have the less possible performance impact
parse_geo,
parse_geo_distance,
@ -534,6 +566,7 @@ impl<'a> std::fmt::Display for Condition<'a> {
Condition::LowerThan(token) => write!(f, "< {token}"),
Condition::LowerThanOrEqual(token) => write!(f, "<= {token}"),
Condition::Between { from, to } => write!(f, "{from} TO {to}"),
Condition::Contains { word, keyword: _ } => write!(f, "CONTAINS {word}"),
}
}
}
@ -558,6 +591,7 @@ pub mod tests {
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
}
#[track_caller]
fn p(s: &str) -> impl std::fmt::Display + '_ {
Fc::parse(s).unwrap().unwrap()
}
@ -639,6 +673,13 @@ pub mod tests {
insta::assert_snapshot!(p("NOT subscribers NOT EXISTS"), @"{subscribers} EXISTS");
insta::assert_snapshot!(p("subscribers NOT EXISTS"), @"NOT ({subscribers} EXISTS)");
// Test CONTAINS + NOT CONTAINS
insta::assert_snapshot!(p("subscribers CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
insta::assert_snapshot!(p("NOT subscribers CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");
insta::assert_snapshot!(p("subscribers NOT CONTAINS hello"), @"NOT ({subscribers} CONTAINS {hello})");
insta::assert_snapshot!(p("NOT subscribers NOT CONTAINS 'hello'"), @"{subscribers} CONTAINS {hello}");
insta::assert_snapshot!(p("subscribers NOT CONTAINS 'hello'"), @"NOT ({subscribers} CONTAINS {hello})");
// Test nested NOT
insta::assert_snapshot!(p("NOT NOT NOT NOT x = 5"), @"{x} = {5}");
insta::assert_snapshot!(p("NOT NOT (NOT NOT x = 5)"), @"{x} = {5}");
@ -710,7 +751,7 @@ pub mod tests {
"###);
insta::assert_snapshot!(p("'OR'"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`.
1:5 'OR'
"###);
@ -720,12 +761,12 @@ pub mod tests {
"###);
insta::assert_snapshot!(p("channel Ponce"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`.
1:14 channel Ponce
"###);
insta::assert_snapshot!(p("channel = Ponce OR"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.
19:19 channel = Ponce OR
"###);
@ -810,12 +851,12 @@ pub mod tests {
"###);
insta::assert_snapshot!(p("colour NOT EXIST"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`.
1:17 colour NOT EXIST
"###);
insta::assert_snapshot!(p("subscribers 100 TO1000"), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`.
1:23 subscribers 100 TO1000
"###);
@ -878,35 +919,35 @@ pub mod tests {
"###);
insta::assert_snapshot!(p(r#"value NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NULL`.
1:11 value NULL
"###);
insta::assert_snapshot!(p(r#"value NOT NULL"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT NULL`.
1:15 value NOT NULL
"###);
insta::assert_snapshot!(p(r#"value EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value EMPTY`.
1:12 value EMPTY
"###);
insta::assert_snapshot!(p(r#"value NOT EMPTY"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value NOT EMPTY`.
1:16 value NOT EMPTY
"###);
insta::assert_snapshot!(p(r#"value IS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS`.
1:9 value IS
"###);
insta::assert_snapshot!(p(r#"value IS NOT"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT`.
1:13 value IS NOT
"###);
insta::assert_snapshot!(p(r#"value IS EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS EXISTS`.
1:16 value IS EXISTS
"###);
insta::assert_snapshot!(p(r#"value IS NOT EXISTS"#), @r###"
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `value IS NOT EXISTS`.
1:20 value IS NOT EXISTS
"###);
}

View File

@ -211,6 +211,7 @@ fn is_keyword(s: &str) -> bool {
| "IS"
| "NULL"
| "EMPTY"
| "CONTAINS"
| "_geoRadius"
| "_geoBoundingBox"
)

View File

@ -81,6 +81,19 @@ impl RoFeatures {
.into())
}
}
pub fn check_contains_filter(&self) -> Result<()> {
if self.runtime.contains_filter {
Ok(())
} else {
Err(FeatureNotEnabledError {
disabled_action: "Using `CONTAINS` in a filter",
feature: "contains filter",
issue_link: "https://github.com/orgs/meilisearch/discussions/763",
}
.into())
}
}
}
impl FeatureData {
@ -92,9 +105,11 @@ impl FeatureData {
let txn = env.read_txn()?;
let persisted_features: RuntimeTogglableFeatures =
runtime_features_db.get(&txn, EXPERIMENTAL_FEATURES)?.unwrap_or_default();
let InstanceTogglableFeatures { metrics, logs_route, contains_filter } = instance_features;
let runtime = Arc::new(RwLock::new(RuntimeTogglableFeatures {
metrics: instance_features.metrics || persisted_features.metrics,
logs_route: instance_features.logs_route || persisted_features.logs_route,
metrics: metrics || persisted_features.metrics,
logs_route: logs_route || persisted_features.logs_route,
contains_filter: contains_filter || persisted_features.contains_filter,
..persisted_features
}));

View File

@ -7,10 +7,12 @@ pub struct RuntimeTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
pub edit_documents_by_function: bool,
pub contains_filter: bool,
}
#[derive(Default, Debug, Clone, Copy)]
pub struct InstanceTogglableFeatures {
pub metrics: bool,
pub logs_route: bool,
pub contains_filter: bool,
}

View File

@ -261,6 +261,7 @@ impl super::Analytics for SegmentAnalytics {
#[derive(Debug, Clone, Serialize)]
struct Infos {
env: String,
experimental_contains_filter: bool,
experimental_enable_metrics: bool,
experimental_search_queue_size: usize,
experimental_logs_mode: LogMode,
@ -303,6 +304,7 @@ impl From<Opt> for Infos {
// Thus we must not insert `..` at the end.
let Opt {
db_path,
experimental_contains_filter,
experimental_enable_metrics,
experimental_search_queue_size,
experimental_logs_mode,
@ -353,6 +355,7 @@ impl From<Opt> for Infos {
// We consider information sensible if it contains a path, an address, or a key.
Self {
env,
experimental_contains_filter,
experimental_enable_metrics,
experimental_search_queue_size,
experimental_logs_mode,

View File

@ -25,12 +25,12 @@ pub enum MeilisearchHttpError {
DocumentNotFound(String),
#[error("Sending an empty filter is forbidden.")]
EmptyFilter,
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
#[error("Using `federationOptions` is not allowed in a non-federated search.\n Hint: remove `federationOptions` from query #{0} or add `federation: {{}}` to the request.")]
FederationOptionsInNonFederatedRequest(usize),
#[error("Inside `.queries[{0}]`: Using pagination options is not allowed in federated queries.\n Hint: remove `{1}` from query #{0} or remove `federation: {{}}` from the request")]
PaginationInFederatedQuery(usize, &'static str),
#[error("Invalid syntax for the filter parameter: `expected {}, found: {1}`.", .0.join(", "))]
InvalidExpression(&'static [&'static str], Value),
#[error("A {0} payload is missing.")]
MissingPayload(PayloadType),
#[error("Too many search requests running at the same time: {0}. Retry after 10s.")]

View File

@ -54,6 +54,7 @@ const MEILI_LOG_LEVEL: &str = "MEILI_LOG_LEVEL";
const MEILI_EXPERIMENTAL_LOGS_MODE: &str = "MEILI_EXPERIMENTAL_LOGS_MODE";
const MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS: &str = "MEILI_EXPERIMENTAL_REPLICATION_PARAMETERS";
const MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE: &str = "MEILI_EXPERIMENTAL_ENABLE_LOGS_ROUTE";
const MEILI_EXPERIMENTAL_CONTAINS_FILTER: &str = "MEILI_EXPERIMENTAL_CONTAINS_FILTER";
const MEILI_EXPERIMENTAL_ENABLE_METRICS: &str = "MEILI_EXPERIMENTAL_ENABLE_METRICS";
const MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE: &str = "MEILI_EXPERIMENTAL_SEARCH_QUEUE_SIZE";
const MEILI_EXPERIMENTAL_REDUCE_INDEXING_MEMORY_USAGE: &str =
@ -339,6 +340,13 @@ pub struct Opt {
#[serde(default)]
pub log_level: LogLevel,
/// Experimental contains filter feature. For more information, see: <https://github.com/orgs/meilisearch/discussions/763>
///
/// Enables the experimental contains filter operator.
#[clap(long, env = MEILI_EXPERIMENTAL_CONTAINS_FILTER)]
#[serde(default)]
pub experimental_contains_filter: bool,
/// Experimental metrics feature. For more information, see: <https://github.com/meilisearch/meilisearch/discussions/3518>
///
/// Enables the Prometheus metrics on the `GET /metrics` endpoint.
@ -483,6 +491,7 @@ impl Opt {
config_file_path: _,
#[cfg(feature = "analytics")]
no_analytics,
experimental_contains_filter,
experimental_enable_metrics,
experimental_search_queue_size,
experimental_logs_mode,
@ -540,6 +549,10 @@ impl Opt {
export_to_env_if_not_present(MEILI_DUMP_DIR, dump_dir);
export_to_env_if_not_present(MEILI_LOG_LEVEL, log_level.to_string());
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_CONTAINS_FILTER,
experimental_contains_filter.to_string(),
);
export_to_env_if_not_present(
MEILI_EXPERIMENTAL_ENABLE_METRICS,
experimental_enable_metrics.to_string(),
@ -617,6 +630,7 @@ impl Opt {
InstanceTogglableFeatures {
metrics: self.experimental_enable_metrics,
logs_route: self.experimental_enable_logs_route,
contains_filter: self.experimental_contains_filter,
}
}
}

View File

@ -49,6 +49,8 @@ pub struct RuntimeTogglableFeatures {
pub logs_route: Option<bool>,
#[deserr(default)]
pub edit_documents_by_function: Option<bool>,
#[deserr(default)]
pub contains_filter: Option<bool>,
}
async fn patch_features(
@ -72,6 +74,7 @@ async fn patch_features(
.0
.edit_documents_by_function
.unwrap_or(old_features.edit_documents_by_function),
contains_filter: new_features.0.contains_filter.unwrap_or(old_features.contains_filter),
};
// explicitly destructure for analytics rather than using the `Serialize` implementation, because
@ -82,6 +85,7 @@ async fn patch_features(
metrics,
logs_route,
edit_documents_by_function,
contains_filter,
} = new_features;
analytics.publish(
@ -91,6 +95,7 @@ async fn patch_features(
"metrics": metrics,
"logs_route": logs_route,
"edit_documents_by_function": edit_documents_by_function,
"contains_filter": contains_filter,
}),
Some(&req),
);

View File

@ -7,7 +7,7 @@ use bstr::ByteSlice as _;
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::Deserr;
use futures::StreamExt;
use index_scheduler::{IndexScheduler, TaskId};
use index_scheduler::{IndexScheduler, RoFeatures, TaskId};
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::document_formats::{read_csv, read_json, read_ndjson, PayloadType};
@ -260,8 +260,15 @@ fn documents_by_query(
let retrieve_vectors = RetrieveVectors::new(retrieve_vectors, features)?;
let index = index_scheduler.index(&index_uid)?;
let (total, documents) =
retrieve_documents(&index, offset, limit, filter, fields, retrieve_vectors)?;
let (total, documents) = retrieve_documents(
&index,
offset,
limit,
filter,
fields,
retrieve_vectors,
index_scheduler.features(),
)?;
let ret = PaginationView::new(offset, limit, total as usize, documents);
@ -565,11 +572,9 @@ pub async fn delete_documents_by_filter(
analytics.delete_documents(DocumentDeletionKind::PerFilter, &req);
// we ensure the filter is well formed before enqueuing it
|| -> Result<_, ResponseError> {
Ok(crate::search::parse_filter(&filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
}()
// and whatever was the error, the error code should always be an InvalidDocumentFilter
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
crate::search::parse_filter(&filter, Code::InvalidDocumentFilter, index_scheduler.features())?
.ok_or(MeilisearchHttpError::EmptyFilter)?;
let task = KindWithContent::DocumentDeletionByFilter { index_uid, filter_expr: filter };
let uid = get_task_id(&req, &opt)?;
@ -626,11 +631,12 @@ pub async fn edit_documents_by_function(
if let Some(ref filter) = filter {
// we ensure the filter is well formed before enqueuing it
|| -> Result<_, ResponseError> {
Ok(crate::search::parse_filter(filter)?.ok_or(MeilisearchHttpError::EmptyFilter)?)
}()
// and whatever was the error, the error code should always be an InvalidDocumentFilter
.map_err(|err| ResponseError::from_msg(err.message, Code::InvalidDocumentFilter))?;
crate::search::parse_filter(
filter,
Code::InvalidDocumentFilter,
index_scheduler.features(),
)?
.ok_or(MeilisearchHttpError::EmptyFilter)?;
}
let task = KindWithContent::DocumentEdition {
index_uid,
@ -736,12 +742,12 @@ fn retrieve_documents<S: AsRef<str>>(
filter: Option<Value>,
attributes_to_retrieve: Option<Vec<S>>,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
) -> Result<(u64, Vec<Document>), ResponseError> {
let rtxn = index.read_txn()?;
let filter = &filter;
let filter = if let Some(filter) = filter {
parse_filter(filter)
.map_err(|err| ResponseError::from_msg(err.to_string(), Code::InvalidDocumentFilter))?
parse_filter(filter, Code::InvalidDocumentFilter, features)?
} else {
None
};

View File

@ -79,7 +79,14 @@ pub async fn search(
let search_kind = search_kind(&search_query, &index_scheduler, &index, features)?;
let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_facet_search(&index, search_query, facet_query, facet_name, search_kind)
perform_facet_search(
&index,
search_query,
facet_query,
facet_name,
search_kind,
index_scheduler.features(),
)
})
.await?;

View File

@ -231,7 +231,7 @@ pub async fn search_with_url_query(
let retrieve_vector = RetrieveVectors::new(query.retrieve_vectors, features)?;
let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector)
perform_search(&index, query, search_kind, retrieve_vector, index_scheduler.features())
})
.await?;
if let Ok(ref search_result) = search_result {
@ -274,7 +274,7 @@ pub async fn search_with_post(
let _permit = search_queue.try_get_search_permit().await?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vectors)
perform_search(&index, query, search_kind, retrieve_vectors, index_scheduler.features())
})
.await?;
if let Ok(ref search_result) = search_result {

View File

@ -106,7 +106,14 @@ async fn similar(
SearchKind::embedder(&index_scheduler, &index, query.embedder.as_deref(), None)?;
tokio::task::spawn_blocking(move || {
perform_similar(&index, query, embedder_name, embedder, retrieve_vectors)
perform_similar(
&index,
query,
embedder_name,
embedder,
retrieve_vectors,
index_scheduler.features(),
)
})
.await?
}

View File

@ -130,7 +130,7 @@ pub async fn multi_search_with_post(
.with_index(query_index)?;
let search_result = tokio::task::spawn_blocking(move || {
perform_search(&index, query, search_kind, retrieve_vector)
perform_search(&index, query, search_kind, retrieve_vector, features)
})
.await
.with_index(query_index)?;

View File

@ -474,7 +474,7 @@ pub fn perform_federated_search(
};
let (mut search, _is_finite_pagination, _max_total_hits, _offset) =
prepare_search(&index, &rtxn, &query, &search_kind, time_budget)?;
prepare_search(&index, &rtxn, &query, &search_kind, time_budget, features)?;
search.scoring_strategy(milli::score_details::ScoringStrategy::Detailed);
search.offset(0);

View File

@ -7,6 +7,7 @@ use std::time::{Duration, Instant};
use deserr::Deserr;
use either::Either;
use index_scheduler::RoFeatures;
use indexmap::IndexMap;
use meilisearch_auth::IndexSearchRules;
use meilisearch_types::deserr::DeserrJsonError;
@ -761,7 +762,8 @@ fn prepare_search<'t>(
query: &'t SearchQuery,
search_kind: &SearchKind,
time_budget: TimeBudget,
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
features: RoFeatures,
) -> Result<(milli::Search<'t>, bool, usize, usize), ResponseError> {
let mut search = index.search(rtxn);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
@ -848,7 +850,7 @@ fn prepare_search<'t>(
search.limit(limit);
if let Some(ref filter) = query.filter {
if let Some(facets) = parse_filter(filter)? {
if let Some(facets) = parse_filter(filter, Code::InvalidSearchFilter, features)? {
search.filter(facets);
}
}
@ -872,7 +874,8 @@ pub fn perform_search(
query: SearchQuery,
search_kind: SearchKind,
retrieve_vectors: RetrieveVectors,
) -> Result<SearchResult, MeilisearchHttpError> {
features: RoFeatures,
) -> Result<SearchResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
@ -881,7 +884,7 @@ pub fn perform_search(
};
let (search, is_finite_pagination, max_total_hits, offset) =
prepare_search(index, &rtxn, &query, &search_kind, time_budget)?;
prepare_search(index, &rtxn, &query, &search_kind, time_budget, features)?;
let (
milli::SearchResult {
@ -1337,7 +1340,8 @@ pub fn perform_facet_search(
facet_query: Option<String>,
facet_name: String,
search_kind: SearchKind,
) -> Result<FacetSearchResult, MeilisearchHttpError> {
features: RoFeatures,
) -> Result<FacetSearchResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
let time_budget = match index.search_cutoff(&rtxn)? {
@ -1345,7 +1349,8 @@ pub fn perform_facet_search(
None => TimeBudget::default(),
};
let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, &search_kind, time_budget)?;
let (search, _, _, _) =
prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
let mut facet_search = SearchForFacetValues::new(
facet_name,
search,
@ -1371,6 +1376,7 @@ pub fn perform_similar(
embedder_name: String,
embedder: Arc<Embedder>,
retrieve_vectors: RetrieveVectors,
features: RoFeatures,
) -> Result<SimilarResult, ResponseError> {
let before_search = Instant::now();
let rtxn = index.read_txn()?;
@ -1401,10 +1407,7 @@ pub fn perform_similar(
milli::Similar::new(internal_id, offset, limit, index, &rtxn, embedder_name, embedder);
if let Some(ref filter) = query.filter {
if let Some(facets) = parse_filter(filter)
// inject InvalidSimilarFilter code
.map_err(|e| ResponseError::from_msg(e.to_string(), Code::InvalidSimilarFilter))?
{
if let Some(facets) = parse_filter(filter, Code::InvalidSimilarFilter, features)? {
similar.filter(facets);
}
}
@ -1760,15 +1763,33 @@ fn format_value(
}
}
pub(crate) fn parse_filter(facets: &Value) -> Result<Option<Filter>, MeilisearchHttpError> {
match facets {
Value::String(expr) => {
let condition = Filter::from_str(expr)?;
Ok(condition)
pub(crate) fn parse_filter(
facets: &Value,
filter_parsing_error_code: Code,
features: RoFeatures,
) -> Result<Option<Filter>, ResponseError> {
let filter = match facets {
Value::String(expr) => Filter::from_str(expr).map_err(|e| e.into()),
Value::Array(arr) => parse_filter_array(arr).map_err(|e| e.into()),
v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone()).into()),
};
let filter = filter.map_err(|err: ResponseError| {
ResponseError::from_msg(err.to_string(), filter_parsing_error_code)
})?;
if let Some(ref filter) = filter {
// If the contains operator is used while the contains filter features is not enabled, errors out
if let Some((token, error)) =
filter.use_contains_operator().zip(features.check_contains_filter().err())
{
return Err(ResponseError::from_msg(
token.as_external_error(error).to_string(),
Code::FeatureNotEnabled,
));
}
Value::Array(arr) => parse_filter_array(arr),
v => Err(MeilisearchHttpError::InvalidExpression(&["String", "Array"], v.clone())),
}
Ok(filter)
}
fn parse_filter_array(arr: &[Value]) -> Result<Option<Filter>, MeilisearchHttpError> {

View File

@ -26,6 +26,15 @@ impl Value {
panic!("Didn't find any task id in: {self}");
}
}
// Panic if the json doesn't contain the `status` field set to "succeeded"
#[track_caller]
pub fn succeeded(&self) -> &Self {
if self["status"] != serde_json::Value::String(String::from("succeeded")) {
panic!("Called succeeded on {}", serde_json::to_string_pretty(&self.0).unwrap());
}
self
}
}
impl From<serde_json::Value> for Value {

View File

@ -168,7 +168,7 @@ async fn get_all_documents_bad_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -569,7 +569,7 @@ async fn delete_document_by_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -776,7 +776,7 @@ async fn fetch_document_by_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"

View File

@ -536,7 +536,8 @@ async fn get_document_with_vectors() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View File

@ -1860,7 +1860,8 @@ async fn import_dump_v6_containing_experimental_features() {
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -1954,7 +1955,8 @@ async fn generate_and_import_dump_containing_vectors() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let index = server.index("pets");
@ -2025,7 +2027,8 @@ async fn generate_and_import_dump_containing_vectors() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View File

@ -21,7 +21,8 @@ async fn experimental_features() {
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -33,7 +34,8 @@ async fn experimental_features() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -45,7 +47,8 @@ async fn experimental_features() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -58,7 +61,8 @@ async fn experimental_features() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -71,7 +75,8 @@ async fn experimental_features() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
}
@ -91,7 +96,8 @@ async fn experimental_feature_metrics() {
"vectorStore": false,
"metrics": true,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -146,7 +152,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `editDocumentsByFunction`",
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View File

@ -645,19 +645,20 @@ async fn filter_invalid_syntax_object() {
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": "title & Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
}
@ -670,19 +671,20 @@ async fn filter_invalid_syntax_array() {
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": ["title & Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
}
@ -1203,3 +1205,68 @@ async fn distinct_at_search_time() {
}
"###);
}
#[actix_rt::test]
async fn search_with_contains_without_enabling_the_feature() {
// Since a filter is deserialized as a json Value it will never fail to deserialize.
// Thus the error message is not generated by deserr but written by us.
let server = Server::new().await;
let index = server.index("doggo");
// Also, to trigger the error message we need to effectively create the index or else it'll throw an
// index does not exists error.
let (task, _code) = index.create(None).await;
server.wait_task(task.uid()).await.succeeded();
index
.search(json!({ "filter": "doggo CONTAINS kefir" }), |response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
.await;
index
.search(json!({ "filter": "doggo != echo AND doggo CONTAINS kefir" }), |response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
.await;
// For the post search we can also use the arrays syntaxes
let (response, code) =
index.search_post(json!({ "filter": ["doggo != echo", "doggo CONTAINS kefir"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
let (response, code) =
index.search_post(json!({ "filter": ["doggo != echo", ["doggo CONTAINS kefir"]] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}

View File

@ -19,7 +19,8 @@ async fn index_with_documents_user_provided<'a>(
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -48,7 +49,8 @@ async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> I
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View File

@ -13,9 +13,11 @@ mod pagination;
mod restrict_searchable;
mod search_queue;
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value};
use crate::json;
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
@ -576,6 +578,32 @@ async fn search_with_filter_array_notation() {
assert_eq!(response["hits"].as_array().unwrap().len(), 3);
}
#[actix_rt::test]
async fn search_with_contains_filter() {
let temp = TempDir::new().unwrap();
let server = Server::new_with_options(Opt {
experimental_contains_filter: true,
..default_settings(temp.path())
})
.await
.unwrap();
let index = server.index("movies");
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
let (request, _code) = index.add_documents(documents, None).await;
index.wait_task(request.uid()).await.succeeded();
let (response, code) = index
.search_post(json!({
"filter": "title CONTAINS cap"
}))
.await;
assert_eq!(code, 200, "{}", response);
assert_eq!(response["hits"].as_array().unwrap().len(), 2);
}
#[actix_rt::test]
async fn search_with_sort_on_numbers() {
let server = Server::new().await;

View File

@ -99,7 +99,8 @@ async fn secrets_are_hidden_in_settings() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View File

@ -360,16 +360,17 @@ async fn filter_invalid_syntax_object() {
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
}
@ -398,16 +399,17 @@ async fn filter_invalid_syntax_array() {
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
}

View File

@ -56,7 +56,8 @@ async fn basic() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -246,7 +247,8 @@ async fn ranking_score_threshold() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -526,7 +528,8 @@ async fn filter() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -654,7 +657,8 @@ async fn limit_and_offset() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View File

@ -17,7 +17,8 @@ async fn add_remove_user_provided() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -171,7 +172,8 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -622,7 +624,8 @@ async fn add_remove_one_vector_4588() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View File

@ -15,7 +15,8 @@ async fn update_embedder() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View File

@ -38,6 +38,7 @@ heed = { version = "0.20.3", default-features = false, features = [
indexmap = { version = "2.2.6", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0"
memmap2 = "0.9.4"
obkv = "0.2.2"
once_cell = "1.19.0"

View File

@ -2140,6 +2140,47 @@ pub(crate) mod tests {
);
}
#[test]
fn test_contains() {
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("doggo") });
})
.unwrap();
index
.add_documents(documents!([
{ "id": 0, "doggo": "kefir" },
{ "id": 1, "doggo": "kefirounet" },
{ "id": 2, "doggo": "kefkef" },
{ "id": 3, "doggo": "fifir" },
{ "id": 4, "doggo": "boubou" },
{ "id": 5 },
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
let search_result = search
.filter(Filter::from_str("doggo CONTAINS kefir").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1]>");
let mut search = index.search(&rtxn);
let search_result = search
.filter(Filter::from_str("doggo CONTAINS KEF").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1, 2]>");
let mut search = index.search(&rtxn);
let search_result = search
.filter(Filter::from_str("doggo NOT CONTAINS fir").unwrap().unwrap())
.execute()
.unwrap();
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[2, 4, 5]>");
}
#[test]
fn replace_documents_external_ids_and_soft_deletion_check() {
use big_s::S;

View File

@ -4,6 +4,8 @@ use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use heed::types::LazyDecode;
use memchr::memmem::Finder;
use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value;
@ -12,7 +14,11 @@ use crate::error::{Error, UserError};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
distance_between_two_points, lat_lng_to_xyz, FieldId, Index, InternalError, Result,
SerializationError,
};
/// The maximum number of filters the filter AST can process.
const MAX_FILTER_DEPTH: usize = 2000;
@ -218,6 +224,10 @@ impl<'a> Filter<'a> {
Ok(Some(Self { condition }))
}
pub fn use_contains_operator(&self) -> Option<&Token> {
self.condition.use_contains_operator()
}
}
impl<'a> Filter<'a> {
@ -295,6 +305,41 @@ impl<'a> Filter<'a> {
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
}
Condition::Contains { keyword: _, word } => {
let value = crate::normalize_facet(word.value());
let finder = Finder::new(&value);
let base = FacetGroupKey { field_id, level: 0, left_bound: "" };
let docids = strings_db
.prefix_iter(rtxn, &base)?
.remap_data_type::<LazyDecode<FacetGroupValueCodec>>()
.filter_map(|result| -> Option<Result<RoaringBitmap>> {
match result {
Ok((FacetGroupKey { left_bound, .. }, lazy_group_value)) => {
if finder.find(left_bound.as_bytes()).is_some() {
Some(lazy_group_value.decode().map(|gv| gv.bitmap).map_err(
|_| {
InternalError::from(SerializationError::Decoding {
db_name: Some(FACET_ID_STRING_DOCIDS),
})
.into()
},
))
} else {
None
}
}
Err(_e) => {
Some(Err(InternalError::from(SerializationError::Decoding {
db_name: Some(FACET_ID_STRING_DOCIDS),
})
.into()))
}
}
})
.union()?;
return Ok(docids);
}
};
let mut output = RoaringBitmap::new();