4804: Implements the experimental contains filter operator r=irevoire a=irevoire

# Pull Request
Related PRD: (private link) https://www.notion.so/meilisearch/Contains-Like-Filter-Operator-0d8ad53c6761466f913432eb1d843f1e
Public usage page: https://meilisearch.notion.site/Contains-filter-operator-usage-3e7421b0aacf45f48ab09abe259a1de6

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3613

## What does this PR do?
- Extract the contains operator from this PR: https://github.com/meilisearch/meilisearch/pull/3751
- Gate it behind a feature flag
- Add tests


Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-07-17 15:47:11 +00:00 committed by GitHub
commit ea73615abf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
34 changed files with 487 additions and 121 deletions

View file

@ -26,6 +26,15 @@ impl Value {
panic!("Didn't find any task id in: {self}");
}
}
/// Asserts that the `status` field of this JSON value equals `"succeeded"`.
///
/// Returns `self` unchanged when the check passes, so the call can be chained.
///
/// # Panics
///
/// Panics with the pretty-printed JSON when the `status` field is not equal to
/// `"succeeded"`. Thanks to `#[track_caller]` the panic is reported at the call site.
#[track_caller]
pub fn succeeded(&self) -> &Self {
    let expected = serde_json::Value::String(String::from("succeeded"));
    if self["status"] == expected {
        return self;
    }
    panic!("Called succeeded on {}", serde_json::to_string_pretty(&self.0).unwrap())
}
}
impl From<serde_json::Value> for Value {

View file

@ -168,7 +168,7 @@ async fn get_all_documents_bad_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `doggo`.\n1:6 doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -569,7 +569,7 @@ async fn delete_document_by_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `hello`.\n1:6 hello",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"
@ -776,7 +776,7 @@ async fn fetch_document_by_filter() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `cool doggo`.\n1:11 cool doggo",
"code": "invalid_document_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_filter"

View file

@ -536,7 +536,8 @@ async fn get_document_with_vectors() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View file

@ -1860,7 +1860,8 @@ async fn import_dump_v6_containing_experimental_features() {
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -1954,7 +1955,8 @@ async fn generate_and_import_dump_containing_vectors() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
let index = server.index("pets");
@ -2025,7 +2027,8 @@ async fn generate_and_import_dump_containing_vectors() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View file

@ -21,7 +21,8 @@ async fn experimental_features() {
"vectorStore": false,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -33,7 +34,8 @@ async fn experimental_features() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -45,7 +47,8 @@ async fn experimental_features() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -58,7 +61,8 @@ async fn experimental_features() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -71,7 +75,8 @@ async fn experimental_features() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
}
@ -91,7 +96,8 @@ async fn experimental_feature_metrics() {
"vectorStore": false,
"metrics": true,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -146,7 +152,7 @@ async fn errors() {
meili_snap::snapshot!(code, @"400 Bad Request");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `editDocumentsByFunction`",
"message": "Unknown field `NotAFeature`: expected one of `vectorStore`, `metrics`, `logsRoute`, `editDocumentsByFunction`, `containsFilter`",
"code": "bad_request",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#bad_request"

View file

@ -645,19 +645,20 @@ async fn filter_invalid_syntax_object() {
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": "title & Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
}
@ -670,19 +671,20 @@ async fn filter_invalid_syntax_array() {
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let (task, _code) = index.add_documents(documents, None).await;
index.wait_task(task.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": ["title & Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
}
@ -1203,3 +1205,68 @@ async fn distinct_at_search_time() {
}
"###);
}
/// Checks that a `CONTAINS` filter is rejected with a `400 Bad Request` and the
/// `feature_not_enabled` error code when the server is started through `Server::new()`.
///
/// The same expectation is exercised with a plain string filter, with a filter combining
/// `CONTAINS` with another condition through `AND`, and with the flat and nested array
/// notations sent through `search_post`.
#[actix_rt::test]
async fn search_with_contains_without_enabling_the_feature() {
// Since a filter is deserialized as a JSON value, it will never fail to deserialize.
// Thus the error message is not generated by deserr but written by us.
let server = Server::new().await;
let index = server.index("doggo");
// Also, to trigger the error message we need to actually create the index, otherwise the
// search would fail with an index-does-not-exist error instead.
let (task, _code) = index.create(None).await;
server.wait_task(task.uid()).await.succeeded();
// Single condition: the whole filter is the `CONTAINS` expression.
index
.search(json!({ "filter": "doggo CONTAINS kefir" }), |response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
.await;
// `CONTAINS` as the second operand of an `AND`: same error, but the numbers reported before
// the filter text differ (`25:33` instead of `7:15`).
// NOTE(review): these look like the position of the offending part of the filter — confirm.
index
.search(json!({ "filter": "doggo != echo AND doggo CONTAINS kefir" }), |response, code| {
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n25:33 doggo != echo AND doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
})
.await;
// For the post search we can also use the array syntaxes.
// Flat array: each element is its own filter expression.
let (response, code) =
index.search_post(json!({ "filter": ["doggo != echo", "doggo CONTAINS kefir"] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
// Nested array: the `CONTAINS` expression sits in an inner array.
let (response, code) =
index.search_post(json!({ "filter": ["doggo != echo", ["doggo CONTAINS kefir"]] })).await;
snapshot!(code, @"400 Bad Request");
snapshot!(json_string!(response), @r###"
{
"message": "Using `CONTAINS` in a filter requires enabling the `contains filter` experimental feature. See https://github.com/orgs/meilisearch/discussions/763\n7:15 doggo CONTAINS kefir",
"code": "feature_not_enabled",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#feature_not_enabled"
}
"###);
}

View file

@ -19,7 +19,8 @@ async fn index_with_documents_user_provided<'a>(
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -48,7 +49,8 @@ async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> I
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View file

@ -13,9 +13,11 @@ mod pagination;
mod restrict_searchable;
mod search_queue;
use meilisearch::Opt;
use once_cell::sync::Lazy;
use tempfile::TempDir;
use crate::common::{Server, Value};
use crate::common::{default_settings, Server, Value};
use crate::json;
static DOCUMENTS: Lazy<Value> = Lazy::new(|| {
@ -576,6 +578,32 @@ async fn search_with_filter_array_notation() {
assert_eq!(response["hits"].as_array().unwrap().len(), 3);
}
/// Checks that a `CONTAINS` filter is accepted and returns hits once the server is started
/// with `experimental_contains_filter` enabled.
#[actix_rt::test]
async fn search_with_contains_filter() {
    // Start a server with the experimental flag switched on; everything else is default.
    let db_dir = TempDir::new().unwrap();
    let options = Opt { experimental_contains_filter: true, ..default_settings(db_dir.path()) };
    let server = Server::new_with_options(options).await.unwrap();
    let index = server.index("movies");

    // `title` has to be filterable before it can be used in a filter expression.
    index.update_settings(json!({"filterableAttributes": ["title"]})).await;

    // Index the shared fixture documents and make sure the task succeeded.
    let (task, _code) = index.add_documents(DOCUMENTS.clone(), None).await;
    index.wait_task(task.uid()).await.succeeded();

    let (response, code) = index.search_post(json!({ "filter": "title CONTAINS cap" })).await;
    assert_eq!(code, 200, "{}", response);

    let hits = response["hits"].as_array().unwrap();
    assert_eq!(hits.len(), 2);
}
#[actix_rt::test]
async fn search_with_sort_on_numbers() {
let server = Server::new().await;

View file

@ -99,7 +99,8 @@ async fn secrets_are_hidden_in_settings() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View file

@ -360,16 +360,17 @@ async fn filter_invalid_syntax_object() {
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": "title & Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
}
@ -398,16 +399,17 @@ async fn filter_invalid_syntax_array() {
snapshot!(code, @"202 Accepted");
index.wait_task(value.uid()).await;
let expected_response = json!({
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
});
index
.similar(json!({"id": 287947, "filter": ["title & Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
snapshot!(response, @r###"
{
"message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `IS NULL`, `IS NOT NULL`, `IS EMPTY`, `IS NOT EMPTY`, `CONTAINS`, `NOT CONTAINS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass",
"code": "invalid_similar_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_similar_filter"
}
"###);
snapshot!(code, @"400 Bad Request");
})
.await;
}

View file

@ -56,7 +56,8 @@ async fn basic() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -246,7 +247,8 @@ async fn ranking_score_threshold() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -526,7 +528,8 @@ async fn filter() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -654,7 +657,8 @@ async fn limit_and_offset() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View file

@ -17,7 +17,8 @@ async fn add_remove_user_provided() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -171,7 +172,8 @@ async fn generate_default_user_provided_documents(server: &Server) -> Index {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);
@ -622,7 +624,8 @@ async fn add_remove_one_vector_4588() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);

View file

@ -15,7 +15,8 @@ async fn update_embedder() {
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false
"editDocumentsByFunction": false,
"containsFilter": false
}
"###);