4535: Support Negative Keywords r=ManyTheFish a=Kerollmops

This PR fixes #4422 by supporting `-` before any word in the query.

The minus symbol `-`, from the ASCII table, is not the only character that can be considered the negative operator. You can see the two other matching characters under the `Based on "-" (U+002D)` section on [this unicode reference website](https://www.compart.com/en/unicode/U+002D).

It's important to notice the strange behavior when a query includes and excludes the same word; only the derivative ( synonyms and split) will be kept:
 - If you input `progamer -progamer`, the engine will still search for `pro gamer`.
 - If you have the synonym `like = love` and you input `like -like`, it will still search for `love`.

## TODO
 - [x] Add analytics
 - [x] Add support to the `-` operator
 - [x] Make sure to support spaces around `-` well
 - [x] Support phrase negation
 - [x] Add tests


Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-04-04 13:10:27 +00:00 committed by GitHub
commit 5509bafff8
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 260 additions and 20 deletions

View file

@ -185,6 +185,110 @@ async fn phrase_search_with_stop_word() {
.await;
}
#[actix_rt::test]
async fn negative_phrase_search() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.search(json!({"q": "-\"train your dragon\"" }), |response, code| {
assert_eq!(code, 200, "{}", response);
let hits = response["hits"].as_array().unwrap();
assert_eq!(hits.len(), 4);
assert_eq!(hits[0]["id"], "287947");
assert_eq!(hits[1]["id"], "299537");
assert_eq!(hits[2]["id"], "522681");
assert_eq!(hits[3]["id"], "450465");
})
.await;
}
#[actix_rt::test]
async fn negative_word_search() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.search(json!({"q": "-escape" }), |response, code| {
assert_eq!(code, 200, "{}", response);
let hits = response["hits"].as_array().unwrap();
assert_eq!(hits.len(), 4);
assert_eq!(hits[0]["id"], "287947");
assert_eq!(hits[1]["id"], "299537");
assert_eq!(hits[2]["id"], "166428");
assert_eq!(hits[3]["id"], "450465");
})
.await;
// Everything that contains derivates of escape but not escape: nothing
index
.search(json!({"q": "-escape escape" }), |response, code| {
assert_eq!(code, 200, "{}", response);
let hits = response["hits"].as_array().unwrap();
assert_eq!(hits.len(), 0);
})
.await;
}
#[actix_rt::test]
async fn non_negative_search() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index
.search(json!({"q": "- escape" }), |response, code| {
assert_eq!(code, 200, "{}", response);
let hits = response["hits"].as_array().unwrap();
assert_eq!(hits.len(), 1);
assert_eq!(hits[0]["id"], "522681");
})
.await;
index
.search(json!({"q": "- \"train your dragon\"" }), |response, code| {
assert_eq!(code, 200, "{}", response);
let hits = response["hits"].as_array().unwrap();
assert_eq!(hits.len(), 1);
assert_eq!(hits[0]["id"], "166428");
})
.await;
}
#[actix_rt::test]
async fn negative_special_cases_search() {
let server = Server::new().await;
let index = server.index("test");
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(0).await;
index.update_settings(json!({"synonyms": { "escape": ["glass"] }})).await;
index.wait_task(1).await;
// There is a synonym for escape -> glass but we don't want "escape", only the derivates: glass
index
.search(json!({"q": "-escape escape" }), |response, code| {
assert_eq!(code, 200, "{}", response);
let hits = response["hits"].as_array().unwrap();
assert_eq!(hits.len(), 1);
assert_eq!(hits[0]["id"], "450465");
})
.await;
}
#[cfg(feature = "default")]
#[actix_rt::test]
async fn test_kanji_language_detection() {