Mirror of https://github.com/meilisearch/MeiliSearch (synced 2025-07-03 03:47:02 +02:00)
Merge #4535
4535: Support Negative Keywords r=ManyTheFish a=Kerollmops

This PR fixes #4422 by supporting `-` before any word in the query. The minus symbol `-` from the ASCII table is not the only character that can act as the negative operator: you can see the two other matching characters under the `Based on "-" (U+002D)` section of [this Unicode reference website](https://www.compart.com/en/unicode/U+002D).

Note the strange behavior when a query both includes and excludes the same word: only the derivatives (synonyms and split words) are kept.

- If you input `progamer -progamer`, the engine will still search for `pro gamer`.
- If you have the synonym `like = love` and you input `like -like`, it will still search for `love`.

## TODO
- [x] Add analytics
- [x] Add support to the `-` operator
- [x] Make sure to support spaces around `-` well
- [x] Support phrase negation
- [x] Add tests

Co-authored-by: Clément Renault <clement@meilisearch.com>
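As a quick illustration (not part of this PR's diff), here is a minimal Rust sketch of the kinds of query strings the new operator accepts, based on the description above and on the tests added below; the words, phrase, and synonym used are only examples.

```rust
use serde_json::json;

fn main() {
    // Exclude every document that contains the word "escape".
    let negative_word = json!({ "q": "-escape" });

    // Exclude every document that contains the exact phrase.
    let negative_phrase = json!({ "q": "-\"train your dragon\"" });

    // A `-` followed by a space is not a negative operator:
    // this is a plain search for the word "escape".
    let not_negative = json!({ "q": "- escape" });

    // Including and excluding the same word keeps only its derivatives
    // (synonyms and split words), e.g. `pro gamer` here.
    let include_and_exclude = json!({ "q": "progamer -progamer" });

    // Each of these payloads would be sent to an index's search route.
    for query in [negative_word, negative_phrase, not_negative, include_and_exclude] {
        println!("{query}");
    }
}
```

The assertions in the new tests at the end of the diff pin down exactly which documents each of these query forms matches against the shared `DOCUMENTS` fixture.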
This commit is contained in: commit 5509bafff8

10 changed files with 260 additions and 20 deletions
```diff
@@ -583,6 +583,7 @@ pub struct SearchAggregator {
     total_received: usize,
     total_succeeded: usize,
     total_degraded: usize,
+    total_used_negative_operator: usize,
     time_spent: BinaryHeap<usize>,

     // sort
@@ -763,12 +764,16 @@ impl SearchAggregator {
             facet_distribution: _,
             facet_stats: _,
             degraded,
+            used_negative_operator,
         } = result;

         self.total_succeeded = self.total_succeeded.saturating_add(1);
         if *degraded {
             self.total_degraded = self.total_degraded.saturating_add(1);
         }
+        if *used_negative_operator {
+            self.total_used_negative_operator = self.total_used_negative_operator.saturating_add(1);
+        }
         self.time_spent.push(*processing_time_ms as usize);
     }

@@ -811,6 +816,7 @@ impl SearchAggregator {
             embedder,
             hybrid,
             total_degraded,
+            total_used_negative_operator,
         } = other;

         if self.timestamp.is_none() {
@@ -826,6 +832,8 @@ impl SearchAggregator {
         self.total_received = self.total_received.saturating_add(total_received);
         self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded);
         self.total_degraded = self.total_degraded.saturating_add(total_degraded);
+        self.total_used_negative_operator =
+            self.total_used_negative_operator.saturating_add(total_used_negative_operator);
         self.time_spent.append(time_spent);

         // sort
@@ -932,6 +940,7 @@ impl SearchAggregator {
             embedder,
             hybrid,
             total_degraded,
+            total_used_negative_operator,
         } = self;

         if total_received == 0 {
@@ -952,6 +961,7 @@ impl SearchAggregator {
                 "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics
                 "total_received": total_received,
                 "total_degraded": total_degraded,
+                "total_used_negative_operator": total_used_negative_operator,
             },
             "sort": {
                 "with_geoPoint": sort_with_geo_point,
```
```diff
@@ -324,9 +324,11 @@ pub struct SearchResult {
     #[serde(skip_serializing_if = "Option::is_none")]
     pub facet_stats: Option<BTreeMap<String, FacetStats>>,

-    // This information is only used for analytics purposes
+    // These fields are only used for analytics purposes
     #[serde(skip)]
     pub degraded: bool,
+    #[serde(skip)]
+    pub used_negative_operator: bool,
 }

 #[derive(Serialize, Debug, Clone, PartialEq)]
@@ -512,6 +514,7 @@ pub fn perform_search(
         candidates,
         document_scores,
         degraded,
+        used_negative_operator,
         ..
     } = match &query.hybrid {
         Some(hybrid) => match *hybrid.semantic_ratio {
@@ -717,6 +720,7 @@ pub fn perform_search(
         facet_distribution,
         facet_stats,
         degraded,
+        used_negative_operator,
     };
     Ok(result)
 }
```
```diff
@@ -185,6 +185,110 @@ async fn phrase_search_with_stop_word() {
         .await;
 }

+#[actix_rt::test]
+async fn negative_phrase_search() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    index
+        .search(json!({"q": "-\"train your dragon\"" }), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            let hits = response["hits"].as_array().unwrap();
+            assert_eq!(hits.len(), 4);
+            assert_eq!(hits[0]["id"], "287947");
+            assert_eq!(hits[1]["id"], "299537");
+            assert_eq!(hits[2]["id"], "522681");
+            assert_eq!(hits[3]["id"], "450465");
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn negative_word_search() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    index
+        .search(json!({"q": "-escape" }), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            let hits = response["hits"].as_array().unwrap();
+            assert_eq!(hits.len(), 4);
+            assert_eq!(hits[0]["id"], "287947");
+            assert_eq!(hits[1]["id"], "299537");
+            assert_eq!(hits[2]["id"], "166428");
+            assert_eq!(hits[3]["id"], "450465");
+        })
+        .await;
+
+    // Everything that contains derivates of escape but not escape: nothing
+    index
+        .search(json!({"q": "-escape escape" }), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            let hits = response["hits"].as_array().unwrap();
+            assert_eq!(hits.len(), 0);
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn non_negative_search() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    index
+        .search(json!({"q": "- escape" }), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            let hits = response["hits"].as_array().unwrap();
+            assert_eq!(hits.len(), 1);
+            assert_eq!(hits[0]["id"], "522681");
+        })
+        .await;
+
+    index
+        .search(json!({"q": "- \"train your dragon\"" }), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            let hits = response["hits"].as_array().unwrap();
+            assert_eq!(hits.len(), 1);
+            assert_eq!(hits[0]["id"], "166428");
+        })
+        .await;
+}
+
+#[actix_rt::test]
+async fn negative_special_cases_search() {
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(0).await;
+
+    index.update_settings(json!({"synonyms": { "escape": ["glass"] }})).await;
+    index.wait_task(1).await;
+
+    // There is a synonym for escape -> glass but we don't want "escape", only the derivates: glass
+    index
+        .search(json!({"q": "-escape escape" }), |response, code| {
+            assert_eq!(code, 200, "{}", response);
+            let hits = response["hits"].as_array().unwrap();
+            assert_eq!(hits.len(), 1);
+            assert_eq!(hits[0]["id"], "450465");
+        })
+        .await;
+}
+
 #[cfg(feature = "default")]
 #[actix_rt::test]
 async fn test_kanji_language_detection() {
```