mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 20:57:35 +01:00
Expose a first working version of the negative keyword
This commit is contained in:
parent
5ea017b922
commit
e4a3e603b3
@ -209,6 +209,20 @@ fn resolve_universe(
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tracing::instrument(level = "trace", skip_all, target = "search")]
|
||||||
|
fn resolve_negative_words(
|
||||||
|
ctx: &mut SearchContext,
|
||||||
|
negative_words: &[Word],
|
||||||
|
) -> Result<RoaringBitmap> {
|
||||||
|
let mut negative_bitmap = RoaringBitmap::new();
|
||||||
|
for &word in negative_words {
|
||||||
|
if let Some(bitmap) = ctx.word_docids(word)? {
|
||||||
|
negative_bitmap |= bitmap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(negative_bitmap)
|
||||||
|
}
|
||||||
|
|
||||||
/// Return the list of initialised ranking rules to be used for a placeholder search.
|
/// Return the list of initialised ranking rules to be used for a placeholder search.
|
||||||
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||||
ctx: &SearchContext<'ctx>,
|
ctx: &SearchContext<'ctx>,
|
||||||
@ -620,7 +634,12 @@ pub fn execute_search(
|
|||||||
let tokens = tokenizer.tokenize(query);
|
let tokens = tokenizer.tokenize(query);
|
||||||
drop(entered);
|
drop(entered);
|
||||||
|
|
||||||
let query_terms = located_query_terms_from_tokens(ctx, tokens, words_limit)?;
|
let (query_terms, negative_words) =
|
||||||
|
located_query_terms_from_tokens(ctx, tokens, words_limit)?;
|
||||||
|
|
||||||
|
let ignored_documents = resolve_negative_words(ctx, &negative_words)?;
|
||||||
|
universe -= ignored_documents;
|
||||||
|
|
||||||
if query_terms.is_empty() {
|
if query_terms.is_empty() {
|
||||||
// Do a placeholder search instead
|
// Do a placeholder search instead
|
||||||
None
|
None
|
||||||
@ -630,6 +649,7 @@ pub fn execute_search(
|
|||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
let bucket_sort_output = if let Some(query_terms) = query_terms {
|
let bucket_sort_output = if let Some(query_terms) = query_terms {
|
||||||
let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
|
let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
|
||||||
located_query_terms = Some(new_located_query_terms);
|
located_query_terms = Some(new_located_query_terms);
|
||||||
|
@ -6,6 +6,7 @@ use charabia::{SeparatorKind, TokenKind};
|
|||||||
use super::compute_derivations::partially_initialized_term_from_word;
|
use super::compute_derivations::partially_initialized_term_from_word;
|
||||||
use super::{LocatedQueryTerm, ZeroTypoTerm};
|
use super::{LocatedQueryTerm, ZeroTypoTerm};
|
||||||
use crate::search::new::query_term::{Lazy, Phrase, QueryTerm};
|
use crate::search::new::query_term::{Lazy, Phrase, QueryTerm};
|
||||||
|
use crate::search::new::Word;
|
||||||
use crate::{Result, SearchContext, MAX_WORD_LENGTH};
|
use crate::{Result, SearchContext, MAX_WORD_LENGTH};
|
||||||
|
|
||||||
/// Convert the tokenised search query into a list of located query terms.
|
/// Convert the tokenised search query into a list of located query terms.
|
||||||
@ -14,12 +15,14 @@ pub fn located_query_terms_from_tokens(
|
|||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
query: NormalizedTokenIter,
|
query: NormalizedTokenIter,
|
||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
) -> Result<Vec<LocatedQueryTerm>> {
|
) -> Result<(Vec<LocatedQueryTerm>, Vec<Word>)> {
|
||||||
let nbr_typos = number_of_typos_allowed(ctx)?;
|
let nbr_typos = number_of_typos_allowed(ctx)?;
|
||||||
|
|
||||||
let mut located_terms = Vec::new();
|
let mut located_terms = Vec::new();
|
||||||
|
|
||||||
let mut phrase: Option<PhraseBuilder> = None;
|
let mut phrase: Option<PhraseBuilder> = None;
|
||||||
|
let mut negative_next_token = false;
|
||||||
|
let mut negative_words = Vec::new();
|
||||||
|
|
||||||
let parts_limit = words_limit.unwrap_or(usize::MAX);
|
let parts_limit = words_limit.unwrap_or(usize::MAX);
|
||||||
|
|
||||||
@ -33,7 +36,7 @@ pub fn located_query_terms_from_tokens(
|
|||||||
}
|
}
|
||||||
// early return if word limit is exceeded
|
// early return if word limit is exceeded
|
||||||
if located_terms.len() >= parts_limit {
|
if located_terms.len() >= parts_limit {
|
||||||
return Ok(located_terms);
|
return Ok((located_terms, negative_words));
|
||||||
}
|
}
|
||||||
|
|
||||||
match token.kind {
|
match token.kind {
|
||||||
@ -46,6 +49,11 @@ pub fn located_query_terms_from_tokens(
|
|||||||
// 3. if the word is the last token of the query we push it as a prefix word.
|
// 3. if the word is the last token of the query we push it as a prefix word.
|
||||||
if let Some(phrase) = &mut phrase {
|
if let Some(phrase) = &mut phrase {
|
||||||
phrase.push_word(ctx, &token, position)
|
phrase.push_word(ctx, &token, position)
|
||||||
|
} else if negative_next_token {
|
||||||
|
let word = token.lemma().to_string();
|
||||||
|
let word = Word::Original(ctx.word_interner.insert(word));
|
||||||
|
negative_words.push(word);
|
||||||
|
negative_next_token = false;
|
||||||
} else if peekable.peek().is_some() {
|
} else if peekable.peek().is_some() {
|
||||||
match token.kind {
|
match token.kind {
|
||||||
TokenKind::Word => {
|
TokenKind::Word => {
|
||||||
@ -63,7 +71,7 @@ pub fn located_query_terms_from_tokens(
|
|||||||
};
|
};
|
||||||
located_terms.push(located_term);
|
located_terms.push(located_term);
|
||||||
}
|
}
|
||||||
TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {}
|
TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => (),
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let word = token.lemma();
|
let word = token.lemma();
|
||||||
@ -122,6 +130,10 @@ pub fn located_query_terms_from_tokens(
|
|||||||
// Start new phrase if the token ends with an opening quote
|
// Start new phrase if the token ends with an opening quote
|
||||||
(quote_count % 2 == 1).then_some(PhraseBuilder::empty())
|
(quote_count % 2 == 1).then_some(PhraseBuilder::empty())
|
||||||
};
|
};
|
||||||
|
|
||||||
|
if phrase.is_none() && token.lemma() == "-" {
|
||||||
|
negative_next_token = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
@ -134,7 +146,7 @@ pub fn located_query_terms_from_tokens(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(located_terms)
|
Ok((located_terms, negative_words))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn number_of_typos_allowed<'ctx>(
|
pub fn number_of_typos_allowed<'ctx>(
|
||||||
@ -317,6 +329,7 @@ mod tests {
|
|||||||
// panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
|
// panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
|
||||||
let located_query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?;
|
let located_query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?;
|
||||||
assert!(located_query_terms.is_empty());
|
assert!(located_query_terms.is_empty());
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user