mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Simplify stop word checking in create_primitive_query
This commit is contained in:
parent
2aa11afb87
commit
77f1ff019b
@ -4,7 +4,6 @@ use std::{fmt, mem};
|
|||||||
|
|
||||||
use charabia::classifier::ClassifiedTokenIter;
|
use charabia::classifier::ClassifiedTokenIter;
|
||||||
use charabia::{SeparatorKind, TokenKind};
|
use charabia::{SeparatorKind, TokenKind};
|
||||||
use fst::Set;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
@ -269,8 +268,7 @@ impl<'a> QueryTreeBuilder<'a> {
|
|||||||
&self,
|
&self,
|
||||||
query: ClassifiedTokenIter<A>,
|
query: ClassifiedTokenIter<A>,
|
||||||
) -> Result<Option<(Operation, PrimitiveQuery, MatchingWords)>> {
|
) -> Result<Option<(Operation, PrimitiveQuery, MatchingWords)>> {
|
||||||
let stop_words = self.index.stop_words(self.rtxn)?;
|
let primitive_query = create_primitive_query(query, self.words_limit);
|
||||||
let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
|
|
||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
let qt = create_query_tree(
|
let qt = create_query_tree(
|
||||||
self,
|
self,
|
||||||
@ -722,7 +720,6 @@ impl PrimitiveQueryPart {
|
|||||||
/// the primitive query is an intermediate state to build the query tree.
|
/// the primitive query is an intermediate state to build the query tree.
|
||||||
fn create_primitive_query<A>(
|
fn create_primitive_query<A>(
|
||||||
query: ClassifiedTokenIter<A>,
|
query: ClassifiedTokenIter<A>,
|
||||||
stop_words: Option<Set<&[u8]>>,
|
|
||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
) -> PrimitiveQuery
|
) -> PrimitiveQuery
|
||||||
where
|
where
|
||||||
@ -747,13 +744,14 @@ where
|
|||||||
// 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word,
|
// 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word,
|
||||||
// 3. if the word is the last token of the query we push it as a prefix word.
|
// 3. if the word is the last token of the query we push it as a prefix word.
|
||||||
if quoted {
|
if quoted {
|
||||||
if stop_words.as_ref().map_or(false, |swords| swords.contains(token.lemma())) {
|
if let TokenKind::StopWord = token.kind {
|
||||||
phrase.push(None)
|
phrase.push(None)
|
||||||
} else {
|
} else {
|
||||||
phrase.push(Some(token.lemma().to_string()));
|
phrase.push(Some(token.lemma().to_string()));
|
||||||
}
|
}
|
||||||
} else if peekable.peek().is_some() {
|
} else if peekable.peek().is_some() {
|
||||||
if !stop_words.as_ref().map_or(false, |swords| swords.contains(token.lemma())) {
|
if let TokenKind::StopWord = token.kind {
|
||||||
|
} else {
|
||||||
primitive_query
|
primitive_query
|
||||||
.push(PrimitiveQueryPart::Word(token.lemma().to_string(), false));
|
.push(PrimitiveQueryPart::Word(token.lemma().to_string(), false));
|
||||||
}
|
}
|
||||||
@ -836,7 +834,7 @@ mod test {
|
|||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
query: ClassifiedTokenIter<A>,
|
query: ClassifiedTokenIter<A>,
|
||||||
) -> Result<Option<(Operation, PrimitiveQuery)>> {
|
) -> Result<Option<(Operation, PrimitiveQuery)>> {
|
||||||
let primitive_query = create_primitive_query(query, None, words_limit);
|
let primitive_query = create_primitive_query(query, words_limit);
|
||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
let qt = create_query_tree(
|
let qt = create_query_tree(
|
||||||
self,
|
self,
|
||||||
|
Loading…
Reference in New Issue
Block a user