Simplify stop word checking in create_primitive_query

This commit is contained in:
Samyak S Sarnayak 2022-10-20 18:35:39 +05:30
parent 2aa11afb87
commit 77f1ff019b
No known key found for this signature in database
GPG Key ID: 365873F2F0C6153B

View File

@ -4,7 +4,6 @@ use std::{fmt, mem};
use charabia::classifier::ClassifiedTokenIter; use charabia::classifier::ClassifiedTokenIter;
use charabia::{SeparatorKind, TokenKind}; use charabia::{SeparatorKind, TokenKind};
use fst::Set;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use slice_group_by::GroupBy; use slice_group_by::GroupBy;
@ -269,8 +268,7 @@ impl<'a> QueryTreeBuilder<'a> {
&self, &self,
query: ClassifiedTokenIter<A>, query: ClassifiedTokenIter<A>,
) -> Result<Option<(Operation, PrimitiveQuery, MatchingWords)>> { ) -> Result<Option<(Operation, PrimitiveQuery, MatchingWords)>> {
let stop_words = self.index.stop_words(self.rtxn)?; let primitive_query = create_primitive_query(query, self.words_limit);
let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
if !primitive_query.is_empty() { if !primitive_query.is_empty() {
let qt = create_query_tree( let qt = create_query_tree(
self, self,
@ -722,7 +720,6 @@ impl PrimitiveQueryPart {
/// the primitive query is an intermediate state to build the query tree. /// the primitive query is an intermediate state to build the query tree.
fn create_primitive_query<A>( fn create_primitive_query<A>(
query: ClassifiedTokenIter<A>, query: ClassifiedTokenIter<A>,
stop_words: Option<Set<&[u8]>>,
words_limit: Option<usize>, words_limit: Option<usize>,
) -> PrimitiveQuery ) -> PrimitiveQuery
where where
@ -747,13 +744,14 @@ where
// 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word, // 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word,
// 3. if the word is the last token of the query we push it as a prefix word. // 3. if the word is the last token of the query we push it as a prefix word.
if quoted { if quoted {
if stop_words.as_ref().map_or(false, |swords| swords.contains(token.lemma())) { if let TokenKind::StopWord = token.kind {
phrase.push(None) phrase.push(None)
} else { } else {
phrase.push(Some(token.lemma().to_string())); phrase.push(Some(token.lemma().to_string()));
} }
} else if peekable.peek().is_some() { } else if peekable.peek().is_some() {
if !stop_words.as_ref().map_or(false, |swords| swords.contains(token.lemma())) { if let TokenKind::StopWord = token.kind {
} else {
primitive_query primitive_query
.push(PrimitiveQueryPart::Word(token.lemma().to_string(), false)); .push(PrimitiveQueryPart::Word(token.lemma().to_string(), false));
} }
@ -836,7 +834,7 @@ mod test {
words_limit: Option<usize>, words_limit: Option<usize>,
query: ClassifiedTokenIter<A>, query: ClassifiedTokenIter<A>,
) -> Result<Option<(Operation, PrimitiveQuery)>> { ) -> Result<Option<(Operation, PrimitiveQuery)>> {
let primitive_query = create_primitive_query(query, None, words_limit); let primitive_query = create_primitive_query(query, words_limit);
if !primitive_query.is_empty() { if !primitive_query.is_empty() {
let qt = create_query_tree( let qt = create_query_tree(
self, self,