mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 05:30:16 +01:00
Merge #664
664: Fix phrase search containing stop words r=ManyTheFish a=Samyak2 # Pull Request This a WIP draft PR I wanted to create to let other potential contributors know that I'm working on this issue. I'll be completing this in a few hours from opening this. ## Related issue Fixes #661 and towards fixing meilisearch/meilisearch#2905 ## What does this PR do? - [x] Change Phrase Operation to use a `Vec<Option<String>>` instead of `Vec<String>` where `None` corresponds to a stop word - [x] Update all other uses of phrase operation - [x] Update `resolve_phrase` - [x] Update `create_primitive_query`? - [x] Add test ## PR checklist Please check if your PR fulfills the following requirements: - [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [x] Have you read the contributing guidelines? - [x] Have you made sure that the title is accurate and descriptive of the changes? Co-authored-by: Samyak S Sarnayak <samyak201@gmail.com> Co-authored-by: Samyak Sarnayak <samyak201@gmail.com>
This commit is contained in:
commit
c965200010
@ -579,6 +579,7 @@ fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
|||||||
Phrase(words) => {
|
Phrase(words) => {
|
||||||
let queries = words
|
let queries = words
|
||||||
.iter()
|
.iter()
|
||||||
|
.filter_map(|w| w.as_ref())
|
||||||
.map(|word| vec![Query { prefix: false, kind: QueryKind::exact(word.clone()) }])
|
.map(|word| vec![Query { prefix: false, kind: QueryKind::exact(word.clone()) }])
|
||||||
.collect();
|
.collect();
|
||||||
vec![queries]
|
vec![queries]
|
||||||
|
@ -299,9 +299,11 @@ fn attribute_start_with_docids(
|
|||||||
}
|
}
|
||||||
Phrase(phrase) => {
|
Phrase(phrase) => {
|
||||||
for word in phrase {
|
for word in phrase {
|
||||||
let wc = ctx.word_position_docids(word, pos)?;
|
if let Some(word) = word {
|
||||||
if let Some(word_candidates) = wc {
|
let wc = ctx.word_position_docids(word, pos)?;
|
||||||
attribute_candidates_array.push(word_candidates);
|
if let Some(word_candidates) = wc {
|
||||||
|
attribute_candidates_array.push(word_candidates);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
pos += 1;
|
pos += 1;
|
||||||
}
|
}
|
||||||
@ -323,7 +325,7 @@ fn intersection_of(mut rbs: Vec<&RoaringBitmap>) -> RoaringBitmap {
|
|||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum ExactQueryPart {
|
pub enum ExactQueryPart {
|
||||||
Phrase(Vec<String>),
|
Phrase(Vec<Option<String>>),
|
||||||
Synonyms(Vec<String>),
|
Synonyms(Vec<String>),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -418,15 +418,32 @@ pub fn resolve_query_tree(
|
|||||||
resolve_operation(ctx, query_tree, wdcache)
|
resolve_operation(ctx, query_tree, wdcache)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_phrase(ctx: &dyn Context, phrase: &[String]) -> Result<RoaringBitmap> {
|
pub fn resolve_phrase(ctx: &dyn Context, phrase: &[Option<String>]) -> Result<RoaringBitmap> {
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
let mut first_iter = true;
|
let mut first_iter = true;
|
||||||
let winsize = phrase.len().min(3);
|
let winsize = phrase.len().min(3);
|
||||||
|
|
||||||
|
if phrase.is_empty() {
|
||||||
|
return Ok(candidates);
|
||||||
|
}
|
||||||
|
|
||||||
for win in phrase.windows(winsize) {
|
for win in phrase.windows(winsize) {
|
||||||
// Get all the documents with the matching distance for each word pairs.
|
// Get all the documents with the matching distance for each word pairs.
|
||||||
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
|
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
|
||||||
for (offset, s1) in win.iter().enumerate() {
|
for (offset, s1) in win.iter().enumerate().filter_map(|(index, word)| {
|
||||||
for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
|
if let Some(word) = word {
|
||||||
|
Some((index, word))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}) {
|
||||||
|
for (dist, s2) in win.iter().skip(offset + 1).enumerate().filter_map(|(index, word)| {
|
||||||
|
if let Some(word) = word {
|
||||||
|
Some((index, word))
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}) {
|
||||||
if dist == 0 {
|
if dist == 0 {
|
||||||
match ctx.word_pair_proximity_docids(s1, s2, 1)? {
|
match ctx.word_pair_proximity_docids(s1, s2, 1)? {
|
||||||
Some(m) => bitmaps.push(m),
|
Some(m) => bitmaps.push(m),
|
||||||
|
@ -4,6 +4,7 @@ use std::mem::take;
|
|||||||
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
|
query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
|
||||||
@ -187,10 +188,15 @@ fn resolve_candidates<'t>(
|
|||||||
Phrase(words) => {
|
Phrase(words) => {
|
||||||
if proximity == 0 {
|
if proximity == 0 {
|
||||||
let most_left = words
|
let most_left = words
|
||||||
.first()
|
.iter()
|
||||||
|
.filter_map(|o| o.as_ref())
|
||||||
|
.next()
|
||||||
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||||
let most_right = words
|
let most_right = words
|
||||||
.last()
|
.iter()
|
||||||
|
.rev()
|
||||||
|
.filter_map(|o| o.as_ref())
|
||||||
|
.next()
|
||||||
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
.map(|w| Query { prefix: false, kind: QueryKind::exact(w.clone()) });
|
||||||
|
|
||||||
match (most_left, most_right) {
|
match (most_left, most_right) {
|
||||||
@ -473,14 +479,34 @@ fn resolve_plane_sweep_candidates(
|
|||||||
}
|
}
|
||||||
Phrase(words) => {
|
Phrase(words) => {
|
||||||
let mut groups_positions = Vec::with_capacity(words.len());
|
let mut groups_positions = Vec::with_capacity(words.len());
|
||||||
for word in words {
|
|
||||||
let positions = match words_positions.get(word) {
|
// group stop_words together.
|
||||||
Some(positions) => positions.iter().map(|p| (p, 0, p)).collect(),
|
for words in words.linear_group_by_key(Option::is_none) {
|
||||||
None => return Ok(vec![]),
|
// skip if it's a group of stop words.
|
||||||
};
|
if matches!(words.first(), None | Some(None)) {
|
||||||
groups_positions.push(positions);
|
continue;
|
||||||
|
}
|
||||||
|
// make a consecutive plane-sweep on the subgroup of words.
|
||||||
|
let mut subgroup = Vec::with_capacity(words.len());
|
||||||
|
for word in words.into_iter().map(|w| w.as_deref().unwrap()) {
|
||||||
|
match words_positions.get(word) {
|
||||||
|
Some(positions) => {
|
||||||
|
subgroup.push(positions.iter().map(|p| (p, 0, p)).collect())
|
||||||
|
}
|
||||||
|
None => return Ok(vec![]),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
match subgroup.len() {
|
||||||
|
0 => {}
|
||||||
|
1 => groups_positions.push(subgroup.pop().unwrap()),
|
||||||
|
_ => groups_positions.push(plane_sweep(subgroup, true)?),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
match groups_positions.len() {
|
||||||
|
0 => vec![],
|
||||||
|
1 => groups_positions.pop().unwrap(),
|
||||||
|
_ => plane_sweep(groups_positions, false)?,
|
||||||
}
|
}
|
||||||
plane_sweep(groups_positions, true)?
|
|
||||||
}
|
}
|
||||||
Or(_, ops) => {
|
Or(_, ops) => {
|
||||||
let mut result = Vec::new();
|
let mut result = Vec::new();
|
||||||
|
@ -9,6 +9,7 @@ use super::{
|
|||||||
query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
|
query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
|
||||||
CriterionResult,
|
CriterionResult,
|
||||||
};
|
};
|
||||||
|
use crate::search::criteria::resolve_phrase;
|
||||||
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
|
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
|
||||||
use crate::search::{word_derivations, WordDerivationsCache};
|
use crate::search::{word_derivations, WordDerivationsCache};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -256,27 +257,7 @@ fn resolve_candidates<'t>(
|
|||||||
|
|
||||||
match query_tree {
|
match query_tree {
|
||||||
And(ops) => mdfs(ctx, ops, number_typos, cache, wdcache),
|
And(ops) => mdfs(ctx, ops, number_typos, cache, wdcache),
|
||||||
Phrase(words) => {
|
Phrase(words) => resolve_phrase(ctx, words),
|
||||||
let mut candidates = RoaringBitmap::new();
|
|
||||||
let mut first_loop = true;
|
|
||||||
for slice in words.windows(2) {
|
|
||||||
let (left, right) = (&slice[0], &slice[1]);
|
|
||||||
match ctx.word_pair_proximity_docids(left, right, 1)? {
|
|
||||||
Some(pair_docids) => {
|
|
||||||
if pair_docids.is_empty() {
|
|
||||||
return Ok(RoaringBitmap::new());
|
|
||||||
} else if first_loop {
|
|
||||||
candidates = pair_docids;
|
|
||||||
first_loop = false;
|
|
||||||
} else {
|
|
||||||
candidates &= pair_docids;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => return Ok(RoaringBitmap::new()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(candidates)
|
|
||||||
}
|
|
||||||
Or(_, ops) => {
|
Or(_, ops) => {
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
for op in ops {
|
for op in ops {
|
||||||
|
@ -4,7 +4,6 @@ use std::{fmt, mem};
|
|||||||
|
|
||||||
use charabia::classifier::ClassifiedTokenIter;
|
use charabia::classifier::ClassifiedTokenIter;
|
||||||
use charabia::{SeparatorKind, TokenKind};
|
use charabia::{SeparatorKind, TokenKind};
|
||||||
use fst::Set;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
@ -18,8 +17,9 @@ type IsPrefix = bool;
|
|||||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
pub enum Operation {
|
pub enum Operation {
|
||||||
And(Vec<Operation>),
|
And(Vec<Operation>),
|
||||||
// serie of consecutive non prefix and exact words
|
// series of consecutive non prefix and exact words
|
||||||
Phrase(Vec<String>),
|
// `None` means a stop word.
|
||||||
|
Phrase(Vec<Option<String>>),
|
||||||
Or(IsOptionalWord, Vec<Operation>),
|
Or(IsOptionalWord, Vec<Operation>),
|
||||||
Query(Query),
|
Query(Query),
|
||||||
}
|
}
|
||||||
@ -75,9 +75,13 @@ impl Operation {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn phrase(mut words: Vec<String>) -> Self {
|
fn phrase(mut words: Vec<Option<String>>) -> Self {
|
||||||
if words.len() == 1 {
|
if words.len() == 1 {
|
||||||
Self::Query(Query { prefix: false, kind: QueryKind::exact(words.pop().unwrap()) })
|
if let Some(word) = words.pop().unwrap() {
|
||||||
|
Self::Query(Query { prefix: false, kind: QueryKind::exact(word) })
|
||||||
|
} else {
|
||||||
|
Self::Phrase(words)
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
Self::Phrase(words)
|
Self::Phrase(words)
|
||||||
}
|
}
|
||||||
@ -264,8 +268,7 @@ impl<'a> QueryTreeBuilder<'a> {
|
|||||||
&self,
|
&self,
|
||||||
query: ClassifiedTokenIter<A>,
|
query: ClassifiedTokenIter<A>,
|
||||||
) -> Result<Option<(Operation, PrimitiveQuery, MatchingWords)>> {
|
) -> Result<Option<(Operation, PrimitiveQuery, MatchingWords)>> {
|
||||||
let stop_words = self.index.stop_words(self.rtxn)?;
|
let primitive_query = create_primitive_query(query, self.words_limit);
|
||||||
let primitive_query = create_primitive_query(query, stop_words, self.words_limit);
|
|
||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
let qt = create_query_tree(
|
let qt = create_query_tree(
|
||||||
self,
|
self,
|
||||||
@ -370,7 +373,10 @@ fn create_query_tree(
|
|||||||
PrimitiveQueryPart::Word(word, prefix) => {
|
PrimitiveQueryPart::Word(word, prefix) => {
|
||||||
let mut children = synonyms(ctx, &[&word])?.unwrap_or_default();
|
let mut children = synonyms(ctx, &[&word])?.unwrap_or_default();
|
||||||
if let Some((left, right)) = split_best_frequency(ctx, &word)? {
|
if let Some((left, right)) = split_best_frequency(ctx, &word)? {
|
||||||
children.push(Operation::Phrase(vec![left.to_string(), right.to_string()]));
|
children.push(Operation::Phrase(vec![
|
||||||
|
Some(left.to_string()),
|
||||||
|
Some(right.to_string()),
|
||||||
|
]));
|
||||||
}
|
}
|
||||||
let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?;
|
let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?;
|
||||||
let exact_words = ctx.exact_words();
|
let exact_words = ctx.exact_words();
|
||||||
@ -583,7 +589,11 @@ fn create_matching_words(
|
|||||||
PrimitiveQueryPart::Phrase(words) => {
|
PrimitiveQueryPart::Phrase(words) => {
|
||||||
let ids: Vec<_> =
|
let ids: Vec<_> =
|
||||||
(0..words.len()).into_iter().map(|i| id + i as PrimitiveWordId).collect();
|
(0..words.len()).into_iter().map(|i| id + i as PrimitiveWordId).collect();
|
||||||
let words = words.into_iter().map(|w| MatchingWord::new(w, 0, false)).collect();
|
let words = words
|
||||||
|
.into_iter()
|
||||||
|
.filter_map(|w| w)
|
||||||
|
.map(|w| MatchingWord::new(w, 0, false))
|
||||||
|
.collect();
|
||||||
matching_words.push((words, ids));
|
matching_words.push((words, ids));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -685,7 +695,7 @@ pub type PrimitiveQuery = Vec<PrimitiveQueryPart>;
|
|||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum PrimitiveQueryPart {
|
pub enum PrimitiveQueryPart {
|
||||||
Phrase(Vec<String>),
|
Phrase(Vec<Option<String>>),
|
||||||
Word(String, IsPrefix),
|
Word(String, IsPrefix),
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -710,7 +720,6 @@ impl PrimitiveQueryPart {
|
|||||||
/// the primitive query is an intermediate state to build the query tree.
|
/// the primitive query is an intermediate state to build the query tree.
|
||||||
fn create_primitive_query<A>(
|
fn create_primitive_query<A>(
|
||||||
query: ClassifiedTokenIter<A>,
|
query: ClassifiedTokenIter<A>,
|
||||||
stop_words: Option<Set<&[u8]>>,
|
|
||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
) -> PrimitiveQuery
|
) -> PrimitiveQuery
|
||||||
where
|
where
|
||||||
@ -735,9 +744,14 @@ where
|
|||||||
// 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word,
|
// 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word,
|
||||||
// 3. if the word is the last token of the query we push it as a prefix word.
|
// 3. if the word is the last token of the query we push it as a prefix word.
|
||||||
if quoted {
|
if quoted {
|
||||||
phrase.push(token.lemma().to_string());
|
if let TokenKind::StopWord = token.kind {
|
||||||
|
phrase.push(None)
|
||||||
|
} else {
|
||||||
|
phrase.push(Some(token.lemma().to_string()));
|
||||||
|
}
|
||||||
} else if peekable.peek().is_some() {
|
} else if peekable.peek().is_some() {
|
||||||
if !stop_words.as_ref().map_or(false, |swords| swords.contains(token.lemma())) {
|
if let TokenKind::StopWord = token.kind {
|
||||||
|
} else {
|
||||||
primitive_query
|
primitive_query
|
||||||
.push(PrimitiveQueryPart::Word(token.lemma().to_string(), false));
|
.push(PrimitiveQueryPart::Word(token.lemma().to_string(), false));
|
||||||
}
|
}
|
||||||
@ -820,7 +834,7 @@ mod test {
|
|||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
query: ClassifiedTokenIter<A>,
|
query: ClassifiedTokenIter<A>,
|
||||||
) -> Result<Option<(Operation, PrimitiveQuery)>> {
|
) -> Result<Option<(Operation, PrimitiveQuery)>> {
|
||||||
let primitive_query = create_primitive_query(query, None, words_limit);
|
let primitive_query = create_primitive_query(query, words_limit);
|
||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
let qt = create_query_tree(
|
let qt = create_query_tree(
|
||||||
self,
|
self,
|
||||||
@ -1065,7 +1079,7 @@ mod test {
|
|||||||
OR
|
OR
|
||||||
AND
|
AND
|
||||||
OR
|
OR
|
||||||
PHRASE ["word", "split"]
|
PHRASE [Some("word"), Some("split")]
|
||||||
Tolerant { word: "wordsplit", max typo: 2 }
|
Tolerant { word: "wordsplit", max typo: 2 }
|
||||||
Exact { word: "fish" }
|
Exact { word: "fish" }
|
||||||
Tolerant { word: "wordsplitfish", max typo: 1 }
|
Tolerant { word: "wordsplitfish", max typo: 1 }
|
||||||
@ -1084,7 +1098,7 @@ mod test {
|
|||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR
|
OR
|
||||||
PHRASE ["quickbrown", "fox"]
|
PHRASE [Some("quickbrown"), Some("fox")]
|
||||||
PrefixTolerant { word: "quickbrownfox", max typo: 2 }
|
PrefixTolerant { word: "quickbrownfox", max typo: 2 }
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -1101,7 +1115,7 @@ mod test {
|
|||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
AND
|
AND
|
||||||
PHRASE ["hey", "friends"]
|
PHRASE [Some("hey"), Some("friends")]
|
||||||
Exact { word: "wooop" }
|
Exact { word: "wooop" }
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -1138,8 +1152,8 @@ mod test {
|
|||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
AND
|
AND
|
||||||
PHRASE ["hey", "friends"]
|
PHRASE [Some("hey"), Some("friends")]
|
||||||
PHRASE ["wooop", "wooop"]
|
PHRASE [Some("wooop"), Some("wooop")]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1187,7 +1201,7 @@ mod test {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
PHRASE ["hey", "my"]
|
PHRASE [Some("hey"), Some("my")]
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1252,7 +1266,7 @@ mod test {
|
|||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
AND
|
AND
|
||||||
PHRASE ["hey", "my"]
|
PHRASE [Some("hey"), Some("my")]
|
||||||
Exact { word: "good" }
|
Exact { word: "good" }
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
@ -15,6 +15,7 @@ use slice_group_by::GroupBy;
|
|||||||
mod distinct;
|
mod distinct;
|
||||||
mod facet_distribution;
|
mod facet_distribution;
|
||||||
mod filters;
|
mod filters;
|
||||||
|
mod phrase_search;
|
||||||
mod query_criteria;
|
mod query_criteria;
|
||||||
mod sort;
|
mod sort;
|
||||||
mod typo_tolerance;
|
mod typo_tolerance;
|
||||||
|
56
milli/tests/search/phrase_search.rs
Normal file
56
milli/tests/search/phrase_search.rs
Normal file
@ -0,0 +1,56 @@
|
|||||||
|
use milli::update::{IndexerConfig, Settings};
|
||||||
|
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
|
||||||
|
|
||||||
|
use crate::search::Criterion::{Attribute, Exactness, Proximity};
|
||||||
|
|
||||||
|
fn set_stop_words(index: &Index, stop_words: &[&str]) {
|
||||||
|
let mut wtxn = index.write_txn().unwrap();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
|
let mut builder = Settings::new(&mut wtxn, &index, &config);
|
||||||
|
let stop_words = stop_words.into_iter().map(|s| s.to_string()).collect();
|
||||||
|
builder.set_stop_words(stop_words);
|
||||||
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
wtxn.commit().unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
fn test_phrase_search_with_stop_words_given_criteria(criteria: &[Criterion]) {
|
||||||
|
let index = super::setup_search_index_with_criteria(&criteria);
|
||||||
|
|
||||||
|
// Add stop_words
|
||||||
|
set_stop_words(&index, &["a", "an", "the", "of"]);
|
||||||
|
|
||||||
|
// Phrase search containing stop words
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut search = Search::new(&txn, &index);
|
||||||
|
search.query("\"the use of force\"");
|
||||||
|
search.limit(10);
|
||||||
|
search.authorize_typos(false);
|
||||||
|
search.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
// 1 document should match
|
||||||
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
|
|
||||||
|
// test for a single stop word only, no other search terms
|
||||||
|
let mut search = Search::new(&txn, &index);
|
||||||
|
search.query("\"the\"");
|
||||||
|
search.limit(10);
|
||||||
|
search.authorize_typos(false);
|
||||||
|
search.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_phrase_search_with_stop_words_no_criteria() {
|
||||||
|
let criteria = [];
|
||||||
|
test_phrase_search_with_stop_words_given_criteria(&criteria);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_phrase_search_with_stop_words_all_criteria() {
|
||||||
|
let criteria = [Proximity, Attribute, Exactness];
|
||||||
|
test_phrase_search_with_stop_words_given_criteria(&criteria);
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user