mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Merge #598
598: Matching query terms policy r=Kerollmops a=ManyTheFish

## Summary

Implement several optional-word strategies.

## Content

Replace the `optional_words` boolean with an enum containing several term-matching strategies:

```rust
pub enum TermsMatchingStrategy {
    // remove last word first
    Last,
    // remove first word first
    First,
    // remove more frequent word first
    Frequency,
    // remove smallest word first
    Size,
    // only one of the word is mandatory
    Any,
    // all words are mandatory
    All,
}
```

All strategies implemented during the prototype are kept, but only `Last` and `All` will be published by Meilisearch in the `v0.29.0` release.

## Related

spec: https://github.com/meilisearch/specifications/pull/173
prototype discussion: https://github.com/meilisearch/meilisearch/discussions/2639#discussioncomment-3447699

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
18886dc6b7
@ -11,7 +11,7 @@ use milli::heed::EnvOpenOptions;
|
|||||||
use milli::update::{
|
use milli::update::{
|
||||||
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings,
|
||||||
};
|
};
|
||||||
use milli::{Filter, Index, Object};
|
use milli::{Filter, Index, Object, TermsMatchingStrategy};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
|
|
||||||
pub struct Conf<'a> {
|
pub struct Conf<'a> {
|
||||||
@ -119,7 +119,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
|
|||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
let mut search = index.search(&rtxn);
|
let mut search = index.search(&rtxn);
|
||||||
search.query(query).optional_words(conf.optional_words);
|
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
if let Some(filter) = conf.filter {
|
if let Some(filter) = conf.filter {
|
||||||
let filter = Filter::from_str(filter).unwrap().unwrap();
|
let filter = Filter::from_str(filter).unwrap().unwrap();
|
||||||
search.filter(filter);
|
search.filter(filter);
|
||||||
|
@ -42,7 +42,7 @@ pub use self::heed_codec::{
|
|||||||
pub use self::index::Index;
|
pub use self::index::Index;
|
||||||
pub use self::search::{
|
pub use self::search::{
|
||||||
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
|
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
|
||||||
MatchingWords, Search, SearchResult, DEFAULT_VALUES_PER_FACET,
|
MatchingWords, Search, SearchResult, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub type Result<T> = std::result::Result<T, error::Error>;
|
pub type Result<T> = std::result::Result<T, error::Error>;
|
||||||
|
@ -44,7 +44,7 @@ pub struct Search<'a> {
|
|||||||
offset: usize,
|
offset: usize,
|
||||||
limit: usize,
|
limit: usize,
|
||||||
sort_criteria: Option<Vec<AscDesc>>,
|
sort_criteria: Option<Vec<AscDesc>>,
|
||||||
optional_words: bool,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
words_limit: usize,
|
words_limit: usize,
|
||||||
rtxn: &'a heed::RoTxn<'a>,
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
@ -59,7 +59,7 @@ impl<'a> Search<'a> {
|
|||||||
offset: 0,
|
offset: 0,
|
||||||
limit: 20,
|
limit: 20,
|
||||||
sort_criteria: None,
|
sort_criteria: None,
|
||||||
optional_words: true,
|
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||||
authorize_typos: true,
|
authorize_typos: true,
|
||||||
words_limit: 10,
|
words_limit: 10,
|
||||||
rtxn,
|
rtxn,
|
||||||
@ -87,8 +87,8 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn optional_words(&mut self, value: bool) -> &mut Search<'a> {
|
pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
|
||||||
self.optional_words = value;
|
self.terms_matching_strategy = value;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -119,7 +119,7 @@ impl<'a> Search<'a> {
|
|||||||
let (query_tree, primitive_query, matching_words) = match self.query.as_ref() {
|
let (query_tree, primitive_query, matching_words) = match self.query.as_ref() {
|
||||||
Some(query) => {
|
Some(query) => {
|
||||||
let mut builder = QueryTreeBuilder::new(self.rtxn, self.index)?;
|
let mut builder = QueryTreeBuilder::new(self.rtxn, self.index)?;
|
||||||
builder.optional_words(self.optional_words);
|
builder.terms_matching_strategy(self.terms_matching_strategy);
|
||||||
|
|
||||||
builder.authorize_typos(self.is_typo_authorized()?);
|
builder.authorize_typos(self.is_typo_authorized()?);
|
||||||
|
|
||||||
@ -259,7 +259,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
offset,
|
offset,
|
||||||
limit,
|
limit,
|
||||||
sort_criteria,
|
sort_criteria,
|
||||||
optional_words,
|
terms_matching_strategy,
|
||||||
authorize_typos,
|
authorize_typos,
|
||||||
words_limit,
|
words_limit,
|
||||||
rtxn: _,
|
rtxn: _,
|
||||||
@ -271,7 +271,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
.field("offset", offset)
|
.field("offset", offset)
|
||||||
.field("limit", limit)
|
.field("limit", limit)
|
||||||
.field("sort_criteria", sort_criteria)
|
.field("sort_criteria", sort_criteria)
|
||||||
.field("optional_words", optional_words)
|
.field("terms_matching_strategy", terms_matching_strategy)
|
||||||
.field("authorize_typos", authorize_typos)
|
.field("authorize_typos", authorize_typos)
|
||||||
.field("words_limit", words_limit)
|
.field("words_limit", words_limit)
|
||||||
.finish()
|
.finish()
|
||||||
@ -286,6 +286,28 @@ pub struct SearchResult {
|
|||||||
pub documents_ids: Vec<DocumentId>,
|
pub documents_ids: Vec<DocumentId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum TermsMatchingStrategy {
|
||||||
|
// remove last word first
|
||||||
|
Last,
|
||||||
|
// remove first word first
|
||||||
|
First,
|
||||||
|
// remove the most frequent word first
|
||||||
|
Frequency,
|
||||||
|
// remove smallest word first
|
||||||
|
Size,
|
||||||
|
// only one of the words is mandatory
|
||||||
|
Any,
|
||||||
|
// all words are mandatory
|
||||||
|
All,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for TermsMatchingStrategy {
|
||||||
|
fn default() -> Self {
|
||||||
|
Self::Last
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub type WordDerivationsCache = HashMap<(String, bool, u8), Vec<(String, u8)>>;
|
pub type WordDerivationsCache = HashMap<(String, bool, u8), Vec<(String, u8)>>;
|
||||||
|
|
||||||
pub fn word_derivations<'c>(
|
pub fn word_derivations<'c>(
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
use std::cmp::min;
|
||||||
use std::{cmp, fmt, mem};
|
use std::{cmp, fmt, mem};
|
||||||
|
|
||||||
use charabia::classifier::ClassifiedTokenIter;
|
use charabia::classifier::ClassifiedTokenIter;
|
||||||
@ -8,6 +9,7 @@ use roaring::RoaringBitmap;
|
|||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
use crate::search::matches::matching_words::{MatchingWord, PrimitiveWordId};
|
use crate::search::matches::matching_words::{MatchingWord, PrimitiveWordId};
|
||||||
|
use crate::search::TermsMatchingStrategy;
|
||||||
use crate::{Index, MatchingWords, Result};
|
use crate::{Index, MatchingWords, Result};
|
||||||
|
|
||||||
type IsOptionalWord = bool;
|
type IsOptionalWord = bool;
|
||||||
@ -62,6 +64,13 @@ impl Operation {
|
|||||||
if ops.len() == 1 {
|
if ops.len() == 1 {
|
||||||
ops.pop().unwrap()
|
ops.pop().unwrap()
|
||||||
} else {
|
} else {
|
||||||
|
let ops = ops
|
||||||
|
.into_iter()
|
||||||
|
.flat_map(|o| match o {
|
||||||
|
Operation::Or(wb, children) if wb == word_branch => children,
|
||||||
|
op => vec![op],
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
Self::Or(word_branch, ops)
|
Self::Or(word_branch, ops)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -153,7 +162,7 @@ trait Context {
|
|||||||
pub struct QueryTreeBuilder<'a> {
|
pub struct QueryTreeBuilder<'a> {
|
||||||
rtxn: &'a heed::RoTxn<'a>,
|
rtxn: &'a heed::RoTxn<'a>,
|
||||||
index: &'a Index,
|
index: &'a Index,
|
||||||
optional_words: bool,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
exact_words: Option<fst::Set<Cow<'a, [u8]>>>,
|
exact_words: Option<fst::Set<Cow<'a, [u8]>>>,
|
||||||
@ -190,19 +199,22 @@ impl<'a> QueryTreeBuilder<'a> {
|
|||||||
Ok(Self {
|
Ok(Self {
|
||||||
rtxn,
|
rtxn,
|
||||||
index,
|
index,
|
||||||
optional_words: true,
|
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||||
authorize_typos: true,
|
authorize_typos: true,
|
||||||
words_limit: None,
|
words_limit: None,
|
||||||
exact_words: index.exact_words(rtxn)?,
|
exact_words: index.exact_words(rtxn)?,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// if `optional_words` is set to `false` the query tree will be
|
/// if `terms_matching_strategy` is set to `All` the query tree will be
|
||||||
/// generated forcing all query words to be present in each matching documents
|
/// generated forcing all query words to be present in each matching documents
|
||||||
/// (the criterion `words` will be ignored).
|
/// (the criterion `words` will be ignored).
|
||||||
/// default value if not called: `true`
|
/// default value if not called: `Last`
|
||||||
pub fn optional_words(&mut self, optional_words: bool) -> &mut Self {
|
pub fn terms_matching_strategy(
|
||||||
self.optional_words = optional_words;
|
&mut self,
|
||||||
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
|
) -> &mut Self {
|
||||||
|
self.terms_matching_strategy = terms_matching_strategy;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -223,7 +235,7 @@ impl<'a> QueryTreeBuilder<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Build the query tree:
|
/// Build the query tree:
|
||||||
/// - if `optional_words` is set to `false` the query tree will be
|
/// - if `terms_matching_strategy` is set to `All` the query tree will be
|
||||||
/// generated forcing all query words to be present in each matching documents
|
/// generated forcing all query words to be present in each matching documents
|
||||||
/// (the criterion `words` will be ignored)
|
/// (the criterion `words` will be ignored)
|
||||||
/// - if `authorize_typos` is set to `false` the query tree will be generated
|
/// - if `authorize_typos` is set to `false` the query tree will be generated
|
||||||
@ -238,7 +250,7 @@ impl<'a> QueryTreeBuilder<'a> {
|
|||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
let qt = create_query_tree(
|
let qt = create_query_tree(
|
||||||
self,
|
self,
|
||||||
self.optional_words,
|
self.terms_matching_strategy,
|
||||||
self.authorize_typos,
|
self.authorize_typos,
|
||||||
&primitive_query,
|
&primitive_query,
|
||||||
)?;
|
)?;
|
||||||
@ -323,7 +335,7 @@ fn synonyms(ctx: &impl Context, word: &[&str]) -> heed::Result<Option<Vec<Operat
|
|||||||
/// Main function that creates the final query tree from the primitive query.
|
/// Main function that creates the final query tree from the primitive query.
|
||||||
fn create_query_tree(
|
fn create_query_tree(
|
||||||
ctx: &impl Context,
|
ctx: &impl Context,
|
||||||
optional_words: bool,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
query: &[PrimitiveQueryPart],
|
query: &[PrimitiveQueryPart],
|
||||||
) -> Result<Operation> {
|
) -> Result<Operation> {
|
||||||
@ -363,6 +375,7 @@ fn create_query_tree(
|
|||||||
ctx: &impl Context,
|
ctx: &impl Context,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
query: &[PrimitiveQueryPart],
|
query: &[PrimitiveQueryPart],
|
||||||
|
any_words: bool,
|
||||||
) -> Result<Operation> {
|
) -> Result<Operation> {
|
||||||
const MAX_NGRAM: usize = 3;
|
const MAX_NGRAM: usize = 3;
|
||||||
let mut op_children = Vec::new();
|
let mut op_children = Vec::new();
|
||||||
@ -415,57 +428,93 @@ fn create_query_tree(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !is_last {
|
if !is_last {
|
||||||
let ngrams = ngrams(ctx, authorize_typos, tail)?;
|
let ngrams = ngrams(ctx, authorize_typos, tail, any_words)?;
|
||||||
and_op_children.push(ngrams);
|
and_op_children.push(ngrams);
|
||||||
}
|
}
|
||||||
or_op_children.push(Operation::and(and_op_children));
|
|
||||||
|
if any_words {
|
||||||
|
or_op_children.push(Operation::or(false, and_op_children));
|
||||||
|
} else {
|
||||||
|
or_op_children.push(Operation::and(and_op_children));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
op_children.push(Operation::or(false, or_op_children));
|
op_children.push(Operation::or(false, or_op_children));
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Operation::and(op_children))
|
if any_words {
|
||||||
}
|
Ok(Operation::or(false, op_children))
|
||||||
|
} else {
|
||||||
/// Create a new branch removing the last non-phrase query parts.
|
Ok(Operation::and(op_children))
|
||||||
fn optional_word(
|
|
||||||
ctx: &impl Context,
|
|
||||||
authorize_typos: bool,
|
|
||||||
query: PrimitiveQuery,
|
|
||||||
) -> Result<Operation> {
|
|
||||||
let number_phrases = query.iter().filter(|p| p.is_phrase()).count();
|
|
||||||
let mut operation_children = Vec::new();
|
|
||||||
|
|
||||||
let start = number_phrases + (number_phrases == 0) as usize;
|
|
||||||
for len in start..=query.len() {
|
|
||||||
let mut word_count = len - number_phrases;
|
|
||||||
let query: Vec<_> = query
|
|
||||||
.iter()
|
|
||||||
.filter(|p| {
|
|
||||||
if p.is_phrase() {
|
|
||||||
true
|
|
||||||
} else if word_count != 0 {
|
|
||||||
word_count -= 1;
|
|
||||||
true
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.cloned()
|
|
||||||
.collect();
|
|
||||||
|
|
||||||
let ngrams = ngrams(ctx, authorize_typos, &query)?;
|
|
||||||
operation_children.push(ngrams);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Operation::or(true, operation_children))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if optional_words {
|
let number_phrases = query.iter().filter(|p| p.is_phrase()).count();
|
||||||
optional_word(ctx, authorize_typos, query.to_vec())
|
let remove_count = query.len() - min(number_phrases, 1);
|
||||||
} else {
|
if remove_count == 0 {
|
||||||
ngrams(ctx, authorize_typos, query)
|
return ngrams(ctx, authorize_typos, query, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let mut operation_children = Vec::new();
|
||||||
|
let mut query = query.to_vec();
|
||||||
|
for _ in 0..remove_count {
|
||||||
|
let pos = match terms_matching_strategy {
|
||||||
|
TermsMatchingStrategy::All => return ngrams(ctx, authorize_typos, &query, false),
|
||||||
|
TermsMatchingStrategy::Any => {
|
||||||
|
let operation = Operation::Or(
|
||||||
|
true,
|
||||||
|
vec![
|
||||||
|
// branch allowing matching documents to contain any query word.
|
||||||
|
ngrams(ctx, authorize_typos, &query, true)?,
|
||||||
|
// branch forcing matching documents to contain all the query words,
|
||||||
|
// keeping these documents at the top of the resulting list.
|
||||||
|
ngrams(ctx, authorize_typos, &query, false)?,
|
||||||
|
],
|
||||||
|
);
|
||||||
|
|
||||||
|
return Ok(operation);
|
||||||
|
}
|
||||||
|
TermsMatchingStrategy::Last => query
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter(|(_, part)| !part.is_phrase())
|
||||||
|
.last()
|
||||||
|
.map(|(pos, _)| pos),
|
||||||
|
TermsMatchingStrategy::First => {
|
||||||
|
query.iter().enumerate().find(|(_, part)| !part.is_phrase()).map(|(pos, _)| pos)
|
||||||
|
}
|
||||||
|
TermsMatchingStrategy::Size => query
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter(|(_, part)| !part.is_phrase())
|
||||||
|
.min_by_key(|(_, part)| match part {
|
||||||
|
PrimitiveQueryPart::Word(s, _) => s.len(),
|
||||||
|
_ => unreachable!(),
|
||||||
|
})
|
||||||
|
.map(|(pos, _)| pos),
|
||||||
|
TermsMatchingStrategy::Frequency => query
|
||||||
|
.iter()
|
||||||
|
.enumerate()
|
||||||
|
.filter(|(_, part)| !part.is_phrase())
|
||||||
|
.max_by_key(|(_, part)| match part {
|
||||||
|
PrimitiveQueryPart::Word(s, _) => {
|
||||||
|
ctx.word_documents_count(s).unwrap_or_default().unwrap_or(u64::max_value())
|
||||||
|
}
|
||||||
|
_ => unreachable!(),
|
||||||
|
})
|
||||||
|
.map(|(pos, _)| pos),
|
||||||
|
};
|
||||||
|
|
||||||
|
// compute and push the current branch on the front
|
||||||
|
operation_children.insert(0, ngrams(ctx, authorize_typos, &query, false)?);
|
||||||
|
// remove word from query before creating a new branch
|
||||||
|
match pos {
|
||||||
|
Some(pos) => query.remove(pos),
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Operation::or(true, operation_children))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Main function that matchings words used for crop and highlight.
|
/// Main function that matchings words used for crop and highlight.
|
||||||
@ -750,15 +799,19 @@ mod test {
|
|||||||
impl TestContext {
|
impl TestContext {
|
||||||
fn build<A: AsRef<[u8]>>(
|
fn build<A: AsRef<[u8]>>(
|
||||||
&self,
|
&self,
|
||||||
optional_words: bool,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
authorize_typos: bool,
|
authorize_typos: bool,
|
||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
query: ClassifiedTokenIter<A>,
|
query: ClassifiedTokenIter<A>,
|
||||||
) -> Result<Option<(Operation, PrimitiveQuery)>> {
|
) -> Result<Option<(Operation, PrimitiveQuery)>> {
|
||||||
let primitive_query = create_primitive_query(query, None, words_limit);
|
let primitive_query = create_primitive_query(query, None, words_limit);
|
||||||
if !primitive_query.is_empty() {
|
if !primitive_query.is_empty() {
|
||||||
let qt =
|
let qt = create_query_tree(
|
||||||
create_query_tree(self, optional_words, authorize_typos, &primitive_query)?;
|
self,
|
||||||
|
terms_matching_strategy,
|
||||||
|
authorize_typos,
|
||||||
|
&primitive_query,
|
||||||
|
)?;
|
||||||
Ok(Some((qt, primitive_query)))
|
Ok(Some((qt, primitive_query)))
|
||||||
} else {
|
} else {
|
||||||
Ok(None)
|
Ok(None)
|
||||||
@ -852,8 +905,10 @@ mod test {
|
|||||||
let query = "hey friends";
|
let query = "hey friends";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR
|
OR
|
||||||
@ -869,8 +924,10 @@ mod test {
|
|||||||
let query = "hey friends ";
|
let query = "hey friends ";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR
|
OR
|
||||||
@ -886,8 +943,10 @@ mod test {
|
|||||||
let query = "hello world ";
|
let query = "hello world ";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR
|
OR
|
||||||
@ -911,8 +970,10 @@ mod test {
|
|||||||
let query = "new york city ";
|
let query = "new york city ";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR
|
OR
|
||||||
@ -932,12 +993,11 @@ mod test {
|
|||||||
Exact { word: "city" }
|
Exact { word: "city" }
|
||||||
Tolerant { word: "newyork", max typo: 1 }
|
Tolerant { word: "newyork", max typo: 1 }
|
||||||
Exact { word: "city" }
|
Exact { word: "city" }
|
||||||
OR
|
Exact { word: "nyc" }
|
||||||
Exact { word: "nyc" }
|
AND
|
||||||
AND
|
Exact { word: "new" }
|
||||||
Exact { word: "new" }
|
Exact { word: "york" }
|
||||||
Exact { word: "york" }
|
Tolerant { word: "newyorkcity", max typo: 1 }
|
||||||
Tolerant { word: "newyorkcity", max typo: 1 }
|
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -946,8 +1006,10 @@ mod test {
|
|||||||
let query = "n grams ";
|
let query = "n grams ";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR
|
OR
|
||||||
@ -963,8 +1025,10 @@ mod test {
|
|||||||
let query = "wordsplit fish ";
|
let query = "wordsplit fish ";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR
|
OR
|
||||||
@ -982,8 +1046,10 @@ mod test {
|
|||||||
let query = "\"hey friends\" \" \" \"wooop";
|
let query = "\"hey friends\" \" \" \"wooop";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
AND
|
AND
|
||||||
@ -997,8 +1063,10 @@ mod test {
|
|||||||
let query = "\"hey friends. wooop wooop\"";
|
let query = "\"hey friends. wooop wooop\"";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
AND
|
AND
|
||||||
@ -1012,8 +1080,10 @@ mod test {
|
|||||||
let query = "hey my friend ";
|
let query = "hey my friend ";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::default(), true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR(WORD)
|
OR(WORD)
|
||||||
@ -1043,8 +1113,10 @@ mod test {
|
|||||||
let query = "\"hey my\"";
|
let query = "\"hey my\"";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::default(), true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
PHRASE ["hey", "my"]
|
PHRASE ["hey", "my"]
|
||||||
@ -1056,8 +1128,10 @@ mod test {
|
|||||||
let query = r#""hey" my good "friend""#;
|
let query = r#""hey" my good "friend""#;
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(true, true, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::default(), true, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR(WORD)
|
OR(WORD)
|
||||||
@ -1084,8 +1158,10 @@ mod test {
|
|||||||
let query = "hey friends ";
|
let query = "hey friends ";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, false, None, tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, false, None, tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
OR
|
OR
|
||||||
@ -1101,8 +1177,10 @@ mod test {
|
|||||||
let query = "\"hey my\" good friend";
|
let query = "\"hey my\" good friend";
|
||||||
let tokens = query.tokenize();
|
let tokens = query.tokenize();
|
||||||
|
|
||||||
let (query_tree, _) =
|
let (query_tree, _) = TestContext::default()
|
||||||
TestContext::default().build(false, false, Some(2), tokens).unwrap().unwrap();
|
.build(TermsMatchingStrategy::All, false, Some(2), tokens)
|
||||||
|
.unwrap()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
insta::assert_debug_snapshot!(query_tree, @r###"
|
insta::assert_debug_snapshot!(query_tree, @r###"
|
||||||
AND
|
AND
|
||||||
@ -1145,7 +1223,8 @@ mod test {
|
|||||||
let exact_words = fst::Set::from_iter(Some("goodbye")).unwrap().into_fst().into_inner();
|
let exact_words = fst::Set::from_iter(Some("goodbye")).unwrap().into_fst().into_inner();
|
||||||
let exact_words = Some(fst::Set::new(exact_words).unwrap().map_data(Cow::Owned).unwrap());
|
let exact_words = Some(fst::Set::new(exact_words).unwrap().map_data(Cow::Owned).unwrap());
|
||||||
let context = TestContext { exact_words, ..Default::default() };
|
let context = TestContext { exact_words, ..Default::default() };
|
||||||
let (query_tree, _) = context.build(false, true, Some(2), tokens).unwrap().unwrap();
|
let (query_tree, _) =
|
||||||
|
context.build(TermsMatchingStrategy::All, true, Some(2), tokens).unwrap().unwrap();
|
||||||
|
|
||||||
assert!(matches!(
|
assert!(matches!(
|
||||||
query_tree,
|
query_tree,
|
||||||
|
@ -613,6 +613,7 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
use crate::documents::documents_batch_reader_from_objects;
|
use crate::documents::documents_batch_reader_from_objects;
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
|
use crate::search::TermsMatchingStrategy;
|
||||||
use crate::update::DeleteDocuments;
|
use crate::update::DeleteDocuments;
|
||||||
use crate::BEU16;
|
use crate::BEU16;
|
||||||
|
|
||||||
@ -1207,7 +1208,7 @@ mod tests {
|
|||||||
let mut search = crate::Search::new(&rtxn, &index);
|
let mut search = crate::Search::new(&rtxn, &index);
|
||||||
search.query("document");
|
search.query("document");
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
// all documents should be returned
|
// all documents should be returned
|
||||||
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
assert_eq!(documents_ids.len(), 4);
|
assert_eq!(documents_ids.len(), 4);
|
||||||
@ -1313,7 +1314,7 @@ mod tests {
|
|||||||
let mut search = crate::Search::new(&rtxn, &index);
|
let mut search = crate::Search::new(&rtxn, &index);
|
||||||
search.query("document");
|
search.query("document");
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
// all documents should be returned
|
// all documents should be returned
|
||||||
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
assert_eq!(documents_ids.len(), 4);
|
assert_eq!(documents_ids.len(), 4);
|
||||||
@ -1512,7 +1513,7 @@ mod tests {
|
|||||||
let mut search = crate::Search::new(&rtxn, &index);
|
let mut search = crate::Search::new(&rtxn, &index);
|
||||||
search.query("化妆包");
|
search.query("化妆包");
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
// only 1 document should be returned
|
// only 1 document should be returned
|
||||||
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
|
@ -2,7 +2,7 @@ use std::collections::HashSet;
|
|||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
use milli::update::Settings;
|
use milli::update::Settings;
|
||||||
use milli::{Criterion, Search, SearchResult};
|
use milli::{Criterion, Search, SearchResult, TermsMatchingStrategy};
|
||||||
use Criterion::*;
|
use Criterion::*;
|
||||||
|
|
||||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||||
@ -28,24 +28,25 @@ macro_rules! test_distinct {
|
|||||||
search.query(search::TEST_QUERY);
|
search.query(search::TEST_QUERY);
|
||||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap();
|
let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap();
|
||||||
|
|
||||||
assert_eq!(candidates.len(), $n_res);
|
assert_eq!(candidates.len(), $n_res);
|
||||||
|
|
||||||
let mut distinct_values = HashSet::new();
|
let mut distinct_values = HashSet::new();
|
||||||
let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true, &[])
|
let expected_external_ids: Vec<_> =
|
||||||
.into_iter()
|
search::expected_order(&criteria, true, TermsMatchingStrategy::default(), &[])
|
||||||
.filter_map(|d| {
|
.into_iter()
|
||||||
if distinct_values.contains(&d.$distinct) {
|
.filter_map(|d| {
|
||||||
None
|
if distinct_values.contains(&d.$distinct) {
|
||||||
} else {
|
None
|
||||||
distinct_values.insert(d.$distinct.to_owned());
|
} else {
|
||||||
Some(d.id)
|
distinct_values.insert(d.$distinct.to_owned());
|
||||||
}
|
Some(d.id)
|
||||||
})
|
}
|
||||||
.collect();
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
|
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
|
||||||
assert_eq!(documents_ids, expected_external_ids);
|
assert_eq!(documents_ids, expected_external_ids);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use either::{Either, Left, Right};
|
use either::{Either, Left, Right};
|
||||||
use milli::{Criterion, Filter, Search, SearchResult};
|
use milli::{Criterion, Filter, Search, SearchResult, TermsMatchingStrategy};
|
||||||
use Criterion::*;
|
use Criterion::*;
|
||||||
|
|
||||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||||
@ -19,16 +19,17 @@ macro_rules! test_filter {
|
|||||||
search.query(search::TEST_QUERY);
|
search.query(search::TEST_QUERY);
|
||||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
search.filter(filter_conditions);
|
search.filter(filter_conditions);
|
||||||
|
|
||||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
|
|
||||||
let filtered_ids = search::expected_filtered_ids($filter);
|
let filtered_ids = search::expected_filtered_ids($filter);
|
||||||
let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true, &[])
|
let expected_external_ids: Vec<_> =
|
||||||
.into_iter()
|
search::expected_order(&criteria, true, TermsMatchingStrategy::default(), &[])
|
||||||
.filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None })
|
.into_iter()
|
||||||
.collect();
|
.filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None })
|
||||||
|
.collect();
|
||||||
|
|
||||||
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
|
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
|
||||||
assert_eq!(documents_ids, expected_external_ids);
|
assert_eq!(documents_ids, expected_external_ids);
|
||||||
|
@ -8,7 +8,7 @@ use heed::EnvOpenOptions;
|
|||||||
use maplit::{hashmap, hashset};
|
use maplit::{hashmap, hashset};
|
||||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object};
|
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
|
||||||
use serde::{Deserialize, Deserializer};
|
use serde::{Deserialize, Deserializer};
|
||||||
use slice_group_by::GroupBy;
|
use slice_group_by::GroupBy;
|
||||||
|
|
||||||
@ -96,7 +96,7 @@ pub fn internal_to_external_ids(index: &Index, internal_ids: &[DocumentId]) -> V
|
|||||||
pub fn expected_order(
|
pub fn expected_order(
|
||||||
criteria: &[Criterion],
|
criteria: &[Criterion],
|
||||||
authorize_typo: bool,
|
authorize_typo: bool,
|
||||||
optional_words: bool,
|
optional_words: TermsMatchingStrategy,
|
||||||
sort_by: &[AscDesc],
|
sort_by: &[AscDesc],
|
||||||
) -> Vec<TestDocument> {
|
) -> Vec<TestDocument> {
|
||||||
let dataset =
|
let dataset =
|
||||||
@ -155,9 +155,9 @@ pub fn expected_order(
|
|||||||
groups = std::mem::take(&mut new_groups);
|
groups = std::mem::take(&mut new_groups);
|
||||||
}
|
}
|
||||||
|
|
||||||
if authorize_typo && optional_words {
|
if authorize_typo && optional_words == TermsMatchingStrategy::default() {
|
||||||
groups.into_iter().flatten().collect()
|
groups.into_iter().flatten().collect()
|
||||||
} else if optional_words {
|
} else if optional_words == TermsMatchingStrategy::default() {
|
||||||
groups.into_iter().flatten().filter(|d| d.typo_rank == 0).collect()
|
groups.into_iter().flatten().filter(|d| d.typo_rank == 0).collect()
|
||||||
} else if authorize_typo {
|
} else if authorize_typo {
|
||||||
groups.into_iter().flatten().filter(|d| d.word_rank == 0).collect()
|
groups.into_iter().flatten().filter(|d| d.word_rank == 0).collect()
|
||||||
|
@ -7,7 +7,7 @@ use itertools::Itertools;
|
|||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
|
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
|
||||||
use rand::Rng;
|
use rand::Rng;
|
||||||
use Criterion::*;
|
use Criterion::*;
|
||||||
|
|
||||||
@ -15,8 +15,8 @@ use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
|||||||
|
|
||||||
const ALLOW_TYPOS: bool = true;
|
const ALLOW_TYPOS: bool = true;
|
||||||
const DISALLOW_TYPOS: bool = false;
|
const DISALLOW_TYPOS: bool = false;
|
||||||
const ALLOW_OPTIONAL_WORDS: bool = true;
|
const ALLOW_OPTIONAL_WORDS: TermsMatchingStrategy = TermsMatchingStrategy::Last;
|
||||||
const DISALLOW_OPTIONAL_WORDS: bool = false;
|
const DISALLOW_OPTIONAL_WORDS: TermsMatchingStrategy = TermsMatchingStrategy::All;
|
||||||
const ASC_DESC_CANDIDATES_THRESHOLD: usize = 1000;
|
const ASC_DESC_CANDIDATES_THRESHOLD: usize = 1000;
|
||||||
|
|
||||||
macro_rules! test_criterion {
|
macro_rules! test_criterion {
|
||||||
@ -31,7 +31,7 @@ macro_rules! test_criterion {
|
|||||||
search.query(search::TEST_QUERY);
|
search.query(search::TEST_QUERY);
|
||||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
||||||
search.authorize_typos($authorize_typos);
|
search.authorize_typos($authorize_typos);
|
||||||
search.optional_words($optional_word);
|
search.terms_matching_strategy($optional_word);
|
||||||
search.sort_criteria($sort_criteria);
|
search.sort_criteria($sort_criteria);
|
||||||
|
|
||||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
@ -353,13 +353,13 @@ fn criteria_mixup() {
|
|||||||
let mut search = Search::new(&mut rtxn, &index);
|
let mut search = Search::new(&mut rtxn, &index);
|
||||||
search.query(search::TEST_QUERY);
|
search.query(search::TEST_QUERY);
|
||||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
||||||
search.optional_words(ALLOW_OPTIONAL_WORDS);
|
search.terms_matching_strategy(ALLOW_OPTIONAL_WORDS);
|
||||||
search.authorize_typos(ALLOW_TYPOS);
|
search.authorize_typos(ALLOW_TYPOS);
|
||||||
|
|
||||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
|
|
||||||
let expected_external_ids: Vec<_> =
|
let expected_external_ids: Vec<_> =
|
||||||
search::expected_order(&criteria, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, &[])
|
search::expected_order(&criteria, ALLOW_TYPOS, ALLOW_OPTIONAL_WORDS, &[])
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|d| d.id)
|
.map(|d| d.id)
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
use big_s::S;
|
use big_s::S;
|
||||||
use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
|
use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
|
||||||
use milli::{AscDesc, Error, Member, Search, UserError};
|
use milli::{AscDesc, Error, Member, Search, TermsMatchingStrategy, UserError};
|
||||||
|
|
||||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||||
|
|
||||||
@ -15,7 +15,7 @@ fn sort_ranking_rule_missing() {
|
|||||||
search.query(search::TEST_QUERY);
|
search.query(search::TEST_QUERY);
|
||||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]);
|
search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]);
|
||||||
|
|
||||||
let result = search.execute();
|
let result = search.execute();
|
||||||
|
@ -2,7 +2,7 @@ use std::collections::BTreeSet;
|
|||||||
|
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{Criterion, Index, Search};
|
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
|
||||||
use serde_json::json;
|
use serde_json::json;
|
||||||
use tempfile::tempdir;
|
use tempfile::tempdir;
|
||||||
use Criterion::*;
|
use Criterion::*;
|
||||||
@ -20,7 +20,7 @@ fn test_typo_tolerance_one_typo() {
|
|||||||
search.query("zeal");
|
search.query("zeal");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 1);
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
@ -29,7 +29,7 @@ fn test_typo_tolerance_one_typo() {
|
|||||||
search.query("zean");
|
search.query("zean");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 0);
|
assert_eq!(result.documents_ids.len(), 0);
|
||||||
@ -47,7 +47,7 @@ fn test_typo_tolerance_one_typo() {
|
|||||||
search.query("zean");
|
search.query("zean");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 1);
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
@ -66,7 +66,7 @@ fn test_typo_tolerance_two_typo() {
|
|||||||
search.query("zealand");
|
search.query("zealand");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 1);
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
@ -75,7 +75,7 @@ fn test_typo_tolerance_two_typo() {
|
|||||||
search.query("zealemd");
|
search.query("zealemd");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 0);
|
assert_eq!(result.documents_ids.len(), 0);
|
||||||
@ -93,7 +93,7 @@ fn test_typo_tolerance_two_typo() {
|
|||||||
search.query("zealemd");
|
search.query("zealemd");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 1);
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
@ -142,7 +142,7 @@ fn test_typo_disabled_on_word() {
|
|||||||
search.query("zealand");
|
search.query("zealand");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 2);
|
assert_eq!(result.documents_ids.len(), 2);
|
||||||
@ -162,7 +162,7 @@ fn test_typo_disabled_on_word() {
|
|||||||
search.query("zealand");
|
search.query("zealand");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 1);
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
@ -182,7 +182,7 @@ fn test_disable_typo_on_attribute() {
|
|||||||
search.query("antebelum");
|
search.query("antebelum");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 1);
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
@ -200,7 +200,7 @@ fn test_disable_typo_on_attribute() {
|
|||||||
search.query("antebelum");
|
search.query("antebelum");
|
||||||
search.limit(10);
|
search.limit(10);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.optional_words(true);
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
let result = search.execute().unwrap();
|
let result = search.execute().unwrap();
|
||||||
assert_eq!(result.documents_ids.len(), 0);
|
assert_eq!(result.documents_ids.len(), 0);
|
||||||
|
Loading…
Reference in New Issue
Block a user