replace optional_words by term_matching_strategy

This commit is contained in:
ManyTheFish 2022-08-22 17:37:36 +02:00
parent f9029727e0
commit 5391e3842c
9 changed files with 46 additions and 39 deletions

View File

@ -119,7 +119,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
b.iter(|| {
let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn);
search.query(query).optional_words(TermsMatchingStrategy::default());
search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);

View File

@ -44,7 +44,7 @@ pub struct Search<'a> {
offset: usize,
limit: usize,
sort_criteria: Option<Vec<AscDesc>>,
optional_words: TermsMatchingStrategy,
terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool,
words_limit: usize,
rtxn: &'a heed::RoTxn<'a>,
@ -59,7 +59,7 @@ impl<'a> Search<'a> {
offset: 0,
limit: 20,
sort_criteria: None,
optional_words: TermsMatchingStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
authorize_typos: true,
words_limit: 10,
rtxn,
@ -87,8 +87,8 @@ impl<'a> Search<'a> {
self
}
pub fn optional_words(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
self.optional_words = value;
pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
self.terms_matching_strategy = value;
self
}
@ -119,7 +119,7 @@ impl<'a> Search<'a> {
let (query_tree, primitive_query, matching_words) = match self.query.as_ref() {
Some(query) => {
let mut builder = QueryTreeBuilder::new(self.rtxn, self.index)?;
builder.optional_words(self.optional_words);
builder.terms_matching_strategy(self.terms_matching_strategy);
builder.authorize_typos(self.is_typo_authorized()?);
@ -259,7 +259,7 @@ impl fmt::Debug for Search<'_> {
offset,
limit,
sort_criteria,
optional_words,
terms_matching_strategy,
authorize_typos,
words_limit,
rtxn: _,
@ -271,7 +271,7 @@ impl fmt::Debug for Search<'_> {
.field("offset", offset)
.field("limit", limit)
.field("sort_criteria", sort_criteria)
.field("optional_words", optional_words)
.field("terms_matching_strategy", terms_matching_strategy)
.field("authorize_typos", authorize_typos)
.field("words_limit", words_limit)
.finish()

View File

@ -162,7 +162,7 @@ trait Context {
pub struct QueryTreeBuilder<'a> {
rtxn: &'a heed::RoTxn<'a>,
index: &'a Index,
optional_words: TermsMatchingStrategy,
terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool,
words_limit: Option<usize>,
exact_words: Option<fst::Set<Cow<'a, [u8]>>>,
@ -199,19 +199,22 @@ impl<'a> QueryTreeBuilder<'a> {
Ok(Self {
rtxn,
index,
optional_words: TermsMatchingStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
authorize_typos: true,
words_limit: None,
exact_words: index.exact_words(rtxn)?,
})
}
/// if `optional_words` is set to `false` the query tree will be
/// if `terms_matching_strategy` is set to `All` the query tree will be
/// generated forcing all query words to be present in each matching documents
/// (the criterion `words` will be ignored).
/// default value if not called: `true`
pub fn optional_words(&mut self, optional_words: TermsMatchingStrategy) -> &mut Self {
self.optional_words = optional_words;
/// default value if not called: `Last`
pub fn terms_matching_strategy(
&mut self,
terms_matching_strategy: TermsMatchingStrategy,
) -> &mut Self {
self.terms_matching_strategy = terms_matching_strategy;
self
}
@ -232,7 +235,7 @@ impl<'a> QueryTreeBuilder<'a> {
}
/// Build the query tree:
/// - if `optional_words` is set to `false` the query tree will be
/// - if `terms_matching_strategy` is set to `All` the query tree will be
/// generated forcing all query words to be present in each matching documents
/// (the criterion `words` will be ignored)
/// - if `authorize_typos` is set to `false` the query tree will be generated
@ -247,7 +250,7 @@ impl<'a> QueryTreeBuilder<'a> {
if !primitive_query.is_empty() {
let qt = create_query_tree(
self,
self.optional_words,
self.terms_matching_strategy,
self.authorize_typos,
&primitive_query,
)?;
@ -332,7 +335,7 @@ fn synonyms(ctx: &impl Context, word: &[&str]) -> heed::Result<Option<Vec<Operat
/// Main function that creates the final query tree from the primitive query.
fn create_query_tree(
ctx: &impl Context,
optional_words: TermsMatchingStrategy,
terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool,
query: &[PrimitiveQueryPart],
) -> Result<Operation> {
@ -455,7 +458,7 @@ fn create_query_tree(
let mut operation_children = Vec::new();
let mut query = query.to_vec();
for _ in 0..remove_count {
let pos = match optional_words {
let pos = match terms_matching_strategy {
TermsMatchingStrategy::All => return ngrams(ctx, authorize_typos, &query, false),
TermsMatchingStrategy::Any => {
let operation = Operation::Or(
@ -796,15 +799,19 @@ mod test {
impl TestContext {
fn build<A: AsRef<[u8]>>(
&self,
optional_words: TermsMatchingStrategy,
terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool,
words_limit: Option<usize>,
query: ClassifiedTokenIter<A>,
) -> Result<Option<(Operation, PrimitiveQuery)>> {
let primitive_query = create_primitive_query(query, None, words_limit);
if !primitive_query.is_empty() {
let qt =
create_query_tree(self, optional_words, authorize_typos, &primitive_query)?;
let qt = create_query_tree(
self,
terms_matching_strategy,
authorize_typos,
&primitive_query,
)?;
Ok(Some((qt, primitive_query)))
} else {
Ok(None)

View File

@ -1208,7 +1208,7 @@ mod tests {
let mut search = crate::Search::new(&rtxn, &index);
search.query("document");
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids.len(), 4);
@ -1314,7 +1314,7 @@ mod tests {
let mut search = crate::Search::new(&rtxn, &index);
search.query("document");
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids.len(), 4);
@ -1513,7 +1513,7 @@ mod tests {
let mut search = crate::Search::new(&rtxn, &index);
search.query("化妆包");
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
// only 1 document should be returned
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@ -28,7 +28,7 @@ macro_rules! test_distinct {
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap();

View File

@ -19,7 +19,7 @@ macro_rules! test_filter {
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
search.filter(filter_conditions);
let SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@ -31,7 +31,7 @@ macro_rules! test_criterion {
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos($authorize_typos);
search.optional_words($optional_word);
search.terms_matching_strategy($optional_word);
search.sort_criteria($sort_criteria);
let SearchResult { documents_ids, .. } = search.execute().unwrap();
@ -353,7 +353,7 @@ fn criteria_mixup() {
let mut search = Search::new(&mut rtxn, &index);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.optional_words(ALLOW_OPTIONAL_WORDS);
search.terms_matching_strategy(ALLOW_OPTIONAL_WORDS);
search.authorize_typos(ALLOW_TYPOS);
let SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@ -15,7 +15,7 @@ fn sort_ranking_rule_missing() {
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]);
let result = search.execute();

View File

@ -20,7 +20,7 @@ fn test_typo_tolerance_one_typo() {
search.query("zeal");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -29,7 +29,7 @@ fn test_typo_tolerance_one_typo() {
search.query("zean");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 0);
@ -47,7 +47,7 @@ fn test_typo_tolerance_one_typo() {
search.query("zean");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -66,7 +66,7 @@ fn test_typo_tolerance_two_typo() {
search.query("zealand");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -75,7 +75,7 @@ fn test_typo_tolerance_two_typo() {
search.query("zealemd");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 0);
@ -93,7 +93,7 @@ fn test_typo_tolerance_two_typo() {
search.query("zealemd");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -142,7 +142,7 @@ fn test_typo_disabled_on_word() {
search.query("zealand");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 2);
@ -162,7 +162,7 @@ fn test_typo_disabled_on_word() {
search.query("zealand");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -182,7 +182,7 @@ fn test_disable_typo_on_attribute() {
search.query("antebelum");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -200,7 +200,7 @@ fn test_disable_typo_on_attribute() {
search.query("antebelum");
search.limit(10);
search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default());
search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 0);