mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Decide to use prefix DB if the word is not an ngram
This commit is contained in:
parent
7a01f20df7
commit
38b7b31beb
@ -177,6 +177,7 @@ pub fn partially_initialized_term_from_word(
|
|||||||
word: &str,
|
word: &str,
|
||||||
max_typo: u8,
|
max_typo: u8,
|
||||||
is_prefix: bool,
|
is_prefix: bool,
|
||||||
|
is_ngram: bool,
|
||||||
) -> Result<QueryTerm> {
|
) -> Result<QueryTerm> {
|
||||||
let word_interned = ctx.word_interner.insert(word.to_owned());
|
let word_interned = ctx.word_interner.insert(word.to_owned());
|
||||||
|
|
||||||
@ -197,12 +198,19 @@ pub fn partially_initialized_term_from_word(
|
|||||||
let fst = ctx.index.words_fst(ctx.txn)?;
|
let fst = ctx.index.words_fst(ctx.txn)?;
|
||||||
|
|
||||||
let use_prefix_db = is_prefix
|
let use_prefix_db = is_prefix
|
||||||
&& ctx
|
&& (ctx
|
||||||
.index
|
.index
|
||||||
.word_prefix_docids
|
.word_prefix_docids
|
||||||
.remap_data_type::<DecodeIgnore>()
|
.remap_data_type::<DecodeIgnore>()
|
||||||
.get(ctx.txn, word)?
|
.get(ctx.txn, word)?
|
||||||
.is_some();
|
.is_some()
|
||||||
|
|| (!is_ngram
|
||||||
|
&& ctx
|
||||||
|
.index
|
||||||
|
.exact_word_prefix_docids
|
||||||
|
.remap_data_type::<DecodeIgnore>()
|
||||||
|
.get(ctx.txn, word)?
|
||||||
|
.is_some()));
|
||||||
let use_prefix_db = if use_prefix_db { Some(word_interned) } else { None };
|
let use_prefix_db = if use_prefix_db { Some(word_interned) } else { None };
|
||||||
|
|
||||||
let mut zero_typo = None;
|
let mut zero_typo = None;
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
use charabia::{normalizer::NormalizedTokenIter, SeparatorKind, TokenKind};
|
use charabia::normalizer::NormalizedTokenIter;
|
||||||
|
use charabia::{SeparatorKind, TokenKind};
|
||||||
use crate::{Result, SearchContext, MAX_WORD_LENGTH};
|
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::{Result, SearchContext, MAX_WORD_LENGTH};
|
||||||
|
|
||||||
/// Convert the tokenised search query into a list of located query terms.
|
/// Convert the tokenised search query into a list of located query terms.
|
||||||
// TODO: check that the positions are correct for phrases, separators, and ngrams
|
// TODO: check that the positions are correct for phrases, separators, and ngrams
|
||||||
@ -51,6 +51,7 @@ pub fn located_query_terms_from_string(
|
|||||||
word,
|
word,
|
||||||
nbr_typos(word),
|
nbr_typos(word),
|
||||||
false,
|
false,
|
||||||
|
false,
|
||||||
)?;
|
)?;
|
||||||
let located_term = LocatedQueryTerm {
|
let located_term = LocatedQueryTerm {
|
||||||
value: ctx.term_interner.push(term),
|
value: ctx.term_interner.push(term),
|
||||||
@ -62,8 +63,13 @@ pub fn located_query_terms_from_string(
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let word = token.lemma();
|
let word = token.lemma();
|
||||||
let term =
|
let term = partially_initialized_term_from_word(
|
||||||
partially_initialized_term_from_word(ctx, word, nbr_typos(word), true)?;
|
ctx,
|
||||||
|
word,
|
||||||
|
nbr_typos(word),
|
||||||
|
true,
|
||||||
|
false,
|
||||||
|
)?;
|
||||||
let located_term = LocatedQueryTerm {
|
let located_term = LocatedQueryTerm {
|
||||||
value: ctx.term_interner.push(term),
|
value: ctx.term_interner.push(term),
|
||||||
positions: position..=position,
|
positions: position..=position,
|
||||||
@ -195,7 +201,8 @@ pub fn make_ngram(
|
|||||||
let max_nbr_typos =
|
let max_nbr_typos =
|
||||||
number_of_typos_allowed(ngram_str.as_str()).saturating_sub(terms.len() as u8 - 1);
|
number_of_typos_allowed(ngram_str.as_str()).saturating_sub(terms.len() as u8 - 1);
|
||||||
|
|
||||||
let mut term = partially_initialized_term_from_word(ctx, &ngram_str, max_nbr_typos, is_prefix)?;
|
let mut term =
|
||||||
|
partially_initialized_term_from_word(ctx, &ngram_str, max_nbr_typos, is_prefix, true)?;
|
||||||
|
|
||||||
// Now add the synonyms
|
// Now add the synonyms
|
||||||
let index_synonyms = ctx.index.synonyms(ctx.txn)?;
|
let index_synonyms = ctx.index.synonyms(ctx.txn)?;
|
||||||
|
Loading…
Reference in New Issue
Block a user