mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
disable typos on exact words
This commit is contained in:
parent
9bbffb8fee
commit
774fa8f065
@ -1,4 +1,4 @@
|
||||
use std::{cmp, fmt, mem};
|
||||
use std::{borrow::Cow, cmp, fmt, mem};
|
||||
|
||||
use fst::Set;
|
||||
use meilisearch_tokenizer::token::SeparatorKind;
|
||||
@ -157,6 +157,7 @@ trait Context {
|
||||
}
|
||||
/// Returns the minimum word len for 1 and 2 typos.
|
||||
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>;
|
||||
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>>;
|
||||
}
|
||||
|
||||
/// The query tree builder is the interface to build a query tree.
|
||||
@ -186,6 +187,10 @@ impl<'a> Context for QueryTreeBuilder<'a> {
|
||||
let two = self.index.min_word_len_two_typos(&self.rtxn)?;
|
||||
Ok((one, two))
|
||||
}
|
||||
|
||||
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> {
|
||||
self.index.exact_words(self.rtxn)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> QueryTreeBuilder<'a> {
|
||||
@ -265,15 +270,16 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct TypoConfig {
|
||||
pub struct TypoConfig<'a> {
|
||||
pub max_typos: u8,
|
||||
pub word_len_one_typo: u8,
|
||||
pub word_len_two_typo: u8,
|
||||
pub exact_words: fst::Set<Cow<'a, [u8]>>,
|
||||
}
|
||||
|
||||
/// Return the `QueryKind` of a word depending on `authorize_typos`
|
||||
/// and the provided word length.
|
||||
fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind {
|
||||
fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind {
|
||||
if authorize_typos {
|
||||
let count = word.chars().count().min(u8::MAX as usize) as u8;
|
||||
if count < config.word_len_one_typo {
|
||||
@ -333,7 +339,9 @@ fn create_query_tree(
|
||||
children.push(child);
|
||||
}
|
||||
let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?;
|
||||
let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo };
|
||||
let exact_words = ctx.exact_words()?;
|
||||
let config =
|
||||
TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo, exact_words };
|
||||
children.push(Operation::Query(Query {
|
||||
prefix,
|
||||
kind: typos(word, authorize_typos, config),
|
||||
@ -385,8 +393,13 @@ fn create_query_tree(
|
||||
let concat = words.concat();
|
||||
let (word_len_one_typo, word_len_two_typo) =
|
||||
ctx.min_word_len_for_typo()?;
|
||||
let config =
|
||||
TypoConfig { max_typos: 1, word_len_one_typo, word_len_two_typo };
|
||||
let exact_words = ctx.exact_words()?;
|
||||
let config = TypoConfig {
|
||||
max_typos: 1,
|
||||
word_len_one_typo,
|
||||
word_len_two_typo,
|
||||
exact_words,
|
||||
};
|
||||
let query = Query {
|
||||
prefix: is_prefix,
|
||||
kind: typos(concat, authorize_typos, config),
|
||||
@ -605,6 +618,12 @@ mod test {
|
||||
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
|
||||
Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
|
||||
}
|
||||
|
||||
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> {
|
||||
let builder = fst::SetBuilder::new(Vec::new()).unwrap();
|
||||
let data = builder.into_inner().unwrap();
|
||||
Ok(fst::Set::new(Cow::Owned(data)).unwrap())
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for TestContext {
|
||||
@ -1225,7 +1244,9 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn test_min_word_len_typo() {
|
||||
let config = TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7 };
|
||||
let exact_words = fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap();
|
||||
let config =
|
||||
TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, exact_words };
|
||||
|
||||
assert_eq!(
|
||||
typos("hello".to_string(), true, config.clone()),
|
||||
|
Loading…
x
Reference in New Issue
Block a user