disable typos on exact words

This commit is contained in:
ad hoc 2022-03-21 16:25:15 +01:00
parent 9bbffb8fee
commit 774fa8f065
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643

View File

@ -1,4 +1,4 @@
use std::{cmp, fmt, mem};
use std::{borrow::Cow, cmp, fmt, mem};
use fst::Set;
use meilisearch_tokenizer::token::SeparatorKind;
@ -157,6 +157,7 @@ trait Context {
}
/// Returns the minimum word len for 1 and 2 typos.
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>;
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>>;
}
/// The query tree builder is the interface to build a query tree.
@ -186,6 +187,10 @@ impl<'a> Context for QueryTreeBuilder<'a> {
let two = self.index.min_word_len_two_typos(&self.rtxn)?;
Ok((one, two))
}
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> {
self.index.exact_words(self.rtxn)
}
}
impl<'a> QueryTreeBuilder<'a> {
@ -265,15 +270,16 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
}
#[derive(Clone)]
pub struct TypoConfig {
pub struct TypoConfig<'a> {
pub max_typos: u8,
pub word_len_one_typo: u8,
pub word_len_two_typo: u8,
pub exact_words: fst::Set<Cow<'a, [u8]>>,
}
/// Return the `QueryKind` of a word depending on `authorize_typos`
/// and the provided word length.
fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind {
fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind {
if authorize_typos {
let count = word.chars().count().min(u8::MAX as usize) as u8;
if count < config.word_len_one_typo {
@ -333,7 +339,9 @@ fn create_query_tree(
children.push(child);
}
let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?;
let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo };
let exact_words = ctx.exact_words()?;
let config =
TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo, exact_words };
children.push(Operation::Query(Query {
prefix,
kind: typos(word, authorize_typos, config),
@ -385,8 +393,13 @@ fn create_query_tree(
let concat = words.concat();
let (word_len_one_typo, word_len_two_typo) =
ctx.min_word_len_for_typo()?;
let config =
TypoConfig { max_typos: 1, word_len_one_typo, word_len_two_typo };
let exact_words = ctx.exact_words()?;
let config = TypoConfig {
max_typos: 1,
word_len_one_typo,
word_len_two_typo,
exact_words,
};
let query = Query {
prefix: is_prefix,
kind: typos(concat, authorize_typos, config),
@ -605,6 +618,12 @@ mod test {
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
}
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> {
let builder = fst::SetBuilder::new(Vec::new()).unwrap();
let data = builder.into_inner().unwrap();
Ok(fst::Set::new(Cow::Owned(data)).unwrap())
}
}
impl Default for TestContext {
@ -1225,7 +1244,9 @@ mod test {
#[test]
fn test_min_word_len_typo() {
let config = TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7 };
let exact_words = fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap();
let config =
TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, exact_words };
assert_eq!(
typos("hello".to_string(), true, config.clone()),