mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
dynamic minimum word len for typos in query tree builder
This commit is contained in:
parent
5a24e60572
commit
a1a3a49bc9
@ -155,6 +155,8 @@ trait Context {
|
|||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/// Returns the minimum word len for 1 and 2 typos.
|
||||||
|
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The query tree builder is the interface to build a query tree.
|
/// The query tree builder is the interface to build a query tree.
|
||||||
@ -178,6 +180,12 @@ impl<'a> Context for QueryTreeBuilder<'a> {
|
|||||||
fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
|
fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
|
||||||
self.index.word_documents_count(self.rtxn, word)
|
self.index.word_documents_count(self.rtxn, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
|
||||||
|
let one = self.index.min_word_len_1_typo(&self.rtxn)?;
|
||||||
|
let two = self.index.min_word_len_2_typo(&self.rtxn)?;
|
||||||
|
Ok((one, two))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> QueryTreeBuilder<'a> {
|
impl<'a> QueryTreeBuilder<'a> {
|
||||||
@ -256,14 +264,23 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
|
|||||||
Ok(best.map(|(_, left, right)| Operation::Phrase(vec![left.to_string(), right.to_string()])))
|
Ok(best.map(|(_, left, right)| Operation::Phrase(vec![left.to_string(), right.to_string()])))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub struct TypoConfig {
|
||||||
|
pub max_typos: u8,
|
||||||
|
pub word_len_1_typo: u8,
|
||||||
|
pub word_len_2_typo: u8,
|
||||||
|
}
|
||||||
|
|
||||||
/// Return the `QueryKind` of a word depending on `authorize_typos`
|
/// Return the `QueryKind` of a word depending on `authorize_typos`
|
||||||
/// and the provided word length.
|
/// and the provided word length.
|
||||||
fn typos(word: String, authorize_typos: bool, max_typos: u8) -> QueryKind {
|
fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind {
|
||||||
if authorize_typos {
|
if authorize_typos {
|
||||||
match word.chars().count() {
|
let count = word.chars().count().min(u8::MAX as usize) as u8;
|
||||||
0..=4 => QueryKind::exact(word),
|
if (0..config.word_len_1_typo).contains(&count) {
|
||||||
5..=8 => QueryKind::tolerant(1.min(max_typos), word),
|
QueryKind::exact(word)
|
||||||
_ => QueryKind::tolerant(2.min(max_typos), word),
|
} else if (config.word_len_1_typo..config.word_len_2_typo).contains(&count) {
|
||||||
|
QueryKind::tolerant(1.min(config.max_typos), word)
|
||||||
|
} else {
|
||||||
|
QueryKind::tolerant(2.min(config.max_typos), word)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
QueryKind::exact(word)
|
QueryKind::exact(word)
|
||||||
@ -314,9 +331,11 @@ fn create_query_tree(
|
|||||||
if let Some(child) = split_best_frequency(ctx, &word)? {
|
if let Some(child) = split_best_frequency(ctx, &word)? {
|
||||||
children.push(child);
|
children.push(child);
|
||||||
}
|
}
|
||||||
|
let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?;
|
||||||
|
let config = TypoConfig { max_typos: 2, word_len_1_typo, word_len_2_typo };
|
||||||
children.push(Operation::Query(Query {
|
children.push(Operation::Query(Query {
|
||||||
prefix,
|
prefix,
|
||||||
kind: typos(word, authorize_typos, 2),
|
kind: typos(word, authorize_typos, config),
|
||||||
}));
|
}));
|
||||||
Ok(Operation::or(false, children))
|
Ok(Operation::or(false, children))
|
||||||
}
|
}
|
||||||
@ -363,9 +382,12 @@ fn create_query_tree(
|
|||||||
.collect();
|
.collect();
|
||||||
let mut operations = synonyms(ctx, &words)?.unwrap_or_default();
|
let mut operations = synonyms(ctx, &words)?.unwrap_or_default();
|
||||||
let concat = words.concat();
|
let concat = words.concat();
|
||||||
|
let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?;
|
||||||
|
let config =
|
||||||
|
TypoConfig { max_typos: 1, word_len_1_typo, word_len_2_typo };
|
||||||
let query = Query {
|
let query = Query {
|
||||||
prefix: is_prefix,
|
prefix: is_prefix,
|
||||||
kind: typos(concat, authorize_typos, 1),
|
kind: typos(concat, authorize_typos, config),
|
||||||
};
|
};
|
||||||
operations.push(Operation::Query(query));
|
operations.push(Operation::Query(query));
|
||||||
and_op_children.push(Operation::or(false, operations));
|
and_op_children.push(Operation::or(false, operations));
|
||||||
@ -576,6 +598,10 @@ mod test {
|
|||||||
let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
|
let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
|
||||||
Ok(self.synonyms.get(&words).cloned())
|
Ok(self.synonyms.get(&words).cloned())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
|
||||||
|
Ok((5, 9))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for TestContext {
|
impl Default for TestContext {
|
||||||
|
Loading…
Reference in New Issue
Block a user