Mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-30 08:44:27 +01:00)
typos on first letter counts as 2
commit 55e6cb9c7b (parent 642c01d0dc)
@@ -18,3 +18,6 @@ opt-level = 3
 opt-level = 3
 [profile.test.build-override]
 opt-level = 3
+
+[patch.crates-io]
+fst = { path = "/Users/mpostma/Documents/code/rust/fst/" }
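The new [patch.crates-io] entry overrides the published fst crate with a checkout on the author's machine (note the absolute /Users/mpostma/... path), presumably to pick up fst automaton features needed by the search changes below before a release containing them was available. As written it only builds on that machine, so the patch would have to be removed or pointed at a published fst version before this could be merged.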
@@ -7,7 +7,8 @@ use std::str::Utf8Error;
 use std::time::Instant;
 
 use distinct::{Distinct, DocIter, FacetDistinct, NoopDistinct};
-use fst::{IntoStreamer, Streamer};
+use fst::automaton::Str;
+use fst::{Automaton, IntoStreamer, Streamer};
 use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
 use log::debug;
 use meilisearch_tokenizer::{Analyzer, AnalyzerConfig};
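For context, here is a minimal sketch (not part of the commit) of the fst automaton API that the new imports pull in, assuming the published fst 0.4 interface: Str matches one exact string, starts_with relaxes it to a prefix test, and intersection requires a key to satisfy both automata at once. The commit combines a first-letter automaton with the Levenshtein DFA in exactly this intersection style.

// Sketch only: shows how fst automata compose; names and data are illustrative.
use fst::automaton::Str;
use fst::{Automaton, IntoStreamer, Set, Streamer};

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Keys must be inserted in lexicographic order.
    let set = Set::from_iter(vec!["fast", "feast", "last", "least"])?;

    // Keys that start with "f"...
    let starts_with_f = Str::new("f").starts_with();
    // ...and that also start with "fe": both constraints must hold.
    let aut = starts_with_f.intersection(Str::new("fe").starts_with());

    let mut stream = set.search(aut).into_stream();
    while let Some(key) = stream.next() {
        println!("{}", std::str::from_utf8(key)?); // prints only: feast
    }
    Ok(())
}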
@@ -285,6 +286,18 @@ pub fn word_derivations<'c>(
         Entry::Vacant(entry) => {
             let mut derived_words = Vec::new();
             let dfa = build_dfa(word, max_typo, is_prefix);
+            if max_typo == 1 {
+                let starts = Str::new(get_first(word));
+                let mut stream = fst.search_with_state(starts.intersection(&dfa)).into_stream();
+
+                while let Some((word, state)) = stream.next() {
+                    let word = std::str::from_utf8(word)?;
+                    let distance = dfa.distance(state.1);
+                    derived_words.push((word.to_string(), distance.to_u8()));
+                }
+
+                Ok(entry.insert(derived_words))
+            } else {
             let mut stream = fst.search_with_state(&dfa).into_stream();
 
             while let Some((word, state)) = stream.next() {
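This branch is the heart of the change. For max_typo == 1 the Levenshtein DFA is no longer streamed over the words FST on its own; it is intersected with an automaton built from the first character of the query word, with the intent that only candidates keeping that first character can come out of the search. A word whose first letter differs therefore never fits in a one-typo budget, which is what the commit title "typos on first letter counts as 2" means. Because the search now runs over an intersection, the per-key state returned by search_with_state is a pair of automaton states, and the DFA distance is read from its second component (state.1). The sketch below is a simplified model of the resulting policy in plain Rust, with a naive edit-distance function; it is an illustration only, not the fst/DFA-based implementation above.

// Simplified model of the policy: with a budget of one typo, a candidate whose
// first character differs from the query is rejected outright.
fn levenshtein(a: &str, b: &str) -> usize {
    let a: Vec<char> = a.chars().collect();
    let b: Vec<char> = b.chars().collect();
    let mut prev: Vec<usize> = (0..=b.len()).collect();
    for (i, ca) in a.iter().enumerate() {
        let mut cur = vec![i + 1];
        for (j, cb) in b.iter().enumerate() {
            let cost = if ca == cb { 0 } else { 1 };
            cur.push((prev[j] + cost).min(prev[j + 1] + 1).min(cur[j] + 1));
        }
        prev = cur;
    }
    prev[b.len()]
}

fn matches_with_one_typo(query: &str, candidate: &str) -> bool {
    // A first-letter mismatch is priced at 2, which blows the budget of 1.
    if query.chars().next() != candidate.chars().next() {
        return false;
    }
    levenshtein(query, candidate) <= 1
}

fn main() {
    assert!(matches_with_one_typo("house", "houze"));  // typo in the middle: allowed
    assert!(!matches_with_one_typo("house", "mouse")); // typo on the first letter: rejected
    assert!(matches_with_one_typo("house", "house"));  // exact match: allowed
}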
@@ -296,6 +309,14 @@ pub fn word_derivations<'c>(
             Ok(entry.insert(derived_words))
             }
         }
+    }
+}
+
+fn get_first(s: &str) -> &str {
+    match s.chars().next() {
+        Some(c) => &s[..c.len_utf8()],
+        None => s,
+    }
 }
 
 pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
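The new get_first helper slices out the first character of the query as a string slice, using that character's UTF-8 length rather than assuming a single byte, and returns the empty string unchanged. A small self-contained check of that behavior; the helper is copied from the diff above and the asserts are illustrative only:

// get_first returns the byte slice of the first *character*, which may span
// more than one byte, and the empty string for empty input.
fn get_first(s: &str) -> &str {
    match s.chars().next() {
        Some(c) => &s[..c.len_utf8()],
        None => s,
    }
}

fn main() {
    assert_eq!(get_first("word"), "w");
    assert_eq!(get_first("échelle"), "é"); // 'é' occupies two bytes in UTF-8
    assert_eq!(get_first(""), "");
}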