Mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-23 13:24:27 +01:00)
feat: Order automatons by importance
parent ebc95cb8f2
commit 81d44a0854
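
The core of this change is the new ordering step at the end of generate_automatons: derived automatons are sorted so that exact ones come before non-exact ones and, within each group, larger n-grams come first. Below is a minimal, self-contained sketch of how a (Reverse(is_exact), Reverse(ngram)) sort key behaves; the struct is a simplified stand-in for illustration only, not the actual MeiliSearch type.

use std::cmp::Reverse;

// Simplified stand-in for the Automaton struct touched by this commit:
// only the fields that take part in the ordering are kept.
struct Automaton {
    index: usize,
    ngram: usize,
    is_exact: bool,
}

fn main() {
    let mut automatons = vec![
        Automaton { index: 0, ngram: 1, is_exact: false },
        Automaton { index: 1, ngram: 2, is_exact: true },
        Automaton { index: 2, ngram: 1, is_exact: true },
        Automaton { index: 3, ngram: 3, is_exact: false },
    ];

    // Same key shape as the commit: Reverse flips the natural ascending
    // order, so exact automatons (true > false) sort first, and within
    // each group the larger ngram wins.
    automatons.sort_unstable_by_key(|a| (Reverse(a.is_exact), Reverse(a.ngram)));

    for a in &automatons {
        println!("index={} is_exact={} ngram={}", a.index, a.is_exact, a.ngram);
    }
    // prints the automatons in index order 1, 2, 3, 0
}
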
@@ -2,7 +2,7 @@ use std::hash::Hash;
 use std::ops::Range;
 use std::rc::Rc;
 use std::time::Instant;
-use std::{cmp, mem};
+use std::{mem, cmp, cmp::Reverse};
 
 use fst::{Streamer, IntoStreamer};
 use hashbrown::HashMap;
@@ -24,30 +24,38 @@ use crate::{TmpMatch, Highlight, DocumentId, Store, RawDocument, Document};
 const NGRAMS: usize = 3;
 
 struct Automaton {
+    index: usize,
+    ngram: usize,
     query_len: usize,
     is_exact: bool,
     dfa: DFA,
 }
 
 impl Automaton {
-    fn exact(query: &str) -> Automaton {
+    fn exact(index: usize, ngram: usize, query: &str) -> Automaton {
         Automaton {
+            index,
+            ngram,
             query_len: query.len(),
             is_exact: true,
             dfa: build_dfa(query),
         }
     }
 
-    fn prefix_exact(query: &str) -> Automaton {
+    fn prefix_exact(index: usize, ngram: usize, query: &str) -> Automaton {
         Automaton {
+            index,
+            ngram,
             query_len: query.len(),
             is_exact: true,
             dfa: build_prefix_dfa(query),
         }
     }
 
-    fn non_exact(query: &str) -> Automaton {
+    fn non_exact(index: usize, ngram: usize, query: &str) -> Automaton {
         Automaton {
+            index,
+            ngram,
             query_len: query.len(),
             is_exact: false,
             dfa: build_dfa(query),
@@ -82,9 +90,9 @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automato
         let not_prefix_dfa = has_following_word || has_end_whitespace || word.chars().all(is_cjk);
 
         let automaton = if not_prefix_dfa {
-            Automaton::exact(word)
+            Automaton::exact(automatons.len(), 1, word)
         } else {
-            Automaton::prefix_exact(word)
+            Automaton::prefix_exact(automatons.len(), 1, word)
         };
         automatons.push(automaton);
     }
@@ -127,9 +135,9 @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automato
 
                     for synonym in synonyms_words {
                         let automaton = if nb_synonym_words == 1 {
-                            Automaton::exact(synonym)
+                            Automaton::exact(automatons.len(), n, synonym)
                         } else {
-                            Automaton::non_exact(synonym)
+                            Automaton::non_exact(automatons.len(), n, synonym)
                         };
                         automatons.push(automaton);
                     }
@@ -145,12 +153,17 @@ fn generate_automatons<S: Store>(query: &str, store: &S) -> Result<(Vec<Automato
                 let real_query_index = automatons.len();
                 enhancer_builder.declare(query_range.clone(), real_query_index, &[&normalized]);
 
-                let automaton = Automaton::exact(&normalized);
+                let automaton = Automaton::exact(automatons.len(), n, &normalized);
                 automatons.push(automaton);
             }
         }
     }
 
+    // order automatons, the most important first,
+    // we keep the original automatons at the front.
+    let original_len = query_words.len();
+    automatons[original_len..].sort_unstable_by_key(|a| (Reverse(a.is_exact), Reverse(a.ngram)));
+
     Ok((automatons, enhancer_builder.build()))
 }
 
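Note that the sort only touches automatons[original_len..]: the first query_words.len() entries are the automatons built directly from the typed query words, so they stay in place, and only the synonym and n-gram automatons derived after them are reordered (exact before non-exact, larger n-grams first).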
@@ -326,7 +339,7 @@ where S: Store,
     let start = Instant::now();
     while let Some((input, indexed_values)) = stream.next() {
         for iv in indexed_values {
-            let Automaton { is_exact, query_len, ref dfa } = automatons[iv.index];
+            let Automaton { index, is_exact, query_len, ref dfa, .. } = automatons[iv.index];
             let distance = dfa.eval(input).to_u8();
             let is_exact = is_exact && distance == 0 && input.len() == query_len;
 
@@ -342,7 +355,7 @@ where S: Store,
                 let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
                 if let Some(attribute) = attribute {
                     let match_ = TmpMatch {
-                        query_index: iv.index as u32,
+                        query_index: index as u32,
                         distance,
                         attribute,
                         word_index: di.word_index,
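Since the automatons vector is reordered before the search runs, iv.index now refers to an automaton's position after sorting, which no longer matches the query position that was declared to the enhancer builder when the automaton was created. Using the index field recorded at creation time (automatons.len() at that point) appears to be how the commit keeps TmpMatch::query_index consistent with those declarations.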