Return an internal error when a matching word is invalid

This commit is contained in:
ManyTheFish 2023-03-01 18:52:14 +01:00
parent 900bae3d9d
commit 37489fd495
5 changed files with 18 additions and 8 deletions

View File

@ -29,7 +29,7 @@ fn bench_formatting(c: &mut criterion::Criterion) {
(vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]), (vec![Rc::new(MatchingWord::new("thedoord".to_string(), 1, true).unwrap())], vec![0, 1, 2]),
(vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]), (vec![Rc::new(MatchingWord::new("doord".to_string(), 1, true).unwrap())], vec![1, 2]),
] ]
), TokenizerBuilder::default().build()), ).unwrap(), TokenizerBuilder::default().build()),
}, },
]; ];

View File

@ -59,6 +59,8 @@ pub enum InternalError {
Utf8(#[from] str::Utf8Error), Utf8(#[from] str::Utf8Error),
#[error("An indexation process was explicitly aborted.")] #[error("An indexation process was explicitly aborted.")]
AbortedIndexation, AbortedIndexation,
#[error("The matching words list contains at least one invalid member.")]
InvalidMatchingWords,
} }
#[derive(Error, Debug)] #[derive(Error, Debug)]

View File

@ -7,6 +7,7 @@ use std::rc::Rc;
use charabia::Token; use charabia::Token;
use levenshtein_automata::{Distance, DFA}; use levenshtein_automata::{Distance, DFA};
use crate::error::InternalError;
use crate::search::build_dfa; use crate::search::build_dfa;
use crate::MAX_WORD_LENGTH; use crate::MAX_WORD_LENGTH;
@ -31,12 +32,19 @@ impl fmt::Debug for MatchingWords {
} }
impl MatchingWords { impl MatchingWords {
pub fn new(mut matching_words: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>) -> Self { pub fn new(
mut matching_words: Vec<(Vec<Rc<MatchingWord>>, Vec<PrimitiveWordId>)>,
) -> crate::Result<Self> {
// if one of the matching_words vec doesn't contain a word.
if matching_words.iter().any(|(mw, _)| mw.is_empty()) {
return Err(InternalError::InvalidMatchingWords.into());
}
// Sort word by len in DESC order prioritizing the longest matches, // Sort word by len in DESC order prioritizing the longest matches,
// in order to highlight the longest part of the matched word. // in order to highlight the longest part of the matched word.
matching_words.sort_unstable_by_key(|(mw, _)| Reverse((mw.len(), mw[0].word.len()))); matching_words.sort_unstable_by_key(|(mw, _)| Reverse((mw.len(), mw[0].word.len())));
Self { inner: matching_words } Ok(Self { inner: matching_words })
} }
/// Returns an iterator over terms that match or partially match the given token. /// Returns an iterator over terms that match or partially match the given token.
@ -360,7 +368,7 @@ mod tests {
(vec![all[2].clone()], vec![2]), (vec![all[2].clone()], vec![2]),
]; ];
let matching_words = MatchingWords::new(matching_words); let matching_words = MatchingWords::new(matching_words).unwrap();
assert_eq!( assert_eq!(
matching_words matching_words

View File

@ -513,7 +513,7 @@ mod tests {
(vec![all[2].clone()], vec![2]), (vec![all[2].clone()], vec![2]),
]; ];
MatchingWords::new(matching_words) MatchingWords::new(matching_words).unwrap()
} }
impl MatcherBuilder<'_, Vec<u8>> { impl MatcherBuilder<'_, Vec<u8>> {
@ -600,7 +600,7 @@ mod tests {
]; ];
let matching_words = vec![(vec![all[0].clone()], vec![0]), (vec![all[1].clone()], vec![1])]; let matching_words = vec![(vec![all[0].clone()], vec![0]), (vec![all[1].clone()], vec![1])];
let matching_words = MatchingWords::new(matching_words); let matching_words = MatchingWords::new(matching_words).unwrap();
let builder = MatcherBuilder::from_matching_words(matching_words); let builder = MatcherBuilder::from_matching_words(matching_words);
@ -847,7 +847,7 @@ mod tests {
(vec![all[4].clone()], vec![2]), (vec![all[4].clone()], vec![2]),
]; ];
let matching_words = MatchingWords::new(matching_words); let matching_words = MatchingWords::new(matching_words).unwrap();
let mut builder = MatcherBuilder::from_matching_words(matching_words); let mut builder = MatcherBuilder::from_matching_words(matching_words);
builder.highlight_prefix("_".to_string()); builder.highlight_prefix("_".to_string());

View File

@ -747,7 +747,7 @@ fn create_matching_words(
let mut matching_word_cache = MatchingWordCache::default(); let mut matching_word_cache = MatchingWordCache::default();
let mut matching_words = Vec::new(); let mut matching_words = Vec::new();
ngrams(ctx, authorize_typos, query, &mut matching_words, &mut matching_word_cache, 0)?; ngrams(ctx, authorize_typos, query, &mut matching_words, &mut matching_word_cache, 0)?;
Ok(MatchingWords::new(matching_words)) MatchingWords::new(matching_words)
} }
pub type PrimitiveQuery = Vec<PrimitiveQueryPart>; pub type PrimitiveQuery = Vec<PrimitiveQueryPart>;