add test for exact words

This commit is contained in:
ad hoc 2022-03-22 09:55:49 +01:00
parent 774fa8f065
commit 8b1e5d9c6d
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643
2 changed files with 26 additions and 8 deletions

View File

@ -584,6 +584,8 @@ mod test {
struct TestContext { struct TestContext {
synonyms: HashMap<Vec<String>, Vec<Vec<String>>>, synonyms: HashMap<Vec<String>, Vec<Vec<String>>>,
postings: HashMap<String, RoaringBitmap>, postings: HashMap<String, RoaringBitmap>,
// Raw bytes for the exact word fst Set
exact_words: Vec<u8>,
} }
impl TestContext { impl TestContext {
@ -620,9 +622,7 @@ mod test {
} }
fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> { fn exact_words(&self) -> crate::Result<fst::Set<Cow<[u8]>>> {
let builder = fst::SetBuilder::new(Vec::new()).unwrap(); Ok(fst::Set::new(Cow::Borrowed(self.exact_words.as_slice())).unwrap())
let data = builder.into_inner().unwrap();
Ok(fst::Set::new(Cow::Owned(data)).unwrap())
} }
} }
@ -640,6 +640,8 @@ mod test {
RoaringBitmap::from_sorted_iter(values.into_iter()).unwrap() RoaringBitmap::from_sorted_iter(values.into_iter()).unwrap()
} }
let exact_words = fst::SetBuilder::new(Vec::new()).unwrap().into_inner().unwrap();
TestContext { TestContext {
synonyms: hashmap! { synonyms: hashmap! {
vec![String::from("hello")] => vec![ vec![String::from("hello")] => vec![
@ -679,6 +681,7 @@ mod test {
String::from("good") => random_postings(rng, 1250), String::from("good") => random_postings(rng, 1250),
String::from("morning") => random_postings(rng, 125), String::from("morning") => random_postings(rng, 125),
}, },
exact_words,
} }
} }
} }
@ -1263,4 +1266,20 @@ mod test {
QueryKind::Tolerant { typo: 2, word: "verylongword".to_string() } QueryKind::Tolerant { typo: 2, word: "verylongword".to_string() }
); );
} }
/// Checks that querying a word present in the context's exact-words set
/// produces a single `QueryKind::Exact` query with `prefix: true`.
#[test]
fn disable_typo_on_word() {
    // Raw bytes of an fst set whose only member is the queried word; the
    // remaining `TestContext` fields come from its `Default` implementation.
    let exact_words = fst::Set::from_iter(Some("goodbye")).unwrap().into_fst().into_inner();
    let context = TestContext { exact_words, ..Default::default() };

    // Tokenize the query with the default analyzer configuration.
    let analyzer = Analyzer::new(AnalyzerConfig::<Vec<u8>>::default());
    let analyzed = analyzer.analyze("goodbye");
    let tokens = analyzed.tokens();

    // `build` yields a nested result/option; both layers must be populated.
    let built = context.build(false, true, Some(2), tokens).unwrap();
    let (query_tree, _) = built.unwrap();

    assert!(matches!(
        query_tree,
        Operation::Query(Query { prefix: true, kind: QueryKind::Exact { .. } })
    ));
}
} }

View File

@ -92,7 +92,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
authorize_typos: Setting<bool>, authorize_typos: Setting<bool>,
min_word_len_two_typos: Setting<u8>, min_word_len_two_typos: Setting<u8>,
min_word_len_one_typo: Setting<u8>, min_word_len_one_typo: Setting<u8>,
exact_words: Setting<Vec<String>>, exact_words: Setting<BTreeSet<String>>,
} }
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
@ -115,9 +115,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
primary_key: Setting::NotSet, primary_key: Setting::NotSet,
authorize_typos: Setting::NotSet, authorize_typos: Setting::NotSet,
exact_words: Setting::NotSet, exact_words: Setting::NotSet,
indexer_config,
min_word_len_two_typos: Setting::Reset, min_word_len_two_typos: Setting::Reset,
min_word_len_one_typo: Setting::Reset, min_word_len_one_typo: Setting::Reset,
indexer_config,
} }
} }
@ -218,7 +218,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.min_word_len_one_typo = Setting::Reset; self.min_word_len_one_typo = Setting::Reset;
} }
pub fn set_exact_words(&mut self, words: Vec<String>) { pub fn set_exact_words(&mut self, words: BTreeSet<String>) {
self.exact_words = Setting::Set(words); self.exact_words = Setting::Set(words);
} }
@ -539,8 +539,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
fn update_exact_words(&mut self) -> Result<()> { fn update_exact_words(&mut self) -> Result<()> {
match self.exact_words { match self.exact_words {
Setting::Set(ref mut words) => { Setting::Set(ref mut words) => {
words.sort_unstable(); let words = fst::Set::from_iter(words.iter())?;
let words = fst::Set::from_iter(words)?;
self.index.put_exact_words(&mut self.wtxn, &words)?; self.index.put_exact_words(&mut self.wtxn, &words)?;
} }
Setting::Reset => { Setting::Reset => {