feat(all): introduce disable typo on words

This commit is contained in:
ad hoc 2022-03-22 18:17:33 +01:00
parent dd43ba6234
commit e9f66b8766
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643
2 changed files with 19 additions and 0 deletions

View File

@ -5,6 +5,7 @@ use std::ops::Deref;
use std::path::Path; use std::path::Path;
use std::sync::Arc; use std::sync::Arc;
use fst::IntoStreamer;
use milli::heed::{EnvOpenOptions, RoTxn}; use milli::heed::{EnvOpenOptions, RoTxn};
use milli::update::{IndexerConfig, Setting}; use milli::update::{IndexerConfig, Setting};
use milli::{obkv_to_json, FieldDistribution, FieldId}; use milli::{obkv_to_json, FieldDistribution, FieldId};
@ -174,9 +175,17 @@ impl Index {
two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), two_typos: Setting::Set(self.min_word_len_two_typos(txn)?),
}; };
let disabled_words = self
.exact_words(txn)?
.into_stream()
.into_strs()?
.into_iter()
.collect();
let typo_tolerance = TypoSettings { let typo_tolerance = TypoSettings {
enabled: Setting::Set(self.authorize_typos(txn)?), enabled: Setting::Set(self.authorize_typos(txn)?),
min_word_length_for_typo: Setting::Set(min_typo_word_len), min_word_length_for_typo: Setting::Set(min_typo_word_len),
disable_on_words: Setting::Set(disabled_words),
}; };
Ok(Settings { Ok(Settings {

View File

@ -61,6 +61,9 @@ pub struct TypoSettings {
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")] #[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub min_word_length_for_typo: Setting<MinWordLengthTypoSetting>, pub min_word_length_for_typo: Setting<MinWordLengthTypoSetting>,
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub disable_on_words: Setting<BTreeSet<String>>,
} }
/// Holds all the settings for an index. `T` can either be `Checked` if they represent settings /// Holds all the settings for an index. `T` can either be `Checked` if they represent settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter case, a /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the latter case, a
@ -393,6 +396,13 @@ pub fn apply_settings_to_builder(
} }
Setting::NotSet => (), Setting::NotSet => (),
} }
match value.disable_on_words {
Setting::Set(ref words) => {
builder.set_exact_words(words.clone());
}
Setting::Reset => builder.reset_exact_words(),
Setting::NotSet => (),
}
} }
Setting::Reset => { Setting::Reset => {
// all typo settings need to be reset here. // all typo settings need to be reset here.