diff --git a/meilisearch-error/src/lib.rs b/meilisearch-error/src/lib.rs index 81371eb6d..11613497c 100644 --- a/meilisearch-error/src/lib.rs +++ b/meilisearch-error/src/lib.rs @@ -120,6 +120,7 @@ pub enum Code { IndexAlreadyExists, IndexNotFound, InvalidIndexUid, + InvalidMinWordLengthForTypo, // invalid state error InvalidState, @@ -271,6 +272,9 @@ impl Code { InvalidApiKeyDescription => { ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST) } + InvalidMinWordLengthForTypo => { + ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST) + } } } diff --git a/meilisearch-lib/src/error.rs b/meilisearch-lib/src/error.rs index f30c698e5..c3e7b8313 100644 --- a/meilisearch-lib/src/error.rs +++ b/meilisearch-lib/src/error.rs @@ -41,7 +41,9 @@ impl ErrorCode for MilliError<'_> { UserError::CriterionError(_) => Code::InvalidRankingRule, UserError::InvalidGeoField { .. } => Code::InvalidGeoField, UserError::SortError(_) => Code::Sort, - UserError::InvalidMinTypoWordLenSetting(_, _) => unreachable!(), + UserError::InvalidMinTypoWordLenSetting(_, _) => { + Code::InvalidMinWordLengthForTypo + } } } } diff --git a/meilisearch-lib/src/index/index.rs b/meilisearch-lib/src/index/index.rs index 33b3f4ad2..778205dbb 100644 --- a/meilisearch-lib/src/index/index.rs +++ b/meilisearch-lib/src/index/index.rs @@ -5,6 +5,7 @@ use std::ops::Deref; use std::path::Path; use std::sync::Arc; +use fst::IntoStreamer; use milli::heed::{EnvOpenOptions, RoTxn}; use milli::update::{IndexerConfig, Setting}; use milli::{obkv_to_json, FieldDistribution, FieldId}; @@ -17,7 +18,7 @@ use crate::EnvSizer; use super::error::IndexError; use super::error::Result; -use super::updates::TypoSettings; +use super::updates::{MinWordLengthTypoSetting, TypoSettings}; use super::{Checked, Settings}; pub type Document = Map; @@ -169,8 +170,22 @@ impl Index { }) .collect(); + let min_typo_word_len = MinWordLengthTypoSetting { + one_typo: Setting::Set(self.min_word_len_one_typo(txn)?), + two_typos: Setting::Set(self.min_word_len_two_typos(txn)?), + }; + + let disabled_words = self + .exact_words(txn)? + .into_stream() + .into_strs()? + .into_iter() + .collect(); + let typo_tolerance = TypoSettings { enabled: Setting::Set(self.authorize_typos(txn)?), + min_word_length_for_typo: Setting::Set(min_typo_word_len), + disable_on_words: Setting::Set(disabled_words), }; Ok(Settings { diff --git a/meilisearch-lib/src/index/updates.rs b/meilisearch-lib/src/index/updates.rs index 95145f92c..f9bc990de 100644 --- a/meilisearch-lib/src/index/updates.rs +++ b/meilisearch-lib/src/index/updates.rs @@ -37,14 +37,33 @@ pub struct Checked; #[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq)] pub struct Unchecked; +#[cfg_attr(test, derive(proptest_derive::Arbitrary))] #[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] #[serde(deny_unknown_fields)] #[serde(rename_all = "camelCase")] +pub struct MinWordLengthTypoSetting { + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub one_typo: Setting, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub two_typos: Setting, +} + #[cfg_attr(test, derive(proptest_derive::Arbitrary))] +#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)] +#[serde(deny_unknown_fields)] +#[serde(rename_all = "camelCase")] pub struct TypoSettings { #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] #[serde(default, skip_serializing_if = "Setting::is_not_set")] pub enabled: Setting, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub min_word_length_for_typo: Setting, + #[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))] + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + pub disable_on_words: Setting>, } /// Holds all the settings for an index. `T` can either be `Checked` if they represents settings /// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a @@ -352,14 +371,44 @@ pub fn apply_settings_to_builder( } match settings.typo { - Setting::Set(ref value) => match value.enabled { - Setting::Set(val) => builder.set_autorize_typos(val), - Setting::Reset => builder.reset_authorize_typos(), - Setting::NotSet => (), - }, + Setting::Set(ref value) => { + match value.enabled { + Setting::Set(val) => builder.set_autorize_typos(val), + Setting::Reset => builder.reset_authorize_typos(), + Setting::NotSet => (), + } + match value.min_word_length_for_typo { + Setting::Set(ref setting) => { + match setting.one_typo { + Setting::Set(val) => builder.set_min_word_len_one_typo(val), + Setting::Reset => builder.reset_min_word_len_one_typo(), + Setting::NotSet => (), + } + match setting.two_typos { + Setting::Set(val) => builder.set_min_word_len_two_typos(val), + Setting::Reset => builder.reset_min_word_len_two_typos(), + Setting::NotSet => (), + } + } + Setting::Reset => { + builder.reset_min_word_len_one_typo(); + builder.reset_min_word_len_two_typos(); + } + Setting::NotSet => (), + } + match value.disable_on_words { + Setting::Set(ref words) => { + builder.set_exact_words(words.clone()); + } + Setting::Reset => builder.reset_exact_words(), + Setting::NotSet => (), + } + } Setting::Reset => { // all typo settings need to be reset here. builder.reset_authorize_typos(); + builder.reset_min_word_len_one_typo(); + builder.reset_min_word_len_two_typos(); } Setting::NotSet => (), }