mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 12:54:26 +01:00
Merge #2259
2259: disable typos on words r=MarinPostma a=MarinPostma
Introduce the disable typo setting as per https://github.com/meilisearch/specifications/pull/117.
Waiting for https://github.com/meilisearch/milli/pull/474.
Co-authored-by: ad hoc <postma.marin@protonmail.com>
This commit is contained in:
commit
c321ac61b5
@ -120,6 +120,7 @@ pub enum Code {
|
||||
IndexAlreadyExists,
|
||||
IndexNotFound,
|
||||
InvalidIndexUid,
|
||||
InvalidMinWordLengthForTypo,
|
||||
|
||||
// invalid state error
|
||||
InvalidState,
|
||||
@ -271,6 +272,9 @@ impl Code {
|
||||
InvalidApiKeyDescription => {
|
||||
ErrCode::invalid("invalid_api_key_description", StatusCode::BAD_REQUEST)
|
||||
}
|
||||
InvalidMinWordLengthForTypo => {
|
||||
ErrCode::invalid("invalid_min_word_length_for_typo", StatusCode::BAD_REQUEST)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -41,7 +41,9 @@ impl ErrorCode for MilliError<'_> {
|
||||
UserError::CriterionError(_) => Code::InvalidRankingRule,
|
||||
UserError::InvalidGeoField { .. } => Code::InvalidGeoField,
|
||||
UserError::SortError(_) => Code::Sort,
|
||||
UserError::InvalidMinTypoWordLenSetting(_, _) => unreachable!(),
|
||||
UserError::InvalidMinTypoWordLenSetting(_, _) => {
|
||||
Code::InvalidMinWordLengthForTypo
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ use std::ops::Deref;
|
||||
use std::path::Path;
|
||||
use std::sync::Arc;
|
||||
|
||||
use fst::IntoStreamer;
|
||||
use milli::heed::{EnvOpenOptions, RoTxn};
|
||||
use milli::update::{IndexerConfig, Setting};
|
||||
use milli::{obkv_to_json, FieldDistribution, FieldId};
|
||||
@ -17,7 +18,7 @@ use crate::EnvSizer;
|
||||
|
||||
use super::error::IndexError;
|
||||
use super::error::Result;
|
||||
use super::updates::TypoSettings;
|
||||
use super::updates::{MinWordLengthTypoSetting, TypoSettings};
|
||||
use super::{Checked, Settings};
|
||||
|
||||
pub type Document = Map<String, Value>;
|
||||
@ -169,8 +170,22 @@ impl Index {
|
||||
})
|
||||
.collect();
|
||||
|
||||
let min_typo_word_len = MinWordLengthTypoSetting {
|
||||
one_typo: Setting::Set(self.min_word_len_one_typo(txn)?),
|
||||
two_typos: Setting::Set(self.min_word_len_two_typos(txn)?),
|
||||
};
|
||||
|
||||
let disabled_words = self
|
||||
.exact_words(txn)?
|
||||
.into_stream()
|
||||
.into_strs()?
|
||||
.into_iter()
|
||||
.collect();
|
||||
|
||||
let typo_tolerance = TypoSettings {
|
||||
enabled: Setting::Set(self.authorize_typos(txn)?),
|
||||
min_word_length_for_typo: Setting::Set(min_typo_word_len),
|
||||
disable_on_words: Setting::Set(disabled_words),
|
||||
};
|
||||
|
||||
Ok(Settings {
|
||||
|
@ -37,14 +37,33 @@ pub struct Checked;
|
||||
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq)]
|
||||
pub struct Unchecked;
|
||||
|
||||
/// Word-length thresholds nested under `TypoSettings::min_word_length_for_typo`.
///
/// Serialized with camelCase keys; unknown keys are rejected on deserialization.
/// Each field defaults when absent from the input and is omitted from the
/// output when `Setting::is_not_set` holds for it.
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct MinWordLengthTypoSetting {
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
/// Value forwarded to the settings builder's `set_min_word_len_one_typo`.
/// NOTE(review): presumably the minimum word length at which one typo is
/// tolerated; confirm against the milli documentation.
pub one_typo: Setting<u8>,
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
/// Value forwarded to the settings builder's `set_min_word_len_two_typos`.
/// NOTE(review): presumably the minimum word length at which two typos are
/// tolerated; confirm against the milli documentation.
pub two_typos: Setting<u8>,
|
||||
}
|
||||
|
||||
/// Typo-related settings of an index, as exchanged through the settings API.
///
/// Serialized with camelCase keys; unknown keys are rejected on deserialization.
/// Each field defaults when absent from the input and is omitted from the
/// output when `Setting::is_not_set` holds for it.
#[cfg_attr(test, derive(proptest_derive::Arbitrary))]
|
||||
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq)]
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct TypoSettings {
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
/// Applied through the builder's `set_autorize_typos` / `reset_authorize_typos`.
pub enabled: Setting<bool>,
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
/// Per-typo-count word-length thresholds; see `MinWordLengthTypoSetting`.
pub min_word_length_for_typo: Setting<MinWordLengthTypoSetting>,
|
||||
#[cfg_attr(test, proptest(strategy = "test::setting_strategy()"))]
|
||||
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
|
||||
/// Set of words applied through the builder's `set_exact_words` /
/// `reset_exact_words`.
/// NOTE(review): presumably words on which typo tolerance is disabled;
/// confirm against the specification.
pub disable_on_words: Setting<BTreeSet<String>>,
|
||||
}
|
||||
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
|
||||
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
|
||||
@ -352,14 +371,44 @@ pub fn apply_settings_to_builder(
|
||||
}
|
||||
|
||||
match settings.typo {
|
||||
Setting::Set(ref value) => match value.enabled {
|
||||
Setting::Set(val) => builder.set_autorize_typos(val),
|
||||
Setting::Reset => builder.reset_authorize_typos(),
|
||||
Setting::NotSet => (),
|
||||
},
|
||||
Setting::Set(ref value) => {
|
||||
match value.enabled {
|
||||
Setting::Set(val) => builder.set_autorize_typos(val),
|
||||
Setting::Reset => builder.reset_authorize_typos(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
match value.min_word_length_for_typo {
|
||||
Setting::Set(ref setting) => {
|
||||
match setting.one_typo {
|
||||
Setting::Set(val) => builder.set_min_word_len_one_typo(val),
|
||||
Setting::Reset => builder.reset_min_word_len_one_typo(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
match setting.two_typos {
|
||||
Setting::Set(val) => builder.set_min_word_len_two_typos(val),
|
||||
Setting::Reset => builder.reset_min_word_len_two_typos(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
}
|
||||
Setting::Reset => {
|
||||
builder.reset_min_word_len_one_typo();
|
||||
builder.reset_min_word_len_two_typos();
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
match value.disable_on_words {
|
||||
Setting::Set(ref words) => {
|
||||
builder.set_exact_words(words.clone());
|
||||
}
|
||||
Setting::Reset => builder.reset_exact_words(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
}
|
||||
Setting::Reset => {
|
||||
// Resetting the whole `typo` object must reset every typo setting,
// including the exact-words list that `disable_on_words` is applied to;
// otherwise previously disabled words survive a full reset.
|
||||
builder.reset_authorize_typos();
|
||||
builder.reset_min_word_len_one_typo();
|
||||
builder.reset_min_word_len_two_typos();
|
||||
builder.reset_exact_words();
|
||||
}
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user