mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
introduce word len for typo setting
This commit is contained in:
parent
9fe40df960
commit
5a24e60572
@ -72,6 +72,7 @@ pub enum UserError {
|
|||||||
SerdeJson(serde_json::Error),
|
SerdeJson(serde_json::Error),
|
||||||
SortError(SortError),
|
SortError(SortError),
|
||||||
UnknownInternalDocumentId { document_id: DocumentId },
|
UnknownInternalDocumentId { document_id: DocumentId },
|
||||||
|
InvalidMinTypoWordSetting(u8, u8),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<io::Error> for Error {
|
impl From<io::Error> for Error {
|
||||||
@ -291,6 +292,7 @@ ranking rules settings to use the sort parameter at search time.",
|
|||||||
Self::UnknownInternalDocumentId { document_id } => {
|
Self::UnknownInternalDocumentId { document_id } => {
|
||||||
write!(f, "An unknown internal document id have been used: `{}`.", document_id)
|
write!(f, "An unknown internal document id have been used: `{}`.", document_id)
|
||||||
}
|
}
|
||||||
|
Self::InvalidMinTypoWordSetting(one, two) => write!(f, "Invalid settings for MinWordLenForTypo, expected 0 < 1-typo < 2-typos < 255, but found 1-typo: {} and 2-typo: {}", one, two),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,9 @@ use crate::{
|
|||||||
Search, StrBEU32Codec, StrStrU8Codec, BEU32,
|
Search, StrBEU32Codec, StrStrU8Codec, BEU32,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Fallback returned by `Index::min_word_len_1_typo` when no value is stored under
/// `main_key::ONE_TYPO_WORD_LEN`.
///
/// NOTE(review): presumably the minimum length a word must have before one typo is
/// tolerated; confirm against the search-side consumer.
pub const DEFAULT_MIN_WORD_LEN_1_TYPO: u8 = 5;
|
||||||
|
/// Fallback returned by `Index::min_word_len_2_typo` when no value is stored under
/// `main_key::TWO_TYPOS_WORD_LEN`.
///
/// NOTE(review): presumably the minimum length a word must have before two typos are
/// tolerated; confirm against the search-side consumer.
pub const DEFAULT_MIN_WORD_LEN_2_TYPOS: u8 = 9;
|
||||||
|
|
||||||
pub mod main_key {
|
pub mod main_key {
|
||||||
pub const CRITERIA_KEY: &str = "criteria";
|
pub const CRITERIA_KEY: &str = "criteria";
|
||||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||||
@ -47,6 +50,8 @@ pub mod main_key {
|
|||||||
pub const CREATED_AT_KEY: &str = "created-at";
|
pub const CREATED_AT_KEY: &str = "created-at";
|
||||||
pub const UPDATED_AT_KEY: &str = "updated-at";
|
pub const UPDATED_AT_KEY: &str = "updated-at";
|
||||||
pub const AUTHORIZE_TYPOS: &str = "authorize-typos";
|
pub const AUTHORIZE_TYPOS: &str = "authorize-typos";
|
||||||
|
/// Key of the main database entry holding the one-typo minimum word length (a `u8`).
pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len";
|
||||||
|
/// Key of the main database entry holding the two-typos minimum word length (a `u8`).
pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod db_name {
|
pub mod db_name {
|
||||||
@ -886,6 +891,42 @@ impl Index {
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn min_word_len_1_typo(&self, txn: &RoTxn) -> heed::Result<u8> {
|
||||||
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
|
// because by default, we authorize typos.
|
||||||
|
Ok(self
|
||||||
|
.main
|
||||||
|
.get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)?
|
||||||
|
.unwrap_or(DEFAULT_MIN_WORD_LEN_1_TYPO))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores `val` as the one-typo minimum word length of this index.
///
/// The value is written as a `u8` in the main database under
/// `main_key::ONE_TYPO_WORD_LEN`. No validation is performed here.
///
/// # Errors
///
/// Returns the `heed` error raised by the database write, if any.
pub(crate) fn put_min_word_len_1_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
    let key = main_key::ONE_TYPO_WORD_LEN;
    self.main.put::<_, Str, OwnedType<u8>>(txn, key, &val)
}
|
||||||
|
|
||||||
|
pub fn min_word_len_2_typo(&self, txn: &RoTxn) -> heed::Result<u8> {
|
||||||
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
|
// because by default, we authorize typos.
|
||||||
|
Ok(self
|
||||||
|
.main
|
||||||
|
.get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)?
|
||||||
|
.unwrap_or(DEFAULT_MIN_WORD_LEN_2_TYPOS))
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores `val` as the two-typos minimum word length of this index.
///
/// The value is written as a `u8` in the main database under
/// `main_key::TWO_TYPOS_WORD_LEN`. No validation is performed here.
///
/// # Errors
///
/// Returns the `heed` error raised by the database write, if any.
pub(crate) fn put_min_word_len_2_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
    let key = main_key::TWO_TYPOS_WORD_LEN;
    self.main.put::<_, Str, OwnedType<u8>>(txn, key, &val)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -90,6 +90,8 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||||
primary_key: Setting<String>,
|
primary_key: Setting<String>,
|
||||||
authorize_typos: Setting<bool>,
|
authorize_typos: Setting<bool>,
|
||||||
|
min_2_typos_word_len: Setting<u8>,
|
||||||
|
min_1_typo_word_len: Setting<u8>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||||
@ -112,6 +114,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
primary_key: Setting::NotSet,
|
primary_key: Setting::NotSet,
|
||||||
authorize_typos: Setting::NotSet,
|
authorize_typos: Setting::NotSet,
|
||||||
indexer_config,
|
indexer_config,
|
||||||
|
min_2_typos_word_len: Setting::Reset,
|
||||||
|
min_1_typo_word_len: Setting::Reset,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -196,6 +200,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.authorize_typos = Setting::Reset;
|
self.authorize_typos = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Requests `val` as the new two-typos minimum word length.
///
/// The request is only recorded on this `Settings` value as `Setting::Set(val)`;
/// `update_min_typo_word_len` later validates it and writes it to the index.
pub fn set_min_2_typos_word_len(&mut self, val: u8) {
    let requested = Setting::Set(val);
    self.min_2_typos_word_len = requested;
}
|
||||||
|
|
||||||
|
/// Marks the two-typos minimum word length as `Setting::Reset`.
///
/// NOTE(review): as visible in this file, `update_min_typo_word_len` only acts on
/// `Setting::Set` values, so a reset does not currently write anything to the
/// index. Confirm whether restoring the default is expected here.
pub fn reset_min_2_typos_word_len(&mut self) {
    let cleared = Setting::Reset;
    self.min_2_typos_word_len = cleared;
}
|
||||||
|
|
||||||
|
/// Requests `val` as the new one-typo minimum word length.
///
/// The request is only recorded on this `Settings` value as `Setting::Set(val)`;
/// `update_min_typo_word_len` later validates it and writes it to the index.
pub fn set_min_1_typo_word_len(&mut self, val: u8) {
    let requested = Setting::Set(val);
    self.min_1_typo_word_len = requested;
}
|
||||||
|
|
||||||
|
/// Marks the one-typo minimum word length as `Setting::Reset`.
///
/// NOTE(review): the name says `1_typos` while the matching setter is
/// `set_min_1_typo_word_len`; it is kept as-is because it is part of the public API.
///
/// NOTE(review): as visible in this file, `update_min_typo_word_len` only acts on
/// `Setting::Set` values, so a reset does not currently write anything to the
/// index. Confirm whether restoring the default is expected here.
pub fn reset_min_1_typos_word_len(&mut self) {
    let cleared = Setting::Reset;
    self.min_1_typo_word_len = cleared;
}
|
||||||
|
|
||||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -474,6 +494,38 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn update_min_typo_word_len(&mut self) -> Result<()> {
|
||||||
|
match (&self.min_1_typo_word_len, &self.min_2_typos_word_len) {
|
||||||
|
(Setting::Set(one), Setting::Set(two)) => {
|
||||||
|
if one < two {
|
||||||
|
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?;
|
||||||
|
self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?;
|
||||||
|
} else {
|
||||||
|
return Err(UserError::InvalidMinTypoWordSetting(*one, *two).into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Setting::Set(one), _) => {
|
||||||
|
let two = self.index.min_word_len_2_typo(&self.wtxn)?;
|
||||||
|
if *one < two {
|
||||||
|
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?;
|
||||||
|
} else {
|
||||||
|
return Err(UserError::InvalidMinTypoWordSetting(*one, two).into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(_, Setting::Set(two)) => {
|
||||||
|
let one = self.index.min_word_len_1_typo(&self.wtxn)?;
|
||||||
|
if one < *two {
|
||||||
|
self.index.put_min_word_len_2_typo(&mut self.wtxn, *two)?;
|
||||||
|
} else {
|
||||||
|
return Err(UserError::InvalidMinTypoWordSetting(one, *two).into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -490,6 +542,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.update_criteria()?;
|
self.update_criteria()?;
|
||||||
self.update_primary_key()?;
|
self.update_primary_key()?;
|
||||||
self.update_authorize_typos()?;
|
self.update_authorize_typos()?;
|
||||||
|
self.update_min_typo_word_len()?;
|
||||||
|
|
||||||
// If there is new faceted fields we indicate that we must reindex as we must
|
// If there is new faceted fields we indicate that we must reindex as we must
|
||||||
// index new fields as facets. It means that the distinct attribute,
|
// index new fields as facets. It means that the distinct attribute,
|
||||||
|
Loading…
Reference in New Issue
Block a user