mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
rename min_word_len* to use plain letter numbers
This commit is contained in:
parent
4c4b336ecb
commit
66020cd923
@ -23,8 +23,8 @@ use crate::{
|
|||||||
Search, StrBEU32Codec, StrStrU8Codec, BEU32,
|
Search, StrBEU32Codec, StrStrU8Codec, BEU32,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub const DEFAULT_MIN_WORD_LEN_1_TYPO: u8 = 5;
|
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||||
pub const DEFAULT_MIN_WORD_LEN_2_TYPOS: u8 = 9;
|
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
|
||||||
|
|
||||||
pub mod main_key {
|
pub mod main_key {
|
||||||
pub const CRITERIA_KEY: &str = "criteria";
|
pub const CRITERIA_KEY: &str = "criteria";
|
||||||
@ -892,17 +892,17 @@ impl Index {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn min_word_len_1_typo(&self, txn: &RoTxn) -> heed::Result<u8> {
|
pub fn min_word_len_one_typo(&self, txn: &RoTxn) -> heed::Result<u8> {
|
||||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
// because by default, we authorize typos.
|
// because by default, we authorize typos.
|
||||||
Ok(self
|
Ok(self
|
||||||
.main
|
.main
|
||||||
.get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)?
|
.get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)?
|
||||||
.unwrap_or(DEFAULT_MIN_WORD_LEN_1_TYPO))
|
.unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn put_min_word_len_1_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
|
pub(crate) fn put_min_word_len_one_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
|
||||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
// because by default, we authorize typos.
|
// because by default, we authorize typos.
|
||||||
@ -910,17 +910,17 @@ impl Index {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn min_word_len_2_typos(&self, txn: &RoTxn) -> heed::Result<u8> {
|
pub fn min_word_len_two_typos(&self, txn: &RoTxn) -> heed::Result<u8> {
|
||||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
// because by default, we authorize typos.
|
// because by default, we authorize typos.
|
||||||
Ok(self
|
Ok(self
|
||||||
.main
|
.main
|
||||||
.get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)?
|
.get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)?
|
||||||
.unwrap_or(DEFAULT_MIN_WORD_LEN_2_TYPOS))
|
.unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn put_min_word_len_2_typos(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
|
pub(crate) fn put_min_word_len_two_typos(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
|
||||||
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
// because by default, we authorize typos.
|
// because by default, we authorize typos.
|
||||||
@ -937,7 +937,7 @@ pub(crate) mod tests {
|
|||||||
use maplit::btreemap;
|
use maplit::btreemap;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
|
|
||||||
use crate::index::{DEFAULT_MIN_WORD_LEN_1_TYPO, DEFAULT_MIN_WORD_LEN_2_TYPOS};
|
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||||
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
|
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
|
||||||
use crate::Index;
|
use crate::Index;
|
||||||
|
|
||||||
@ -1071,16 +1071,16 @@ pub(crate) mod tests {
|
|||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
let mut txn = index.write_txn().unwrap();
|
let mut txn = index.write_txn().unwrap();
|
||||||
|
|
||||||
assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_1_TYPO);
|
assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_ONE_TYPO);
|
||||||
assert_eq!(index.min_word_len_2_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_2_TYPOS);
|
assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_TWO_TYPOS);
|
||||||
|
|
||||||
index.put_min_word_len_1_typo(&mut txn, 3).unwrap();
|
index.put_min_word_len_one_typo(&mut txn, 3).unwrap();
|
||||||
index.put_min_word_len_2_typos(&mut txn, 15).unwrap();
|
index.put_min_word_len_two_typos(&mut txn, 15).unwrap();
|
||||||
|
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
|
|
||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), 3);
|
assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 3);
|
||||||
assert_eq!(index.min_word_len_2_typos(&txn).unwrap(), 15);
|
assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 15);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -182,8 +182,8 @@ impl<'a> Context for QueryTreeBuilder<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
|
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
|
||||||
let one = self.index.min_word_len_1_typo(&self.rtxn)?;
|
let one = self.index.min_word_len_one_typo(&self.rtxn)?;
|
||||||
let two = self.index.min_word_len_2_typos(&self.rtxn)?;
|
let two = self.index.min_word_len_two_typos(&self.rtxn)?;
|
||||||
Ok((one, two))
|
Ok((one, two))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -267,8 +267,8 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct TypoConfig {
|
pub struct TypoConfig {
|
||||||
pub max_typos: u8,
|
pub max_typos: u8,
|
||||||
pub word_len_1_typo: u8,
|
pub word_len_one_typo: u8,
|
||||||
pub word_len_2_typo: u8,
|
pub word_len_two_typo: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the `QueryKind` of a word depending on `authorize_typos`
|
/// Return the `QueryKind` of a word depending on `authorize_typos`
|
||||||
@ -276,9 +276,9 @@ pub struct TypoConfig {
|
|||||||
fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind {
|
fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind {
|
||||||
if authorize_typos {
|
if authorize_typos {
|
||||||
let count = word.chars().count().min(u8::MAX as usize) as u8;
|
let count = word.chars().count().min(u8::MAX as usize) as u8;
|
||||||
if (0..config.word_len_1_typo).contains(&count) {
|
if (0..config.word_len_one_typo).contains(&count) {
|
||||||
QueryKind::exact(word)
|
QueryKind::exact(word)
|
||||||
} else if (config.word_len_1_typo..config.word_len_2_typo).contains(&count) {
|
} else if (config.word_len_one_typo..config.word_len_two_typo).contains(&count) {
|
||||||
QueryKind::tolerant(1.min(config.max_typos), word)
|
QueryKind::tolerant(1.min(config.max_typos), word)
|
||||||
} else {
|
} else {
|
||||||
QueryKind::tolerant(2.min(config.max_typos), word)
|
QueryKind::tolerant(2.min(config.max_typos), word)
|
||||||
@ -332,8 +332,8 @@ fn create_query_tree(
|
|||||||
if let Some(child) = split_best_frequency(ctx, &word)? {
|
if let Some(child) = split_best_frequency(ctx, &word)? {
|
||||||
children.push(child);
|
children.push(child);
|
||||||
}
|
}
|
||||||
let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?;
|
let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?;
|
||||||
let config = TypoConfig { max_typos: 2, word_len_1_typo, word_len_2_typo };
|
let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo };
|
||||||
children.push(Operation::Query(Query {
|
children.push(Operation::Query(Query {
|
||||||
prefix,
|
prefix,
|
||||||
kind: typos(word, authorize_typos, config),
|
kind: typos(word, authorize_typos, config),
|
||||||
@ -383,9 +383,10 @@ fn create_query_tree(
|
|||||||
.collect();
|
.collect();
|
||||||
let mut operations = synonyms(ctx, &words)?.unwrap_or_default();
|
let mut operations = synonyms(ctx, &words)?.unwrap_or_default();
|
||||||
let concat = words.concat();
|
let concat = words.concat();
|
||||||
let (word_len_1_typo, word_len_2_typo) = ctx.min_word_len_for_typo()?;
|
let (word_len_one_typo, word_len_two_typo) =
|
||||||
|
ctx.min_word_len_for_typo()?;
|
||||||
let config =
|
let config =
|
||||||
TypoConfig { max_typos: 1, word_len_1_typo, word_len_2_typo };
|
TypoConfig { max_typos: 1, word_len_one_typo, word_len_two_typo };
|
||||||
let query = Query {
|
let query = Query {
|
||||||
prefix: is_prefix,
|
prefix: is_prefix,
|
||||||
kind: typos(concat, authorize_typos, config),
|
kind: typos(concat, authorize_typos, config),
|
||||||
@ -1223,7 +1224,7 @@ mod test {
|
|||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_min_word_len_typo() {
|
fn test_min_word_len_typo() {
|
||||||
let config = TypoConfig { max_typos: 2, word_len_1_typo: 5, word_len_2_typo: 7 };
|
let config = TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7 };
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
typos("hello".to_string(), true, config.clone()),
|
typos("hello".to_string(), true, config.clone()),
|
||||||
|
@ -90,8 +90,8 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||||
primary_key: Setting<String>,
|
primary_key: Setting<String>,
|
||||||
authorize_typos: Setting<bool>,
|
authorize_typos: Setting<bool>,
|
||||||
min_2_typos_word_len: Setting<u8>,
|
min_word_len_two_typos: Setting<u8>,
|
||||||
min_1_typo_word_len: Setting<u8>,
|
min_word_len_one_typo: Setting<u8>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||||
@ -114,8 +114,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
primary_key: Setting::NotSet,
|
primary_key: Setting::NotSet,
|
||||||
authorize_typos: Setting::NotSet,
|
authorize_typos: Setting::NotSet,
|
||||||
indexer_config,
|
indexer_config,
|
||||||
min_2_typos_word_len: Setting::Reset,
|
min_word_len_two_typos: Setting::Reset,
|
||||||
min_1_typo_word_len: Setting::Reset,
|
min_word_len_one_typo: Setting::Reset,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -200,20 +200,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.authorize_typos = Setting::Reset;
|
self.authorize_typos = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_min_2_typos_word_len(&mut self, val: u8) {
|
pub fn set_min_word_len_two_typos(&mut self, val: u8) {
|
||||||
self.min_2_typos_word_len = Setting::Set(val);
|
self.min_word_len_two_typos = Setting::Set(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reset_min_2_typos_word_len(&mut self) {
|
pub fn reset_min_word_len_two_typos(&mut self) {
|
||||||
self.min_2_typos_word_len = Setting::Reset;
|
self.min_word_len_two_typos = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn set_min_1_typo_word_len(&mut self, val: u8) {
|
pub fn set_min_word_len_one_typo(&mut self, val: u8) {
|
||||||
self.min_1_typo_word_len = Setting::Set(val);
|
self.min_word_len_one_typo = Setting::Set(val);
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn reset_min_1_typos_word_len(&mut self) {
|
pub fn reset_min_word_len_one_typo(&mut self) {
|
||||||
self.min_1_typo_word_len = Setting::Reset;
|
self.min_word_len_one_typo = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
||||||
@ -495,29 +495,29 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn update_min_typo_word_len(&mut self) -> Result<()> {
|
fn update_min_typo_word_len(&mut self) -> Result<()> {
|
||||||
match (&self.min_1_typo_word_len, &self.min_2_typos_word_len) {
|
match (&self.min_word_len_one_typo, &self.min_word_len_two_typos) {
|
||||||
(Setting::Set(one), Setting::Set(two)) => {
|
(Setting::Set(one), Setting::Set(two)) => {
|
||||||
if one > two {
|
if one > two {
|
||||||
return Err(UserError::InvalidMinTypoWordLenSetting(*one, *two).into());
|
return Err(UserError::InvalidMinTypoWordLenSetting(*one, *two).into());
|
||||||
} else {
|
} else {
|
||||||
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?;
|
self.index.put_min_word_len_one_typo(&mut self.wtxn, *one)?;
|
||||||
self.index.put_min_word_len_2_typos(&mut self.wtxn, *two)?;
|
self.index.put_min_word_len_two_typos(&mut self.wtxn, *two)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(Setting::Set(one), _) => {
|
(Setting::Set(one), _) => {
|
||||||
let two = self.index.min_word_len_2_typos(&self.wtxn)?;
|
let two = self.index.min_word_len_two_typos(&self.wtxn)?;
|
||||||
if *one > two {
|
if *one > two {
|
||||||
return Err(UserError::InvalidMinTypoWordLenSetting(*one, two).into());
|
return Err(UserError::InvalidMinTypoWordLenSetting(*one, two).into());
|
||||||
} else {
|
} else {
|
||||||
self.index.put_min_word_len_1_typo(&mut self.wtxn, *one)?;
|
self.index.put_min_word_len_one_typo(&mut self.wtxn, *one)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(_, Setting::Set(two)) => {
|
(_, Setting::Set(two)) => {
|
||||||
let one = self.index.min_word_len_1_typo(&self.wtxn)?;
|
let one = self.index.min_word_len_one_typo(&self.wtxn)?;
|
||||||
if one > *two {
|
if one > *two {
|
||||||
return Err(UserError::InvalidMinTypoWordLenSetting(one, *two).into());
|
return Err(UserError::InvalidMinTypoWordLenSetting(one, *two).into());
|
||||||
} else {
|
} else {
|
||||||
self.index.put_min_word_len_2_typos(&mut self.wtxn, *two)?;
|
self.index.put_min_word_len_two_typos(&mut self.wtxn, *two)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
@ -1295,16 +1295,16 @@ mod tests {
|
|||||||
// Set the genres setting
|
// Set the genres setting
|
||||||
let mut txn = index.write_txn().unwrap();
|
let mut txn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut txn, &index, &config);
|
let mut builder = Settings::new(&mut txn, &index, &config);
|
||||||
builder.set_min_1_typo_word_len(8);
|
builder.set_min_word_len_one_typo(8);
|
||||||
builder.set_min_2_typos_word_len(8);
|
builder.set_min_word_len_two_typos(8);
|
||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
|
|
||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
assert_eq!(index.min_word_len_1_typo(&txn).unwrap(), 8);
|
assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 8);
|
||||||
assert_eq!(index.min_word_len_2_typos(&txn).unwrap(), 8);
|
assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -1315,8 +1315,8 @@ mod tests {
|
|||||||
// Set the genres setting
|
// Set the genres setting
|
||||||
let mut txn = index.write_txn().unwrap();
|
let mut txn = index.write_txn().unwrap();
|
||||||
let mut builder = Settings::new(&mut txn, &index, &config);
|
let mut builder = Settings::new(&mut txn, &index, &config);
|
||||||
builder.set_min_1_typo_word_len(10);
|
builder.set_min_word_len_one_typo(10);
|
||||||
builder.set_min_2_typos_word_len(7);
|
builder.set_min_word_len_two_typos(7);
|
||||||
assert!(builder.execute(|_| ()).is_err());
|
assert!(builder.execute(|_| ()).is_err());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user