mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-10 23:18:55 +01:00
Merge #473
473: set minimum word len for typos r=MarinPostma a=MarinPostma this PR allows the configuration on the minimum word length for typos. The default values are the same as previously. ## steps - [x] introduce settings for the minimum word length for 1 and 2 typos - [x] update the settings update flow to set this setting - [x] create a structure `TypoConfig` to configure typo tolerance in the query builder - [x] in `typo`, use the configuration to create the appropriate query tree node. - [x] extend `Context` to return the setting for minimum word length for typos - [x] return correct error message for wrong settings. - [x] merge #469 Co-authored-by: ad hoc <postma.marin@protonmail.com>
This commit is contained in:
commit
48a5ce7434
@ -72,6 +72,7 @@ pub enum UserError {
|
|||||||
SerdeJson(serde_json::Error),
|
SerdeJson(serde_json::Error),
|
||||||
SortError(SortError),
|
SortError(SortError),
|
||||||
UnknownInternalDocumentId { document_id: DocumentId },
|
UnknownInternalDocumentId { document_id: DocumentId },
|
||||||
|
InvalidMinTypoWordLenSetting(u8, u8),
|
||||||
}
|
}
|
||||||
|
|
||||||
impl From<io::Error> for Error {
|
impl From<io::Error> for Error {
|
||||||
@ -291,6 +292,7 @@ ranking rules settings to use the sort parameter at search time.",
|
|||||||
Self::UnknownInternalDocumentId { document_id } => {
|
Self::UnknownInternalDocumentId { document_id } => {
|
||||||
write!(f, "An unknown internal document id have been used: `{}`.", document_id)
|
write!(f, "An unknown internal document id have been used: `{}`.", document_id)
|
||||||
}
|
}
|
||||||
|
Self::InvalidMinTypoWordLenSetting(one, two) => write!(f, "`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {}` and twoTypos: {}`.", one, two),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -23,6 +23,9 @@ use crate::{
|
|||||||
Search, StrBEU32Codec, StrStrU8Codec, BEU32,
|
Search, StrBEU32Codec, StrStrU8Codec, BEU32,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
||||||
|
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
|
||||||
|
|
||||||
pub mod main_key {
|
pub mod main_key {
|
||||||
pub const CRITERIA_KEY: &str = "criteria";
|
pub const CRITERIA_KEY: &str = "criteria";
|
||||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||||
@ -47,6 +50,8 @@ pub mod main_key {
|
|||||||
pub const CREATED_AT_KEY: &str = "created-at";
|
pub const CREATED_AT_KEY: &str = "created-at";
|
||||||
pub const UPDATED_AT_KEY: &str = "updated-at";
|
pub const UPDATED_AT_KEY: &str = "updated-at";
|
||||||
pub const AUTHORIZE_TYPOS: &str = "authorize-typos";
|
pub const AUTHORIZE_TYPOS: &str = "authorize-typos";
|
||||||
|
pub const ONE_TYPO_WORD_LEN: &str = "one-typo-word-len";
|
||||||
|
pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod db_name {
|
pub mod db_name {
|
||||||
@ -886,6 +891,42 @@ impl Index {
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn min_word_len_one_typo(&self, txn: &RoTxn) -> heed::Result<u8> {
|
||||||
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
|
// because by default, we authorize typos.
|
||||||
|
Ok(self
|
||||||
|
.main
|
||||||
|
.get::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN)?
|
||||||
|
.unwrap_or(DEFAULT_MIN_WORD_LEN_ONE_TYPO))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn put_min_word_len_one_typo(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
|
||||||
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
|
// because by default, we authorize typos.
|
||||||
|
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::ONE_TYPO_WORD_LEN, &val)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn min_word_len_two_typos(&self, txn: &RoTxn) -> heed::Result<u8> {
|
||||||
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
|
// because by default, we authorize typos.
|
||||||
|
Ok(self
|
||||||
|
.main
|
||||||
|
.get::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN)?
|
||||||
|
.unwrap_or(DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn put_min_word_len_two_typos(&self, txn: &mut RwTxn, val: u8) -> heed::Result<()> {
|
||||||
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
|
// because by default, we authorize typos.
|
||||||
|
self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::TWO_TYPOS_WORD_LEN, &val)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@ -896,6 +937,7 @@ pub(crate) mod tests {
|
|||||||
use maplit::btreemap;
|
use maplit::btreemap;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
|
|
||||||
|
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||||
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
|
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig};
|
||||||
use crate::Index;
|
use crate::Index;
|
||||||
|
|
||||||
@ -1023,4 +1065,22 @@ pub(crate) mod tests {
|
|||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
assert!(!index.authorize_typos(&txn).unwrap());
|
assert!(!index.authorize_typos(&txn).unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn set_min_word_len_for_typos() {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
let mut txn = index.write_txn().unwrap();
|
||||||
|
|
||||||
|
assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_ONE_TYPO);
|
||||||
|
assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), DEFAULT_MIN_WORD_LEN_TWO_TYPOS);
|
||||||
|
|
||||||
|
index.put_min_word_len_one_typo(&mut txn, 3).unwrap();
|
||||||
|
index.put_min_word_len_two_typos(&mut txn, 15).unwrap();
|
||||||
|
|
||||||
|
txn.commit().unwrap();
|
||||||
|
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 3);
|
||||||
|
assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 15);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -155,6 +155,8 @@ trait Context {
|
|||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/// Returns the minimum word len for 1 and 2 typos.
|
||||||
|
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The query tree builder is the interface to build a query tree.
|
/// The query tree builder is the interface to build a query tree.
|
||||||
@ -178,6 +180,12 @@ impl<'a> Context for QueryTreeBuilder<'a> {
|
|||||||
fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
|
fn word_documents_count(&self, word: &str) -> heed::Result<Option<u64>> {
|
||||||
self.index.word_documents_count(self.rtxn, word)
|
self.index.word_documents_count(self.rtxn, word)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
|
||||||
|
let one = self.index.min_word_len_one_typo(&self.rtxn)?;
|
||||||
|
let two = self.index.min_word_len_two_typos(&self.rtxn)?;
|
||||||
|
Ok((one, two))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> QueryTreeBuilder<'a> {
|
impl<'a> QueryTreeBuilder<'a> {
|
||||||
@ -256,14 +264,24 @@ fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<O
|
|||||||
Ok(best.map(|(_, left, right)| Operation::Phrase(vec![left.to_string(), right.to_string()])))
|
Ok(best.map(|(_, left, right)| Operation::Phrase(vec![left.to_string(), right.to_string()])))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct TypoConfig {
|
||||||
|
pub max_typos: u8,
|
||||||
|
pub word_len_one_typo: u8,
|
||||||
|
pub word_len_two_typo: u8,
|
||||||
|
}
|
||||||
|
|
||||||
/// Return the `QueryKind` of a word depending on `authorize_typos`
|
/// Return the `QueryKind` of a word depending on `authorize_typos`
|
||||||
/// and the provided word length.
|
/// and the provided word length.
|
||||||
fn typos(word: String, authorize_typos: bool, max_typos: u8) -> QueryKind {
|
fn typos(word: String, authorize_typos: bool, config: TypoConfig) -> QueryKind {
|
||||||
if authorize_typos {
|
if authorize_typos {
|
||||||
match word.chars().count() {
|
let count = word.chars().count().min(u8::MAX as usize) as u8;
|
||||||
0..=4 => QueryKind::exact(word),
|
if count < config.word_len_one_typo {
|
||||||
5..=8 => QueryKind::tolerant(1.min(max_typos), word),
|
QueryKind::exact(word)
|
||||||
_ => QueryKind::tolerant(2.min(max_typos), word),
|
} else if count < config.word_len_two_typo {
|
||||||
|
QueryKind::tolerant(1.min(config.max_typos), word)
|
||||||
|
} else {
|
||||||
|
QueryKind::tolerant(2.min(config.max_typos), word)
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
QueryKind::exact(word)
|
QueryKind::exact(word)
|
||||||
@ -314,9 +332,11 @@ fn create_query_tree(
|
|||||||
if let Some(child) = split_best_frequency(ctx, &word)? {
|
if let Some(child) = split_best_frequency(ctx, &word)? {
|
||||||
children.push(child);
|
children.push(child);
|
||||||
}
|
}
|
||||||
|
let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?;
|
||||||
|
let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo };
|
||||||
children.push(Operation::Query(Query {
|
children.push(Operation::Query(Query {
|
||||||
prefix,
|
prefix,
|
||||||
kind: typos(word, authorize_typos, 2),
|
kind: typos(word, authorize_typos, config),
|
||||||
}));
|
}));
|
||||||
Ok(Operation::or(false, children))
|
Ok(Operation::or(false, children))
|
||||||
}
|
}
|
||||||
@ -363,9 +383,13 @@ fn create_query_tree(
|
|||||||
.collect();
|
.collect();
|
||||||
let mut operations = synonyms(ctx, &words)?.unwrap_or_default();
|
let mut operations = synonyms(ctx, &words)?.unwrap_or_default();
|
||||||
let concat = words.concat();
|
let concat = words.concat();
|
||||||
|
let (word_len_one_typo, word_len_two_typo) =
|
||||||
|
ctx.min_word_len_for_typo()?;
|
||||||
|
let config =
|
||||||
|
TypoConfig { max_typos: 1, word_len_one_typo, word_len_two_typo };
|
||||||
let query = Query {
|
let query = Query {
|
||||||
prefix: is_prefix,
|
prefix: is_prefix,
|
||||||
kind: typos(concat, authorize_typos, 1),
|
kind: typos(concat, authorize_typos, config),
|
||||||
};
|
};
|
||||||
operations.push(Operation::Query(query));
|
operations.push(Operation::Query(query));
|
||||||
and_op_children.push(Operation::or(false, operations));
|
and_op_children.push(Operation::or(false, operations));
|
||||||
@ -541,6 +565,7 @@ mod test {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
struct TestContext {
|
struct TestContext {
|
||||||
@ -576,6 +601,10 @@ mod test {
|
|||||||
let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
|
let words: Vec<_> = words.iter().map(|s| s.as_ref().to_owned()).collect();
|
||||||
Ok(self.synonyms.get(&words).cloned())
|
Ok(self.synonyms.get(&words).cloned())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)> {
|
||||||
|
Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for TestContext {
|
impl Default for TestContext {
|
||||||
@ -1193,4 +1222,24 @@ mod test {
|
|||||||
|
|
||||||
assert_eq!(expected, query_tree);
|
assert_eq!(expected, query_tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_min_word_len_typo() {
|
||||||
|
let config = TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7 };
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
typos("hello".to_string(), true, config.clone()),
|
||||||
|
QueryKind::Tolerant { typo: 1, word: "hello".to_string() }
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
typos("hell".to_string(), true, config.clone()),
|
||||||
|
QueryKind::exact("hell".to_string())
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_eq!(
|
||||||
|
typos("verylongword".to_string(), true, config.clone()),
|
||||||
|
QueryKind::Tolerant { typo: 2, word: "verylongword".to_string() }
|
||||||
|
);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -14,7 +14,7 @@ use crate::update::index_documents::IndexDocumentsMethod;
|
|||||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
||||||
use crate::{FieldsIdsMap, Index, Result};
|
use crate::{FieldsIdsMap, Index, Result};
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq, Copy)]
|
||||||
pub enum Setting<T> {
|
pub enum Setting<T> {
|
||||||
Set(T),
|
Set(T),
|
||||||
Reset,
|
Reset,
|
||||||
@ -90,6 +90,8 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||||
primary_key: Setting<String>,
|
primary_key: Setting<String>,
|
||||||
authorize_typos: Setting<bool>,
|
authorize_typos: Setting<bool>,
|
||||||
|
min_word_len_two_typos: Setting<u8>,
|
||||||
|
min_word_len_one_typo: Setting<u8>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||||
@ -112,6 +114,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
primary_key: Setting::NotSet,
|
primary_key: Setting::NotSet,
|
||||||
authorize_typos: Setting::NotSet,
|
authorize_typos: Setting::NotSet,
|
||||||
indexer_config,
|
indexer_config,
|
||||||
|
min_word_len_two_typos: Setting::Reset,
|
||||||
|
min_word_len_one_typo: Setting::Reset,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -196,6 +200,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.authorize_typos = Setting::Reset;
|
self.authorize_typos = Setting::Reset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_min_word_len_two_typos(&mut self, val: u8) {
|
||||||
|
self.min_word_len_two_typos = Setting::Set(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reset_min_word_len_two_typos(&mut self) {
|
||||||
|
self.min_word_len_two_typos = Setting::Reset;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn set_min_word_len_one_typo(&mut self, val: u8) {
|
||||||
|
self.min_word_len_one_typo = Setting::Set(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reset_min_word_len_one_typo(&mut self) {
|
||||||
|
self.min_word_len_one_typo = Setting::Reset;
|
||||||
|
}
|
||||||
|
|
||||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -474,6 +494,38 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn update_min_typo_word_len(&mut self) -> Result<()> {
|
||||||
|
match (self.min_word_len_one_typo, self.min_word_len_two_typos) {
|
||||||
|
(Setting::Set(one), Setting::Set(two)) => {
|
||||||
|
if one > two {
|
||||||
|
return Err(UserError::InvalidMinTypoWordLenSetting(one, two).into());
|
||||||
|
} else {
|
||||||
|
self.index.put_min_word_len_one_typo(&mut self.wtxn, one)?;
|
||||||
|
self.index.put_min_word_len_two_typos(&mut self.wtxn, two)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(Setting::Set(one), _) => {
|
||||||
|
let two = self.index.min_word_len_two_typos(&self.wtxn)?;
|
||||||
|
if one > two {
|
||||||
|
return Err(UserError::InvalidMinTypoWordLenSetting(one, two).into());
|
||||||
|
} else {
|
||||||
|
self.index.put_min_word_len_one_typo(&mut self.wtxn, one)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
(_, Setting::Set(two)) => {
|
||||||
|
let one = self.index.min_word_len_one_typo(&self.wtxn)?;
|
||||||
|
if one > two {
|
||||||
|
return Err(UserError::InvalidMinTypoWordLenSetting(one, two).into());
|
||||||
|
} else {
|
||||||
|
self.index.put_min_word_len_two_typos(&mut self.wtxn, two)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -490,6 +542,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.update_criteria()?;
|
self.update_criteria()?;
|
||||||
self.update_primary_key()?;
|
self.update_primary_key()?;
|
||||||
self.update_authorize_typos()?;
|
self.update_authorize_typos()?;
|
||||||
|
self.update_min_typo_word_len()?;
|
||||||
|
|
||||||
// If there is new faceted fields we indicate that we must reindex as we must
|
// If there is new faceted fields we indicate that we must reindex as we must
|
||||||
// index new fields as facets. It means that the distinct attribute,
|
// index new fields as facets. It means that the distinct attribute,
|
||||||
@ -1233,4 +1286,37 @@ mod tests {
|
|||||||
builder.execute(|_| ()).unwrap();
|
builder.execute(|_| ()).unwrap();
|
||||||
assert!(!index.authorize_typos(&txn).unwrap());
|
assert!(!index.authorize_typos(&txn).unwrap());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn update_min_word_len_for_typo() {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
|
// Set the genres setting
|
||||||
|
let mut txn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut txn, &index, &config);
|
||||||
|
builder.set_min_word_len_one_typo(8);
|
||||||
|
builder.set_min_word_len_two_typos(8);
|
||||||
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
|
txn.commit().unwrap();
|
||||||
|
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
assert_eq!(index.min_word_len_one_typo(&txn).unwrap(), 8);
|
||||||
|
assert_eq!(index.min_word_len_two_typos(&txn).unwrap(), 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn update_invalid_min_word_len_for_typo() {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
|
||||||
|
// Set the genres setting
|
||||||
|
let mut txn = index.write_txn().unwrap();
|
||||||
|
let mut builder = Settings::new(&mut txn, &index, &config);
|
||||||
|
builder.set_min_word_len_one_typo(10);
|
||||||
|
builder.set_min_word_len_two_typos(7);
|
||||||
|
assert!(builder.execute(|_| ()).is_err());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,6 +16,7 @@ mod distinct;
|
|||||||
mod filters;
|
mod filters;
|
||||||
mod query_criteria;
|
mod query_criteria;
|
||||||
mod sort;
|
mod sort;
|
||||||
|
mod typo_tolerance;
|
||||||
|
|
||||||
pub const TEST_QUERY: &'static str = "hello world america";
|
pub const TEST_QUERY: &'static str = "hello world america";
|
||||||
|
|
||||||
|
95
milli/tests/search/typo_tolerance.rs
Normal file
95
milli/tests/search/typo_tolerance.rs
Normal file
@ -0,0 +1,95 @@
|
|||||||
|
use milli::update::{IndexerConfig, Settings};
|
||||||
|
use milli::{Criterion, Search};
|
||||||
|
use Criterion::*;
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_typo_tolerance_one_typo() {
|
||||||
|
let criteria = [Typo];
|
||||||
|
let index = super::setup_search_index_with_criteria(&criteria);
|
||||||
|
|
||||||
|
// basic typo search with default typo settings
|
||||||
|
{
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut search = Search::new(&txn, &index);
|
||||||
|
search.query("zeal");
|
||||||
|
search.limit(10);
|
||||||
|
search.authorize_typos(true);
|
||||||
|
search.optional_words(true);
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
|
|
||||||
|
let mut search = Search::new(&txn, &index);
|
||||||
|
search.query("zean");
|
||||||
|
search.limit(10);
|
||||||
|
search.authorize_typos(true);
|
||||||
|
search.optional_words(true);
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut txn = index.write_txn().unwrap();
|
||||||
|
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let mut builder = Settings::new(&mut txn, &index, &config);
|
||||||
|
builder.set_min_word_len_one_typo(4);
|
||||||
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
|
// typo is now supported for 4 letters words
|
||||||
|
let mut search = Search::new(&txn, &index);
|
||||||
|
search.query("zean");
|
||||||
|
search.limit(10);
|
||||||
|
search.authorize_typos(true);
|
||||||
|
search.optional_words(true);
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_typo_tolerance_two_typo() {
|
||||||
|
let criteria = [Typo];
|
||||||
|
let index = super::setup_search_index_with_criteria(&criteria);
|
||||||
|
|
||||||
|
// basic typo search with default typo settings
|
||||||
|
{
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut search = Search::new(&txn, &index);
|
||||||
|
search.query("zealand");
|
||||||
|
search.limit(10);
|
||||||
|
search.authorize_typos(true);
|
||||||
|
search.optional_words(true);
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
|
|
||||||
|
let mut search = Search::new(&txn, &index);
|
||||||
|
search.query("zealemd");
|
||||||
|
search.limit(10);
|
||||||
|
search.authorize_typos(true);
|
||||||
|
search.optional_words(true);
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut txn = index.write_txn().unwrap();
|
||||||
|
|
||||||
|
let config = IndexerConfig::default();
|
||||||
|
let mut builder = Settings::new(&mut txn, &index, &config);
|
||||||
|
builder.set_min_word_len_two_typos(7);
|
||||||
|
builder.execute(|_| ()).unwrap();
|
||||||
|
|
||||||
|
// typo is now supported for 4 letters words
|
||||||
|
let mut search = Search::new(&txn, &index);
|
||||||
|
search.query("zealemd");
|
||||||
|
search.limit(10);
|
||||||
|
search.authorize_typos(true);
|
||||||
|
search.optional_words(true);
|
||||||
|
|
||||||
|
let result = search.execute().unwrap();
|
||||||
|
assert_eq!(result.documents_ids.len(), 1);
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user