mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Merge #469
469: add authorize typo setting r=Kerollmops a=MarinPostma This PR adds support for an authorize typos setting. This makes it possible to disable typos for a whole index. Typos are enabled by default. Co-authored-by: ad hoc <postma.marin@protonmail.com>
This commit is contained in:
commit
d2d930dd3f
@ -46,6 +46,7 @@ pub mod main_key {
|
|||||||
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
||||||
pub const CREATED_AT_KEY: &str = "created-at";
|
pub const CREATED_AT_KEY: &str = "created-at";
|
||||||
pub const UPDATED_AT_KEY: &str = "updated-at";
|
pub const UPDATED_AT_KEY: &str = "updated-at";
|
||||||
|
pub const AUTHORIZE_TYPOS: &str = "authorize-typos";
|
||||||
}
|
}
|
||||||
|
|
||||||
pub mod db_name {
|
pub mod db_name {
|
||||||
@ -866,6 +867,25 @@ impl Index {
|
|||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, &time)
|
self.main.put::<_, Str, SerdeJson<OffsetDateTime>>(wtxn, main_key::UPDATED_AT_KEY, &time)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn authorize_typos(&self, txn: &RoTxn) -> heed::Result<bool> {
|
||||||
|
// It is not possible to put a bool in heed with OwnedType, so we put a u8 instead. We
|
||||||
|
// identify 0 as being false, and anything else as true. The absence of a value is true,
|
||||||
|
// because by default, we authorize typos.
|
||||||
|
match self.main.get::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS)? {
|
||||||
|
Some(0) => Ok(false),
|
||||||
|
_ => Ok(true),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores whether typos are authorized for this index.
///
/// A `bool` cannot be stored in heed through `OwnedType`, so the flag is written as a `u8`
/// under `main_key::AUTHORIZE_TYPOS`: `false` becomes `0` and `true` becomes `1`.
pub(crate) fn put_authorize_typos(&self, txn: &mut RwTxn, flag: bool) -> heed::Result<()> {
    let encoded = u8::from(flag);
    self.main.put::<_, Str, OwnedType<u8>>(txn, main_key::AUTHORIZE_TYPOS, &encoded)
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@ -989,4 +1009,18 @@ pub(crate) mod tests {
|
|||||||
}
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks that the typo flag defaults to `true` and that a stored `false` is still read
/// back after the write transaction has been committed.
#[test]
fn put_and_retrieve_disable_typo() {
    let index = TempIndex::new();

    // Nothing has been stored yet: typos must be authorized by default.
    let mut wtxn = index.write_txn().unwrap();
    assert!(index.authorize_typos(&wtxn).unwrap());

    // Disable typos and persist the change.
    index.put_authorize_typos(&mut wtxn, false).unwrap();
    wtxn.commit().unwrap();

    // A fresh read transaction must observe the disabled flag.
    let rtxn = index.read_txn().unwrap();
    assert!(!index.authorize_typos(&rtxn).unwrap());
}
|
||||||
}
|
}
|
||||||
|
@ -105,6 +105,12 @@ impl<'a> Search<'a> {
|
|||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Tells whether this search may match words with typos.
///
/// Typos are only authorized when both the index-level setting and the flag carried by
/// this query allow them. The index setting is always read, so a database error is
/// reported even when the query itself disables typos.
fn is_typo_authorized(&self) -> Result<bool> {
    let allowed_by_index = self.index.authorize_typos(self.rtxn)?;
    let allowed_by_query = self.authorize_typos;
    Ok(allowed_by_query && allowed_by_index)
}
|
||||||
|
|
||||||
pub fn execute(&self) -> Result<SearchResult> {
|
pub fn execute(&self) -> Result<SearchResult> {
|
||||||
// We create the query tree by spliting the query into tokens.
|
// We create the query tree by spliting the query into tokens.
|
||||||
let before = Instant::now();
|
let before = Instant::now();
|
||||||
@ -112,7 +118,9 @@ impl<'a> Search<'a> {
|
|||||||
Some(query) => {
|
Some(query) => {
|
||||||
let mut builder = QueryTreeBuilder::new(self.rtxn, self.index);
|
let mut builder = QueryTreeBuilder::new(self.rtxn, self.index);
|
||||||
builder.optional_words(self.optional_words);
|
builder.optional_words(self.optional_words);
|
||||||
builder.authorize_typos(self.authorize_typos);
|
|
||||||
|
builder.authorize_typos(self.is_typo_authorized()?);
|
||||||
|
|
||||||
builder.words_limit(self.words_limit);
|
builder.words_limit(self.words_limit);
|
||||||
// We make sure that the analyzer is aware of the stop words
|
// We make sure that the analyzer is aware of the stop words
|
||||||
// this ensures that the query builder is able to properly remove them.
|
// this ensures that the query builder is able to properly remove them.
|
||||||
@ -360,3 +368,34 @@ pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
|
|||||||
lev.build_dfa(word)
|
lev.build_dfa(word)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod test {
    use super::*;
    use crate::index::tests::TempIndex;

    /// Typos are authorized only when both the query and the index allow them.
    #[test]
    fn test_is_authorized_typos() {
        let index = TempIndex::new();
        let mut wtxn = index.write_txn().unwrap();

        {
            let mut search = Search::new(&wtxn, &index);

            // Index and query both use their defaults: typos are authorized.
            assert!(search.is_typo_authorized().unwrap());

            // The query alone is enough to disable typos.
            search.authorize_typos(false);
            assert!(!search.is_typo_authorized().unwrap());
        }

        // Now disable typos at the index level.
        index.put_authorize_typos(&mut wtxn, false).unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();
        let mut search = Search::new(&rtxn, &index);

        // The index setting alone is enough to disable typos.
        assert!(!search.is_typo_authorized().unwrap());

        // The query cannot re-enable what the index forbids.
        search.authorize_typos(true);
        assert!(!search.is_typo_authorized().unwrap());
    }
}
|
||||||
|
@ -191,7 +191,6 @@ impl<'a> QueryTreeBuilder<'a> {
|
|||||||
/// generated forcing all query words to be present in each matching documents
|
/// generated forcing all query words to be present in each matching documents
|
||||||
/// (the criterion `words` will be ignored).
|
/// (the criterion `words` will be ignored).
|
||||||
/// default value if not called: `true`
|
/// default value if not called: `true`
|
||||||
#[allow(unused)]
|
|
||||||
pub fn optional_words(&mut self, optional_words: bool) -> &mut Self {
|
pub fn optional_words(&mut self, optional_words: bool) -> &mut Self {
|
||||||
self.optional_words = optional_words;
|
self.optional_words = optional_words;
|
||||||
self
|
self
|
||||||
@ -201,7 +200,6 @@ impl<'a> QueryTreeBuilder<'a> {
|
|||||||
/// forcing all query words to match documents without any typo
|
/// forcing all query words to match documents without any typo
|
||||||
/// (the criterion `typo` will be ignored).
|
/// (the criterion `typo` will be ignored).
|
||||||
/// default value if not called: `true`
|
/// default value if not called: `true`
|
||||||
#[allow(unused)]
|
|
||||||
pub fn authorize_typos(&mut self, authorize_typos: bool) -> &mut Self {
|
pub fn authorize_typos(&mut self, authorize_typos: bool) -> &mut Self {
|
||||||
self.authorize_typos = authorize_typos;
|
self.authorize_typos = authorize_typos;
|
||||||
self
|
self
|
||||||
|
@ -89,6 +89,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
|
|||||||
distinct_field: Setting<String>,
|
distinct_field: Setting<String>,
|
||||||
synonyms: Setting<HashMap<String, Vec<String>>>,
|
synonyms: Setting<HashMap<String, Vec<String>>>,
|
||||||
primary_key: Setting<String>,
|
primary_key: Setting<String>,
|
||||||
|
authorize_typos: Setting<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
||||||
@ -109,6 +110,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
distinct_field: Setting::NotSet,
|
distinct_field: Setting::NotSet,
|
||||||
synonyms: Setting::NotSet,
|
synonyms: Setting::NotSet,
|
||||||
primary_key: Setting::NotSet,
|
primary_key: Setting::NotSet,
|
||||||
|
authorize_typos: Setting::NotSet,
|
||||||
indexer_config,
|
indexer_config,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -186,6 +188,14 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.primary_key = Setting::Set(primary_key);
|
self.primary_key = Setting::Set(primary_key);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_autorize_typos(&mut self, val: bool) {
|
||||||
|
self.authorize_typos = Setting::Set(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn reset_authorize_typos(&mut self) {
|
||||||
|
self.authorize_typos = Setting::Reset;
|
||||||
|
}
|
||||||
|
|
||||||
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
fn reindex<F>(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -450,6 +460,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn update_authorize_typos(&mut self) -> Result<()> {
|
||||||
|
match self.authorize_typos {
|
||||||
|
Setting::Set(flag) => {
|
||||||
|
self.index.put_authorize_typos(self.wtxn, flag)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
Setting::Reset => {
|
||||||
|
self.index.put_authorize_typos(self.wtxn, true)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
Setting::NotSet => Ok(()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
|
||||||
where
|
where
|
||||||
F: Fn(UpdateIndexingStep) + Sync,
|
F: Fn(UpdateIndexingStep) + Sync,
|
||||||
@ -465,6 +489,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
self.update_distinct_field()?;
|
self.update_distinct_field()?;
|
||||||
self.update_criteria()?;
|
self.update_criteria()?;
|
||||||
self.update_primary_key()?;
|
self.update_primary_key()?;
|
||||||
|
self.update_authorize_typos()?;
|
||||||
|
|
||||||
// If there is new faceted fields we indicate that we must reindex as we must
|
// If there is new faceted fields we indicate that we must reindex as we must
|
||||||
// index new fields as facets. It means that the distinct attribute,
|
// index new fields as facets. It means that the distinct attribute,
|
||||||
@ -493,6 +518,7 @@ mod tests {
|
|||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::error::Error;
|
use crate::error::Error;
|
||||||
|
use crate::index::tests::TempIndex;
|
||||||
use crate::update::IndexDocuments;
|
use crate::update::IndexDocuments;
|
||||||
use crate::{Criterion, Filter, SearchResult};
|
use crate::{Criterion, Filter, SearchResult};
|
||||||
|
|
||||||
@ -1193,4 +1219,18 @@ mod tests {
|
|||||||
let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap();
|
let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap();
|
||||||
assert_eq!(line, r#""Star Wars""#);
|
assert_eq!(line, r#""Star Wars""#);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Checks that disabling typos through the settings builder is reflected by the index.
#[test]
fn test_disable_typo() {
    let index = TempIndex::new();
    let indexer_config = IndexerConfig::default();
    let mut wtxn = index.write_txn().unwrap();

    // Typos are authorized until a setting says otherwise.
    assert!(index.authorize_typos(&wtxn).unwrap());

    let mut settings = Settings::new(&mut wtxn, &index, &indexer_config);
    settings.set_autorize_typos(false);
    settings.execute(|_| ()).unwrap();

    // The same transaction must now observe the disabled flag.
    assert!(!index.authorize_typos(&wtxn).unwrap());
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user