Be able to set and reset settings

This commit is contained in:
ManyTheFish 2023-07-24 17:00:18 +02:00
parent 0597a97c84
commit d8d12d5979
10 changed files with 1088 additions and 140 deletions

View file

@ -1,5 +1,5 @@
use std::borrow::Cow;
use std::collections::{HashMap, HashSet};
use std::collections::{BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::mem::size_of;
use std::path::Path;
@ -60,6 +60,9 @@ pub mod main_key {
pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
pub const STOP_WORDS_KEY: &str = "stop-words";
pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
pub const DICTIONARY_KEY: &str = "dictionary";
pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
pub const SYNONYMS_KEY: &str = "synonyms";
pub const WORDS_FST_KEY: &str = "words-fst";
@ -1048,6 +1051,90 @@ impl Index {
}
}
/* non separator tokens */
pub(crate) fn put_non_separator_tokens(
&self,
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY, set)
}
pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::NON_SEPARATOR_TOKENS_KEY)
}
pub fn non_separator_tokens<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(
rtxn,
main_key::NON_SEPARATOR_TOKENS_KEY,
)?)
}
/* separator tokens */
pub(crate) fn put_separator_tokens(
&self,
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SEPARATOR_TOKENS_KEY, set)
}
pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::SEPARATOR_TOKENS_KEY)
}
pub fn separator_tokens<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::SEPARATOR_TOKENS_KEY)?)
}
/* separators easing method */
pub(crate) fn allowed_separators<'t>(
&self,
rtxn: &'t RoTxn,
) -> Result<Option<BTreeSet<String>>> {
let default_separators =
charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());
let mut separators: Option<BTreeSet<_>> = None;
if let Some(mut separator_tokens) = self.separator_tokens(rtxn)? {
separator_tokens.extend(default_separators.clone());
separators = Some(separator_tokens);
}
if let Some(non_separator_tokens) = self.non_separator_tokens(rtxn)? {
separators = separators
.or_else(|| Some(default_separators.collect()))
.map(|separators| &separators - &non_separator_tokens);
}
Ok(separators)
}
/* dictionary */
pub(crate) fn put_dictionary(
&self,
wtxn: &mut RwTxn,
set: &BTreeSet<String>,
) -> heed::Result<()> {
self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::DICTIONARY_KEY, set)
}
pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
self.main.delete::<_, Str>(wtxn, main_key::DICTIONARY_KEY)
}
pub fn dictionary<'t>(&self, rtxn: &'t RoTxn) -> Result<Option<BTreeSet<String>>> {
Ok(self
.main
.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, main_key::DICTIONARY_KEY)?)
}
/* synonyms */
pub(crate) fn put_synonyms(

View file

@ -112,6 +112,9 @@ pub struct Settings<'a, 't, 'u, 'i> {
sortable_fields: Setting<HashSet<String>>,
criteria: Setting<Vec<Criterion>>,
stop_words: Setting<BTreeSet<String>>,
non_separator_tokens: Setting<BTreeSet<String>>,
separator_tokens: Setting<BTreeSet<String>>,
dictionary: Setting<BTreeSet<String>>,
distinct_field: Setting<String>,
synonyms: Setting<HashMap<String, Vec<String>>>,
primary_key: Setting<String>,
@ -141,6 +144,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
sortable_fields: Setting::NotSet,
criteria: Setting::NotSet,
stop_words: Setting::NotSet,
non_separator_tokens: Setting::NotSet,
separator_tokens: Setting::NotSet,
dictionary: Setting::NotSet,
distinct_field: Setting::NotSet,
synonyms: Setting::NotSet,
primary_key: Setting::NotSet,
@ -205,6 +211,39 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
if stop_words.is_empty() { Setting::Reset } else { Setting::Set(stop_words) }
}
pub fn reset_non_separator_tokens(&mut self) {
self.non_separator_tokens = Setting::Reset;
}
pub fn set_non_separator_tokens(&mut self, non_separator_tokens: BTreeSet<String>) {
self.non_separator_tokens = if non_separator_tokens.is_empty() {
Setting::Reset
} else {
Setting::Set(non_separator_tokens)
}
}
pub fn reset_separator_tokens(&mut self) {
self.separator_tokens = Setting::Reset;
}
pub fn set_separator_tokens(&mut self, separator_tokens: BTreeSet<String>) {
self.separator_tokens = if separator_tokens.is_empty() {
Setting::Reset
} else {
Setting::Set(separator_tokens)
}
}
pub fn reset_dictionary(&mut self) {
self.dictionary = Setting::Reset;
}
pub fn set_dictionary(&mut self, dictionary: BTreeSet<String>) {
self.dictionary =
if dictionary.is_empty() { Setting::Reset } else { Setting::Set(dictionary) }
}
pub fn reset_distinct_field(&mut self) {
self.distinct_field = Setting::Reset;
}
@ -451,6 +490,60 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}
}
fn update_non_separator_tokens(&mut self) -> Result<bool> {
match self.non_separator_tokens {
Setting::Set(ref non_separator_tokens) => {
let current = self.index.non_separator_tokens(self.wtxn)?;
// Does the new list differ from the previous one?
if current.map_or(true, |current| &current != non_separator_tokens) {
self.index.put_non_separator_tokens(self.wtxn, &non_separator_tokens)?;
Ok(true)
} else {
Ok(false)
}
}
Setting::Reset => Ok(self.index.delete_non_separator_tokens(self.wtxn)?),
Setting::NotSet => Ok(false),
}
}
fn update_separator_tokens(&mut self) -> Result<bool> {
match self.separator_tokens {
Setting::Set(ref separator_tokens) => {
let current = self.index.separator_tokens(self.wtxn)?;
// Does the new list differ from the previous one?
if current.map_or(true, |current| &current != separator_tokens) {
self.index.put_separator_tokens(self.wtxn, &separator_tokens)?;
Ok(true)
} else {
Ok(false)
}
}
Setting::Reset => Ok(self.index.delete_separator_tokens(self.wtxn)?),
Setting::NotSet => Ok(false),
}
}
fn update_dictionary(&mut self) -> Result<bool> {
match self.dictionary {
Setting::Set(ref dictionary) => {
let current = self.index.dictionary(self.wtxn)?;
// Does the new list differ from the previous one?
if current.map_or(true, |current| &current != dictionary) {
self.index.put_dictionary(self.wtxn, &dictionary)?;
Ok(true)
} else {
Ok(false)
}
}
Setting::Reset => Ok(self.index.delete_dictionary(self.wtxn)?),
Setting::NotSet => Ok(false),
}
}
fn update_synonyms(&mut self) -> Result<bool> {
match self.synonyms {
Setting::Set(ref synonyms) => {
@ -756,11 +849,17 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
let faceted_updated = old_faceted_fields != new_faceted_fields;
let stop_words_updated = self.update_stop_words()?;
let non_separator_tokens_updated = self.update_non_separator_tokens()?;
let separator_tokens_updated = self.update_separator_tokens()?;
let dictionary_updated = self.update_dictionary()?;
let synonyms_updated = self.update_synonyms()?;
let searchable_updated = self.update_searchable()?;
let exact_attributes_updated = self.update_exact_attributes()?;
if stop_words_updated
|| non_separator_tokens_updated
|| separator_tokens_updated
|| dictionary_updated
|| faceted_updated
|| synonyms_updated
|| searchable_updated
@ -1539,6 +1638,9 @@ mod tests {
sortable_fields,
criteria,
stop_words,
non_separator_tokens,
separator_tokens,
dictionary,
distinct_field,
synonyms,
primary_key,
@ -1557,6 +1659,9 @@ mod tests {
assert!(matches!(sortable_fields, Setting::NotSet));
assert!(matches!(criteria, Setting::NotSet));
assert!(matches!(stop_words, Setting::NotSet));
assert!(matches!(non_separator_tokens, Setting::NotSet));
assert!(matches!(separator_tokens, Setting::NotSet));
assert!(matches!(dictionary, Setting::NotSet));
assert!(matches!(distinct_field, Setting::NotSet));
assert!(matches!(synonyms, Setting::NotSet));
assert!(matches!(primary_key, Setting::NotSet));