966 lines
35 KiB
Rust
Raw Normal View History

use std::collections::{BTreeMap, BTreeSet};
2023-01-11 12:33:56 +01:00
use std::convert::Infallible;
use std::fmt;
use std::marker::PhantomData;
use std::num::NonZeroUsize;
use std::ops::{ControlFlow, Deref};
2023-01-11 12:33:56 +01:00
use std::str::FromStr;
2023-02-13 18:45:13 +01:00
use deserr::{DeserializeError, Deserr, ErrorKind, MergeWithError, ValuePointerRef};
2022-10-13 15:02:59 +02:00
use fst::IntoStreamer;
use milli::index::IndexEmbeddingConfig;
use milli::proximity::ProximityPrecision;
2023-01-11 12:33:56 +01:00
use milli::update::Setting;
use milli::{Criterion, CriterionError, Index, DEFAULT_VALUES_PER_FACET};
use serde::{Deserialize, Serialize, Serializer};
use crate::deserr::DeserrJsonError;
2023-01-11 17:10:32 +01:00
use crate::error::deserr_codes::*;
use crate::facet_values_sort::FacetValuesSort;
use crate::locales::LocalizedAttributesRuleView;
2023-06-28 14:23:03 +02:00
/// The maximum number of results that the engine
/// will be able to return in one search call.
///
/// Used as the fallback value when the index stores no explicit pagination limit.
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
/// Serializes an attribute list setting, writing the `["*"]` wildcard for `Setting::Reset`.
///
/// - `Setting::Set(names)` is serialized as `Some(names)`.
/// - `Setting::Reset` is serialized as `Some(["*"])`.
/// - `Setting::NotSet` is serialized as `None`.
fn serialize_with_wildcard<S>(
    field: &Setting<Vec<String>>,
    s: S,
) -> std::result::Result<S::Ok, S::Error>
where
    S: Serializer,
{
    // Owned here so that the `Reset` arm can hand out a borrow of the same type as `Set`.
    let star = vec![String::from("*")];
    let names: Option<&Vec<String>> = match field {
        Setting::NotSet => None,
        Setting::Reset => Some(&star),
        Setting::Set(names) => Some(names),
    };
    names.serialize(s)
}
/// Type-level marker for `Settings<T>`: settings whose validity is guaranteed.
#[derive(Clone, Default, Debug, Serialize, PartialEq, Eq)]
pub struct Checked;
/// Type-level marker for `Settings<T>`: settings that still need to be validated.
#[derive(Clone, Default, Debug, Serialize, Deserialize, PartialEq, Eq)]
pub struct Unchecked;
2023-02-13 18:45:13 +01:00
impl<E> Deserr<E> for Unchecked
where
E: DeserializeError,
{
fn deserialize_from_value<V: deserr::IntoValue>(
_value: deserr::Value<V>,
_location: deserr::ValuePointerRef,
) -> Result<Self, E> {
unreachable!()
}
}
2023-01-11 12:33:56 +01:00
fn validate_min_word_size_for_typo_setting<E: DeserializeError>(
s: MinWordSizeTyposSetting,
location: ValuePointerRef,
) -> Result<MinWordSizeTyposSetting, E> {
if let (Setting::Set(one), Setting::Set(two)) = (s.one_typo, s.two_typos) {
if one > two {
return Err(deserr::take_cf_content(E::error::<Infallible>(None, ErrorKind::Unexpected { msg: format!("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater or equals to `oneTypo` but found `oneTypo: {one}` and twoTypos: {two}`.") }, location)));
2023-01-11 12:33:56 +01:00
}
}
Ok(s)
}
2023-02-13 18:45:13 +01:00
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
2023-01-11 12:33:56 +01:00
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(deny_unknown_fields, rename_all = camelCase, validate = validate_min_word_size_for_typo_setting -> DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub struct MinWordSizeTyposSetting {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)]
pub one_typo: Setting<u8>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)]
pub two_typos: Setting<u8>,
}
2023-02-13 18:45:13 +01:00
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
2023-01-11 12:33:56 +01:00
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(deny_unknown_fields, rename_all = camelCase, where_predicate = __Deserr_E: deserr::MergeWithError<DeserrJsonError<InvalidSettingsTypoTolerance>>)]
pub struct TypoSettings {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)]
pub enabled: Setting<bool>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub min_word_size_for_typos: Setting<MinWordSizeTyposSetting>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)]
pub disable_on_words: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)]
pub disable_on_attributes: Setting<BTreeSet<String>>,
}
2023-02-13 18:45:13 +01:00
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
2023-01-11 12:33:56 +01:00
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct FacetingSettings {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)]
pub max_values_per_facet: Setting<usize>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)]
pub sort_facet_values_by: Setting<BTreeMap<String, FacetValuesSort>>,
}
2023-02-13 18:45:13 +01:00
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
2023-01-11 12:33:56 +01:00
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(rename_all = camelCase, deny_unknown_fields)]
pub struct PaginationSettings {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default)]
pub max_total_hits: Setting<usize>,
}
impl MergeWithError<milli::CriterionError> for DeserrJsonError<InvalidSettingsRankingRules> {
2023-01-11 12:33:56 +01:00
fn merge(
_self_: Option<Self>,
other: milli::CriterionError,
merge_location: ValuePointerRef,
2023-02-13 18:45:13 +01:00
) -> ControlFlow<Self, Self> {
2023-01-11 12:33:56 +01:00
Self::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: other.to_string() },
merge_location,
)
}
}
/// Holds all the settings for an index. `T` can either be `Checked` if they represents settings
/// whose validity is guaranteed, or `Unchecked` if they need to be validated. In the later case, a
/// call to `check` will return a `Settings<Checked>` from a `Settings<Unchecked>`.
2023-02-13 18:45:13 +01:00
#[derive(Debug, Clone, Default, Serialize, Deserialize, PartialEq, Eq, Deserr)]
2023-01-11 12:33:56 +01:00
#[serde(
deny_unknown_fields,
rename_all = "camelCase",
bound(serialize = "T: Serialize", deserialize = "T: Deserialize<'static>")
)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
pub struct Settings<T> {
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsDisplayedAttributes>)]
pub displayed_attributes: WildcardSetting,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSearchableAttributes>)]
pub searchable_attributes: WildcardSetting,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsFilterableAttributes>)]
pub filterable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSortableAttributes>)]
pub sortable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsRankingRules>)]
2023-01-11 12:33:56 +01:00
pub ranking_rules: Setting<Vec<RankingRuleView>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>)]
pub stop_words: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
2023-07-24 17:00:18 +02:00
#[deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>)]
pub non_separator_tokens: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>)]
pub separator_tokens: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
pub dictionary: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsDistinctAttribute>)]
pub distinct_attribute: Setting<String>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsProximityPrecision>)]
pub proximity_precision: Setting<ProximityPrecisionView>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsTypoTolerance>)]
pub typo_tolerance: Setting<TypoSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsFaceting>)]
pub faceting: Setting<FacetingSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsPagination>)]
pub pagination: Setting<PaginationSettings>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsEmbedders>)]
pub embedders: Setting<BTreeMap<String, Setting<milli::vector::settings::EmbeddingSettings>>>,
2024-03-11 18:24:21 +01:00
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
2024-03-19 11:14:28 +01:00
#[deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoffMs>)]
pub search_cutoff_ms: Setting<u64>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
#[deserr(default, error = DeserrJsonError<InvalidSettingsLocalizedAttributes>)]
pub localized_attributes: Setting<Vec<LocalizedAttributesRuleView>>,
#[serde(skip)]
2023-01-11 12:33:56 +01:00
#[deserr(skip)]
pub _kind: PhantomData<T>,
}
impl<T> Settings<T> {
    /// Masks, in place, the API key of every embedder that has one explicitly set.
    ///
    /// Embedders that are not `Setting::Set`, or whose `api_key` is not `Setting::Set`, are
    /// left untouched. Nothing happens when `embedders` itself is not `Setting::Set`.
    pub fn hide_secrets(&mut self) {
        let Setting::Set(embedders) = &mut self.embedders else {
            return;
        };

        for embedder in embedders.values_mut() {
            if let Setting::Set(embedder) = embedder {
                if let Setting::Set(api_key) = &mut embedder.api_key {
                    Self::hide_secret(api_key);
                }
            }
        }
    }

    /// Replaces the tail of `secret` with a mask, keeping only a short prefix visible.
    ///
    /// The number of visible leading bytes and the mask depend on the byte length:
    ///
    /// | length  | visible prefix | mask          |
    /// |---------|----------------|---------------|
    /// | `< 10`  | 0              | `XXX...`      |
    /// | `< 20`  | 2              | `XXXX...`     |
    /// | `< 30`  | 3              | `XXXXX...`    |
    /// | `>= 30` | 5              | `XXXXXX...`   |
    ///
    /// If the cut would land inside a multi-byte character, it is moved back to the previous
    /// character boundary, so fewer bytes are revealed and the call cannot panic.
    fn hide_secret(secret: &mut String) {
        let (visible, mask) = match secret.len() {
            len if len < 10 => (0, "XXX..."),
            len if len < 20 => (2, "XXXX..."),
            len if len < 30 => (3, "XXXXX..."),
            _ => (5, "XXXXXX..."),
        };

        // `replace_range` panics when the range start is not a char boundary; index 0 always
        // is one, so this loop terminates.
        let mut cut = visible;
        while !secret.is_char_boundary(cut) {
            cut -= 1;
        }
        secret.replace_range(cut.., mask);
    }
}
impl Settings<Checked> {
    /// Returns settings in which every field is `Setting::Reset`.
    pub fn cleared() -> Settings<Checked> {
        Settings {
            displayed_attributes: Setting::Reset.into(),
            searchable_attributes: Setting::Reset.into(),
            filterable_attributes: Setting::Reset,
            sortable_attributes: Setting::Reset,
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            synonyms: Setting::Reset,
            non_separator_tokens: Setting::Reset,
            separator_tokens: Setting::Reset,
            dictionary: Setting::Reset,
            distinct_attribute: Setting::Reset,
            proximity_precision: Setting::Reset,
            typo_tolerance: Setting::Reset,
            faceting: Setting::Reset,
            pagination: Setting::Reset,
            embedders: Setting::Reset,
            search_cutoff_ms: Setting::Reset,
            localized_attributes: Setting::Reset,
            _kind: PhantomData,
        }
    }

    /// Converts these settings back into `Settings<Unchecked>`.
    ///
    /// Only the type-level marker changes; every field value is moved over as is.
    pub fn into_unchecked(self) -> Settings<Unchecked> {
        // Exhaustive destructuring: a field added to `Settings` must be listed here to compile.
        let Self {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            non_separator_tokens,
            separator_tokens,
            dictionary,
            synonyms,
            distinct_attribute,
            proximity_precision,
            typo_tolerance,
            faceting,
            pagination,
            embedders,
            search_cutoff_ms,
            localized_attributes: localized_attributes_rules,
            _kind,
        } = self;

        Settings {
            displayed_attributes,
            searchable_attributes,
            filterable_attributes,
            sortable_attributes,
            ranking_rules,
            stop_words,
            non_separator_tokens,
            separator_tokens,
            dictionary,
            synonyms,
            distinct_attribute,
            proximity_precision,
            typo_tolerance,
            faceting,
            pagination,
            embedders,
            search_cutoff_ms,
            localized_attributes: localized_attributes_rules,
            _kind: PhantomData,
        }
    }
}
impl Settings<Unchecked> {
pub fn check(self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes.0 {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes.0 {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Setting::Reset
} else {
Setting::Set(fields)
}
}
otherwise => otherwise,
};
Settings {
displayed_attributes: displayed_attributes.into(),
searchable_attributes: searchable_attributes.into(),
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
2023-07-24 17:00:18 +02:00
non_separator_tokens: self.non_separator_tokens,
separator_tokens: self.separator_tokens,
dictionary: self.dictionary,
distinct_attribute: self.distinct_attribute,
proximity_precision: self.proximity_precision,
typo_tolerance: self.typo_tolerance,
faceting: self.faceting,
pagination: self.pagination,
embedders: self.embedders,
search_cutoff_ms: self.search_cutoff_ms,
localized_attributes: self.localized_attributes,
_kind: PhantomData,
}
}
pub fn validate(self) -> Result<Self, milli::Error> {
self.validate_embedding_settings()
}
fn validate_embedding_settings(mut self) -> Result<Self, milli::Error> {
let Setting::Set(mut configs) = self.embedders else { return Ok(self) };
for (name, config) in configs.iter_mut() {
let config_to_check = std::mem::take(config);
let checked_config = milli::update::validate_embedding_settings(config_to_check, name)?;
*config = checked_config
}
self.embedders = Setting::Set(configs);
Ok(self)
}
}
/// Optional, non-zero facet sizing parameters, (de)serialized with camelCase keys;
/// unknown keys are rejected on deserialization.
// NOTE(review): no use of this type is visible in this part of the file — confirm its callers.
#[derive(Debug, Clone, Serialize, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
pub struct Facets {
pub level_group_size: Option<NonZeroUsize>,
pub min_level_size: Option<NonZeroUsize>,
}
pub fn apply_settings_to_builder(
settings: &Settings<Checked>,
builder: &mut milli::update::Settings,
) {
2024-03-11 18:24:21 +01:00
let Settings {
displayed_attributes,
searchable_attributes,
filterable_attributes,
sortable_attributes,
ranking_rules,
stop_words,
non_separator_tokens,
separator_tokens,
dictionary,
synonyms,
distinct_attribute,
proximity_precision,
typo_tolerance,
faceting,
pagination,
embedders,
search_cutoff_ms,
localized_attributes: localized_attributes_rules,
2024-03-11 18:24:21 +01:00
_kind,
} = settings;
match searchable_attributes.deref() {
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
Setting::Reset => builder.reset_searchable_fields(),
Setting::NotSet => (),
}
match displayed_attributes.deref() {
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
Setting::Reset => builder.reset_displayed_fields(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match filterable_attributes {
Setting::Set(ref facets) => {
builder.set_filterable_fields(facets.clone().into_iter().collect())
}
Setting::Reset => builder.reset_filterable_fields(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match sortable_attributes {
Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()),
Setting::Reset => builder.reset_sortable_fields(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match ranking_rules {
2023-01-11 12:33:56 +01:00
Setting::Set(ref criteria) => {
builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect())
}
Setting::Reset => builder.reset_criteria(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match stop_words {
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
Setting::Reset => builder.reset_stop_words(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match non_separator_tokens {
2023-07-24 17:00:18 +02:00
Setting::Set(ref non_separator_tokens) => {
builder.set_non_separator_tokens(non_separator_tokens.clone())
}
Setting::Reset => builder.reset_non_separator_tokens(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match separator_tokens {
2023-07-24 17:00:18 +02:00
Setting::Set(ref separator_tokens) => {
builder.set_separator_tokens(separator_tokens.clone())
}
Setting::Reset => builder.reset_separator_tokens(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match dictionary {
2023-07-24 17:00:18 +02:00
Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
Setting::Reset => builder.reset_dictionary(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match synonyms {
Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
Setting::Reset => builder.reset_synonyms(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match distinct_attribute {
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
Setting::Reset => builder.reset_distinct_field(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match proximity_precision {
Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()),
Setting::Reset => builder.reset_proximity_precision(),
Setting::NotSet => (),
}
match localized_attributes_rules {
Setting::Set(ref rules) => builder
.set_localized_attributes_rules(rules.iter().cloned().map(|r| r.into()).collect()),
Setting::Reset => builder.reset_localized_attributes_rules(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match typo_tolerance {
Setting::Set(ref value) => {
match value.enabled {
Setting::Set(val) => builder.set_autorize_typos(val),
Setting::Reset => builder.reset_authorize_typos(),
Setting::NotSet => (),
}
match value.min_word_size_for_typos {
Setting::Set(ref setting) => {
match setting.one_typo {
Setting::Set(val) => builder.set_min_word_len_one_typo(val),
Setting::Reset => builder.reset_min_word_len_one_typo(),
Setting::NotSet => (),
}
match setting.two_typos {
Setting::Set(val) => builder.set_min_word_len_two_typos(val),
Setting::Reset => builder.reset_min_word_len_two_typos(),
Setting::NotSet => (),
}
}
Setting::Reset => {
builder.reset_min_word_len_one_typo();
builder.reset_min_word_len_two_typos();
}
Setting::NotSet => (),
}
match value.disable_on_words {
Setting::Set(ref words) => {
builder.set_exact_words(words.clone());
}
Setting::Reset => builder.reset_exact_words(),
Setting::NotSet => (),
}
match value.disable_on_attributes {
Setting::Set(ref words) => {
builder.set_exact_attributes(words.iter().cloned().collect())
}
Setting::Reset => builder.reset_exact_attributes(),
Setting::NotSet => (),
}
}
Setting::Reset => {
// all typo settings need to be reset here.
builder.reset_authorize_typos();
builder.reset_min_word_len_one_typo();
builder.reset_min_word_len_two_typos();
builder.reset_exact_words();
builder.reset_exact_attributes();
}
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match faceting {
Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => {
match max_values_per_facet {
Setting::Set(val) => builder.set_max_values_per_facet(*val),
Setting::Reset => builder.reset_max_values_per_facet(),
Setting::NotSet => (),
}
match sort_facet_values_by {
Setting::Set(val) => builder.set_sort_facet_values_by(
val.iter().map(|(name, order)| (name.clone(), (*order).into())).collect(),
),
Setting::Reset => builder.reset_sort_facet_values_by(),
Setting::NotSet => (),
}
}
Setting::Reset => {
builder.reset_max_values_per_facet();
builder.reset_sort_facet_values_by();
}
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match pagination {
Setting::Set(ref value) => match value.max_total_hits {
Setting::Set(val) => builder.set_pagination_max_total_hits(val),
Setting::Reset => builder.reset_pagination_max_total_hits(),
Setting::NotSet => (),
},
Setting::Reset => builder.reset_pagination_max_total_hits(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match embedders {
Setting::Set(value) => builder.set_embedder_settings(value.clone()),
Setting::Reset => builder.reset_embedder_settings(),
Setting::NotSet => (),
}
2024-03-11 18:24:21 +01:00
match search_cutoff_ms {
2024-03-11 18:24:21 +01:00
Setting::Set(cutoff) => builder.set_search_cutoff(*cutoff),
Setting::Reset => builder.reset_search_cutoff(),
Setting::NotSet => (),
}
}
/// Tells `settings` whether the returned settings should have their secrets masked.
pub enum SecretPolicy {
/// Return the settings as stored, secrets included.
RevealSecrets,
/// Call `Settings::hide_secrets` on the settings before returning them.
HideSecrets,
}
2022-10-13 15:02:59 +02:00
pub fn settings(
index: &Index,
rtxn: &crate::heed::RoTxn,
secret_policy: SecretPolicy,
2022-10-13 15:02:59 +02:00
) -> Result<Settings<Checked>, milli::Error> {
2022-10-20 18:00:07 +02:00
let displayed_attributes =
index.displayed_fields(rtxn)?.map(|fields| fields.into_iter().map(String::from).collect());
2022-10-13 15:02:59 +02:00
let searchable_attributes = index
.user_defined_searchable_fields(rtxn)?
.map(|fields| fields.into_iter().map(String::from).collect());
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
2023-01-11 12:33:56 +01:00
let criteria = index.criteria(rtxn)?;
2022-10-13 15:02:59 +02:00
let stop_words = index
.stop_words(rtxn)?
.map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
Ok(stop_words.stream().into_strs()?.into_iter().collect())
})
.transpose()?
.unwrap_or_default();
2023-07-24 17:00:18 +02:00
let non_separator_tokens = index.non_separator_tokens(rtxn)?.unwrap_or_default();
let separator_tokens = index.separator_tokens(rtxn)?.unwrap_or_default();
let dictionary = index.dictionary(rtxn)?.unwrap_or_default();
2022-10-13 15:02:59 +02:00
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
let proximity_precision = index.proximity_precision(rtxn)?.map(ProximityPrecisionView::from);
2023-07-27 14:12:23 +02:00
let synonyms = index.user_defined_synonyms(rtxn)?;
2022-10-13 15:02:59 +02:00
let min_typo_word_len = MinWordSizeTyposSetting {
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
};
let disabled_words = match index.exact_words(rtxn)? {
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
None => BTreeSet::new(),
};
2022-10-20 18:00:07 +02:00
let disabled_attributes = index.exact_attributes(rtxn)?.into_iter().map(String::from).collect();
2022-10-13 15:02:59 +02:00
let typo_tolerance = TypoSettings {
enabled: Setting::Set(index.authorize_typos(rtxn)?),
min_word_size_for_typos: Setting::Set(min_typo_word_len),
disable_on_words: Setting::Set(disabled_words),
disable_on_attributes: Setting::Set(disabled_attributes),
};
let faceting = FacetingSettings {
max_values_per_facet: Setting::Set(
2023-11-27 11:52:22 +01:00
index
.max_values_per_facet(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_VALUES_PER_FACET),
2022-10-13 15:02:59 +02:00
),
sort_facet_values_by: Setting::Set(
index
.sort_facet_values_by(rtxn)?
.into_iter()
.map(|(name, sort)| (name, sort.into()))
.collect(),
),
2022-10-13 15:02:59 +02:00
};
let pagination = PaginationSettings {
max_total_hits: Setting::Set(
2023-11-27 11:52:22 +01:00
index
.pagination_max_total_hits(rtxn)?
.map(|x| x as usize)
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
2022-10-13 15:02:59 +02:00
),
};
let embedders: BTreeMap<_, _> = index
.embedding_configs(rtxn)?
.into_iter()
.map(|IndexEmbeddingConfig { name, config, .. }| (name, Setting::Set(config.into())))
.collect();
let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) };
let search_cutoff_ms = index.search_cutoff(rtxn)?;
2024-03-11 18:24:21 +01:00
let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
let mut settings = Settings {
2022-10-13 15:02:59 +02:00
displayed_attributes: match displayed_attributes {
Some(attrs) => Setting::Set(attrs),
None => Setting::Reset,
}
.into(),
2022-10-13 15:02:59 +02:00
searchable_attributes: match searchable_attributes {
2024-04-25 10:36:10 +02:00
Some(attrs) => Setting::Set(attrs),
2022-10-13 15:02:59 +02:00
None => Setting::Reset,
}
.into(),
2022-10-13 15:02:59 +02:00
filterable_attributes: Setting::Set(filterable_attributes),
sortable_attributes: Setting::Set(sortable_attributes),
2023-01-11 12:33:56 +01:00
ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
2022-10-13 15:02:59 +02:00
stop_words: Setting::Set(stop_words),
2023-07-24 17:00:18 +02:00
non_separator_tokens: Setting::Set(non_separator_tokens),
separator_tokens: Setting::Set(separator_tokens),
dictionary: Setting::Set(dictionary),
2022-10-13 15:02:59 +02:00
distinct_attribute: match distinct_field {
Some(field) => Setting::Set(field),
None => Setting::Reset,
},
proximity_precision: Setting::Set(proximity_precision.unwrap_or_default()),
2022-10-13 15:02:59 +02:00
synonyms: Setting::Set(synonyms),
typo_tolerance: Setting::Set(typo_tolerance),
faceting: Setting::Set(faceting),
pagination: Setting::Set(pagination),
embedders,
search_cutoff_ms: match search_cutoff_ms {
2024-03-11 18:24:21 +01:00
Some(cutoff) => Setting::Set(cutoff),
None => Setting::Reset,
},
localized_attributes: match localized_attributes_rules {
Some(rules) => Setting::Set(rules.into_iter().map(|r| r.into()).collect()),
None => Setting::Reset,
},
2022-10-13 15:02:59 +02:00
_kind: PhantomData,
};
if let SecretPolicy::HideSecrets = secret_policy {
settings.hide_secrets()
}
Ok(settings)
2022-10-13 15:02:59 +02:00
}
2023-02-13 18:45:13 +01:00
#[derive(Debug, Clone, PartialEq, Eq, Deserr)]
#[deserr(try_from(&String) = FromStr::from_str -> CriterionError)]
2023-01-11 12:33:56 +01:00
pub enum RankingRuleView {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
Words,
/// Sorted by increasing number of typos.
Typo,
/// Sorted by increasing distance between matched query terms.
Proximity,
/// Documents with quey words contained in more important
/// attributes are considered better.
Attribute,
/// Dynamically sort at query time the documents. None, one or multiple Asc/Desc sortable
/// attributes can be used in place of this criterion at query time.
Sort,
/// Sorted by the similarity of the matched words with the query words.
Exactness,
/// Sorted by the increasing value of the field specified.
Asc(String),
/// Sorted by the decreasing value of the field specified.
Desc(String),
}
impl Serialize for RankingRuleView {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: Serializer,
{
serializer.serialize_str(&format!("{}", Criterion::from(self.clone())))
}
}
impl<'de> Deserialize<'de> for RankingRuleView {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
struct Visitor;
impl<'de> serde::de::Visitor<'de> for Visitor {
type Value = RankingRuleView;
fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
write!(formatter, "the name of a valid ranking rule (string)")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: serde::de::Error,
{
let criterion = Criterion::from_str(v).map_err(|_| {
E::invalid_value(serde::de::Unexpected::Str(v), &"a valid ranking rule")
})?;
Ok(RankingRuleView::from(criterion))
}
}
deserializer.deserialize_str(Visitor)
}
}
/// Parses a ranking rule by delegating to `Criterion`'s parser.
impl FromStr for RankingRuleView {
    type Err = <Criterion as FromStr>::Err;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        Criterion::from_str(s).map(RankingRuleView::from)
    }
}
impl fmt::Display for RankingRuleView {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
fmt::Display::fmt(&Criterion::from(self.clone()), f)
}
}
/// Converts a milli `Criterion` into its API-facing view, variant by variant.
impl From<Criterion> for RankingRuleView {
    fn from(criterion: Criterion) -> Self {
        match criterion {
            Criterion::Asc(field) => Self::Asc(field),
            Criterion::Desc(field) => Self::Desc(field),
            Criterion::Words => Self::Words,
            Criterion::Typo => Self::Typo,
            Criterion::Proximity => Self::Proximity,
            Criterion::Attribute => Self::Attribute,
            Criterion::Sort => Self::Sort,
            Criterion::Exactness => Self::Exactness,
        }
    }
}
/// Converts an API-facing ranking rule back into the milli `Criterion`, variant by variant.
impl From<RankingRuleView> for Criterion {
    fn from(rule: RankingRuleView) -> Self {
        match rule {
            RankingRuleView::Asc(field) => Self::Asc(field),
            RankingRuleView::Desc(field) => Self::Desc(field),
            RankingRuleView::Words => Self::Words,
            RankingRuleView::Typo => Self::Typo,
            RankingRuleView::Proximity => Self::Proximity,
            RankingRuleView::Attribute => Self::Attribute,
            RankingRuleView::Sort => Self::Sort,
            RankingRuleView::Exactness => Self::Exactness,
        }
    }
}
/// API-facing mirror of milli's `ProximityPrecision`, (de)serialized with camelCase variant
/// names. Converts to and from `ProximityPrecision` through the `From` impls below.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq, Deserr, Serialize, Deserialize)]
#[serde(deny_unknown_fields, rename_all = "camelCase")]
#[deserr(error = DeserrJsonError<InvalidSettingsProximityPrecision>, rename_all = camelCase, deny_unknown_fields)]
pub enum ProximityPrecisionView {
/// Default variant; maps to `ProximityPrecision::ByWord`.
#[default]
ByWord,
/// Maps to `ProximityPrecision::ByAttribute`.
ByAttribute,
}
/// Converts milli's `ProximityPrecision` into its API-facing view.
impl From<ProximityPrecision> for ProximityPrecisionView {
    fn from(precision: ProximityPrecision) -> Self {
        match precision {
            ProximityPrecision::ByAttribute => Self::ByAttribute,
            ProximityPrecision::ByWord => Self::ByWord,
        }
    }
}
/// Converts the API-facing proximity precision back into milli's `ProximityPrecision`.
impl From<ProximityPrecisionView> for ProximityPrecision {
    fn from(view: ProximityPrecisionView) -> Self {
        match view {
            ProximityPrecisionView::ByAttribute => Self::ByAttribute,
            ProximityPrecisionView::ByWord => Self::ByWord,
        }
    }
}
/// Newtype around `Setting<Vec<String>>` for attribute lists that understand the `"*"`
/// wildcard: its `Serialize` impl goes through `serialize_with_wildcard`, which writes
/// `Setting::Reset` as `["*"]`. It dereferences to the inner `Setting`.
#[derive(Debug, Clone, Default, Deserialize, PartialEq, Eq)]
pub struct WildcardSetting(Setting<Vec<String>>);
impl From<Setting<Vec<String>>> for WildcardSetting {
fn from(setting: Setting<Vec<String>>) -> Self {
Self(setting)
}
}
/// Serializes the wrapped setting through `serialize_with_wildcard`.
impl Serialize for WildcardSetting {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        let Self(inner) = self;
        serialize_with_wildcard(inner, serializer)
    }
}
impl<E: deserr::DeserializeError> Deserr<E> for WildcardSetting {
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: ValuePointerRef<'_>,
) -> Result<Self, E> {
Ok(Self(Setting::deserialize_from_value(value, location)?))
}
}
impl std::ops::Deref for WildcardSetting {
type Target = Setting<Vec<String>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
#[cfg(test)]
pub(crate) mod test {
    use super::*;

    /// `check` must leave plain attribute lists untouched and turn any list containing the
    /// `"*"` wildcard into `Setting::Reset`.
    #[test]
    fn test_setting_check() {
        // test no changes
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from("hello")]).into(),
            searchable_attributes: Setting::Set(vec![String::from("hello")]).into(),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            proximity_precision: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
            localized_attributes: Setting::NotSet,
            search_cutoff_ms: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        let checked = settings.clone().check();
        assert_eq!(settings.displayed_attributes, checked.displayed_attributes);
        assert_eq!(settings.searchable_attributes, checked.searchable_attributes);

        // test wildcard: alone or mixed with other names, `*` resets the setting
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![String::from("*")]).into(),
            searchable_attributes: Setting::Set(vec![String::from("hello"), String::from("*")])
                .into(),
            filterable_attributes: Setting::NotSet,
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            proximity_precision: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
            localized_attributes: Setting::NotSet,
            search_cutoff_ms: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };

        let checked = settings.check();
        assert_eq!(checked.displayed_attributes, Setting::Reset.into());
        assert_eq!(checked.searchable_attributes, Setting::Reset.into());
    }
}