mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
Test and implement settings opt-out
This commit is contained in:
parent
5560452ef9
commit
d66dc363ed
36 changed files with 1018 additions and 94 deletions
|
@ -70,6 +70,8 @@ pub mod main_key {
|
|||
pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
|
||||
pub const SEARCH_CUTOFF: &str = "search_cutoff";
|
||||
pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
|
||||
pub const FACET_SEARCH: &str = "facet_search";
|
||||
pub const PREFIX_SEARCH: &str = "prefix_search";
|
||||
}
|
||||
|
||||
pub mod db_name {
|
||||
|
@ -1233,6 +1235,10 @@ impl Index {
|
|||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_words_prefixes_fst(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
|
||||
self.main.remap_key_type::<Str>().delete(wtxn, main_key::WORDS_PREFIXES_FST_KEY)
|
||||
}
|
||||
|
||||
/// Returns the FST which is the words prefixes dictionary of the engine.
|
||||
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result<fst::Set<Cow<'t, [u8]>>> {
|
||||
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
|
||||
|
@ -1562,6 +1568,41 @@ impl Index {
|
|||
self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
|
||||
}
|
||||
|
||||
pub fn prefix_search(&self, txn: &RoTxn<'_>) -> heed::Result<Option<PrefixSearch>> {
|
||||
self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().get(txn, main_key::PREFIX_SEARCH)
|
||||
}
|
||||
|
||||
pub(crate) fn put_prefix_search(
|
||||
&self,
|
||||
txn: &mut RwTxn<'_>,
|
||||
val: PrefixSearch,
|
||||
) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().put(
|
||||
txn,
|
||||
main_key::PREFIX_SEARCH,
|
||||
&val,
|
||||
)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_prefix_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::PREFIX_SEARCH)
|
||||
}
|
||||
|
||||
pub fn facet_search(&self, txn: &RoTxn<'_>) -> heed::Result<bool> {
|
||||
self.main
|
||||
.remap_types::<Str, SerdeBincode<bool>>()
|
||||
.get(txn, main_key::FACET_SEARCH)
|
||||
.map(|v| v.unwrap_or(true))
|
||||
}
|
||||
|
||||
pub(crate) fn put_facet_search(&self, txn: &mut RwTxn<'_>, val: bool) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, SerdeBincode<bool>>().put(txn, main_key::FACET_SEARCH, &val)
|
||||
}
|
||||
|
||||
pub(crate) fn delete_facet_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
|
||||
self.main.remap_key_type::<Str>().delete(txn, main_key::FACET_SEARCH)
|
||||
}
|
||||
|
||||
pub fn localized_attributes_rules(
|
||||
&self,
|
||||
rtxn: &RoTxn<'_>,
|
||||
|
@ -1647,10 +1688,14 @@ impl Index {
|
|||
Ok(res)
|
||||
}
|
||||
|
||||
pub fn prefix_settings(&self, _rtxn: &RoTxn<'_>) -> Result<PrefixSettings> {
|
||||
pub fn prefix_settings(&self, rtxn: &RoTxn<'_>) -> Result<PrefixSettings> {
|
||||
let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
|
||||
Ok(PrefixSettings {
|
||||
compute_prefixes: true,
|
||||
compute_prefixes,
|
||||
max_prefix_length: 4,
|
||||
#[cfg(not(test))]
|
||||
prefix_count_threshold: 100,
|
||||
#[cfg(test)]
|
||||
prefix_count_threshold: 100,
|
||||
})
|
||||
}
|
||||
|
@ -1665,9 +1710,17 @@ pub struct IndexEmbeddingConfig {
|
|||
|
||||
#[derive(Debug, Deserialize, Serialize)]
|
||||
pub struct PrefixSettings {
|
||||
pub prefix_count_threshold: u64,
|
||||
pub prefix_count_threshold: usize,
|
||||
pub max_prefix_length: usize,
|
||||
pub compute_prefixes: bool,
|
||||
pub compute_prefixes: PrefixSearch,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum PrefixSearch {
|
||||
#[default]
|
||||
IndexingTime,
|
||||
Disabled,
|
||||
}
|
||||
|
||||
#[derive(Serialize, Deserialize)]
|
||||
|
|
|
@ -49,6 +49,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
|
|||
use self::graph_based_ranking_rule::Words;
|
||||
use self::interner::Interned;
|
||||
use self::vector_sort::VectorSort;
|
||||
use crate::index::PrefixSearch;
|
||||
use crate::localized_attributes_rules::LocalizedFieldIds;
|
||||
use crate::score_details::{ScoreDetails, ScoringStrategy};
|
||||
use crate::search::new::distinct::apply_distinct_rule;
|
||||
|
@ -68,6 +69,7 @@ pub struct SearchContext<'ctx> {
|
|||
pub term_interner: Interner<QueryTerm>,
|
||||
pub phrase_docids: PhraseDocIdsCache,
|
||||
pub restricted_fids: Option<RestrictedFids>,
|
||||
pub prefix_search: PrefixSearch,
|
||||
}
|
||||
|
||||
impl<'ctx> SearchContext<'ctx> {
|
||||
|
@ -85,6 +87,8 @@ impl<'ctx> SearchContext<'ctx> {
|
|||
}
|
||||
}
|
||||
|
||||
let prefix_search = index.prefix_search(txn)?.unwrap_or_default();
|
||||
|
||||
Ok(Self {
|
||||
index,
|
||||
txn,
|
||||
|
@ -94,9 +98,14 @@ impl<'ctx> SearchContext<'ctx> {
|
|||
term_interner: <_>::default(),
|
||||
phrase_docids: <_>::default(),
|
||||
restricted_fids: None,
|
||||
prefix_search,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn is_prefix_search_allowed(&self) -> bool {
|
||||
self.prefix_search != PrefixSearch::Disabled
|
||||
}
|
||||
|
||||
pub fn attributes_to_search_on(
|
||||
&mut self,
|
||||
attributes_to_search_on: &'ctx [String],
|
||||
|
|
|
@ -28,6 +28,7 @@ pub fn located_query_terms_from_tokens(
|
|||
words_limit: Option<usize>,
|
||||
) -> Result<ExtractedTokens> {
|
||||
let nbr_typos = number_of_typos_allowed(ctx)?;
|
||||
let allow_prefix_search = ctx.is_prefix_search_allowed();
|
||||
|
||||
let mut query_terms = Vec::new();
|
||||
|
||||
|
@ -94,7 +95,7 @@ pub fn located_query_terms_from_tokens(
|
|||
ctx,
|
||||
word,
|
||||
nbr_typos(word),
|
||||
true,
|
||||
allow_prefix_search,
|
||||
false,
|
||||
)?;
|
||||
let located_term = LocatedQueryTerm {
|
||||
|
|
|
@ -173,6 +173,13 @@ impl<'i> FacetsUpdate<'i> {
|
|||
}
|
||||
|
||||
match self.normalized_delta_data {
|
||||
_ if !self.index.facet_search(wtxn)? => {
|
||||
// If facet search is disabled, we don't need to compute facet search databases.
|
||||
// We clear the facet search databases.
|
||||
self.index.facet_id_string_fst.clear(wtxn)?;
|
||||
self.index.facet_id_normalized_string_strings.clear(wtxn)?;
|
||||
return Ok(());
|
||||
}
|
||||
Some(data) => index_facet_search(wtxn, data, self.index),
|
||||
None => Ok(()),
|
||||
}
|
||||
|
|
|
@ -34,10 +34,12 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||
extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
|
||||
} else {
|
||||
let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
|
||||
let facet_search = settings_diff.new.facet_search;
|
||||
extract_facet_string_docids_document_update(
|
||||
docid_fid_facet_string,
|
||||
indexer,
|
||||
localized_field_ids,
|
||||
facet_search,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -51,6 +53,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
|
|||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
localized_field_ids: &LocalizedFieldIds,
|
||||
facet_search: bool,
|
||||
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
|
@ -96,7 +99,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
|
|||
let normalized_value = str::from_utf8(normalized_value_bytes)?;
|
||||
|
||||
// Facet search normalization
|
||||
{
|
||||
if facet_search {
|
||||
let locales = localized_field_ids.locales(field_id);
|
||||
let hyper_normalized_value = normalize_facet_string(normalized_value, locales);
|
||||
|
||||
|
@ -179,8 +182,10 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
|
|||
let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
|
||||
|
||||
let are_same_locales = old_locales == new_locales;
|
||||
let reindex_facet_search =
|
||||
settings_diff.new.facet_search && !settings_diff.old.facet_search;
|
||||
|
||||
if is_same_value && are_same_locales {
|
||||
if is_same_value && are_same_locales && !reindex_facet_search {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -191,18 +196,26 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
|
|||
let normalized_value = str::from_utf8(normalized_value_bytes)?;
|
||||
|
||||
// Facet search normalization
|
||||
{
|
||||
let old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales);
|
||||
let new_hyper_normalized_value = if are_same_locales {
|
||||
&old_hyper_normalized_value
|
||||
if settings_diff.new.facet_search {
|
||||
let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales);
|
||||
let old_hyper_normalized_value;
|
||||
let old_hyper_normalized_value = if !settings_diff.old.facet_search
|
||||
|| deladd_reader.get(DelAdd::Deletion).is_none()
|
||||
{
|
||||
// if the facet search is disabled in the old settings or if no facet string is deleted,
|
||||
// we don't need to normalize the facet string.
|
||||
None
|
||||
} else if are_same_locales {
|
||||
Some(&new_hyper_normalized_value)
|
||||
} else {
|
||||
&normalize_facet_string(normalized_value, new_locales)
|
||||
old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales);
|
||||
Some(&old_hyper_normalized_value)
|
||||
};
|
||||
|
||||
let set = BTreeSet::from_iter(std::iter::once(normalized_value));
|
||||
|
||||
// if the facet string is the same, we can put the deletion and addition in the same obkv.
|
||||
if old_hyper_normalized_value == new_hyper_normalized_value.as_str() {
|
||||
if old_hyper_normalized_value == Some(&new_hyper_normalized_value) {
|
||||
// nothing to do if we delete and re-add the value.
|
||||
if is_same_value {
|
||||
continue;
|
||||
|
@ -222,7 +235,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
|
|||
} else {
|
||||
// if the facet string is different, we need to insert the deletion and addition in different obkv because the related key is different.
|
||||
// deletion
|
||||
if deladd_reader.get(DelAdd::Deletion).is_some() {
|
||||
if let Some(old_hyper_normalized_value) = old_hyper_normalized_value {
|
||||
// insert old value
|
||||
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
|
||||
buffer.clear();
|
||||
|
|
|
@ -80,7 +80,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||
let new_faceted_fids: BTreeSet<_> =
|
||||
settings_diff.new.faceted_fields_ids.iter().copied().collect();
|
||||
|
||||
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
|
||||
if !settings_diff.settings_update_only || settings_diff.reindex_facets() {
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
|
||||
let obkv = obkv::KvReader::from_slice(value);
|
||||
|
@ -112,8 +112,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||
(field_id, None, add_value)
|
||||
}
|
||||
EitherOrBoth::Both(&field_id, _) => {
|
||||
// during settings update, recompute the changing settings only.
|
||||
if settings_diff.settings_update_only {
|
||||
// during settings update, recompute the changing settings only unless a global change is detected.
|
||||
if settings_diff.settings_update_only
|
||||
&& !settings_diff.global_facet_settings_changed()
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ pub use self::transform::{Transform, TransformOutput};
|
|||
use super::new::StdResult;
|
||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{PrefixSearch, PrefixSettings};
|
||||
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||
use crate::update::{
|
||||
|
@ -82,8 +83,6 @@ pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
|
|||
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct IndexDocumentsConfig {
|
||||
pub words_prefix_threshold: Option<u32>,
|
||||
pub max_prefix_length: Option<usize>,
|
||||
pub words_positions_level_group_size: Option<NonZeroU32>,
|
||||
pub words_positions_min_level_size: Option<NonZeroU32>,
|
||||
pub update_method: IndexDocumentsMethod,
|
||||
|
@ -565,14 +564,32 @@ where
|
|||
self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?;
|
||||
|
||||
// Run the words prefixes update operation.
|
||||
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
|
||||
if let Some(value) = self.config.words_prefix_threshold {
|
||||
builder.threshold(value);
|
||||
let PrefixSettings { prefix_count_threshold, max_prefix_length, compute_prefixes } =
|
||||
self.index.prefix_settings(self.wtxn)?;
|
||||
|
||||
// If the prefix search is enabled at indexing time, we compute the prefixes.
|
||||
if compute_prefixes == PrefixSearch::IndexingTime {
|
||||
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
|
||||
builder.threshold(prefix_count_threshold);
|
||||
builder.max_prefix_length(max_prefix_length);
|
||||
builder.execute()?;
|
||||
} else {
|
||||
// If the prefix search is disabled at indexing time, we delete the previous words prefixes fst.
|
||||
// And all the associated docids databases.
|
||||
self.index.delete_words_prefixes_fst(self.wtxn)?;
|
||||
self.index.word_prefix_docids.clear(self.wtxn)?;
|
||||
self.index.exact_word_prefix_docids.clear(self.wtxn)?;
|
||||
self.index.word_prefix_position_docids.clear(self.wtxn)?;
|
||||
self.index.word_prefix_fid_docids.clear(self.wtxn)?;
|
||||
|
||||
databases_seen += 3;
|
||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
databases_seen,
|
||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||
});
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
if let Some(value) = self.config.max_prefix_length {
|
||||
builder.max_prefix_length(value);
|
||||
}
|
||||
builder.execute()?;
|
||||
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
|
|
|
@ -667,14 +667,23 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
|
||||
|
||||
// If only a faceted field has been added, keep only this field.
|
||||
let must_reindex_facets = settings_diff.reindex_facets();
|
||||
let necessary_faceted_field = |id: FieldId| -> bool {
|
||||
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
|
||||
must_reindex_facets
|
||||
&& modified_faceted_fields
|
||||
.iter()
|
||||
.any(|long| is_faceted_by(long, field_name) || is_faceted_by(field_name, long))
|
||||
};
|
||||
let global_facet_settings_changed = settings_diff.global_facet_settings_changed();
|
||||
let facet_fids_changed = settings_diff.facet_fids_changed();
|
||||
let necessary_faceted_field =
|
||||
|id: FieldId| -> bool {
|
||||
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
|
||||
if global_facet_settings_changed {
|
||||
settings_diff.new.user_defined_faceted_fields.iter().any(|long| {
|
||||
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
|
||||
})
|
||||
} else if facet_fids_changed {
|
||||
modified_faceted_fields.iter().any(|long| {
|
||||
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
|
||||
})
|
||||
} else {
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
// Always provide all fields when vectors are involved because
|
||||
// we need the fields for the prompt/templating.
|
||||
|
|
|
@ -445,7 +445,10 @@ where
|
|||
|
||||
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets));
|
||||
|
||||
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
|
||||
if index.facet_search(wtxn)? {
|
||||
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
|
||||
}
|
||||
|
||||
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
|
||||
|
||||
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords));
|
||||
|
|
|
@ -80,12 +80,12 @@ pub struct PrefixDelta {
|
|||
}
|
||||
|
||||
struct PrefixFstBuilder {
|
||||
prefix_count_threshold: u64,
|
||||
prefix_count_threshold: usize,
|
||||
max_prefix_length: usize,
|
||||
/// TODO: Replace the full memory allocation
|
||||
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
|
||||
current_prefix: Vec<Prefix>,
|
||||
current_prefix_count: Vec<u64>,
|
||||
current_prefix_count: Vec<usize>,
|
||||
modified_prefixes: HashSet<Prefix>,
|
||||
current_prefix_is_modified: Vec<bool>,
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ impl PrefixFstBuilder {
|
|||
let PrefixSettings { prefix_count_threshold, max_prefix_length, compute_prefixes } =
|
||||
prefix_settings;
|
||||
|
||||
if !compute_prefixes {
|
||||
if compute_prefixes != crate::index::PrefixSearch::IndexingTime {
|
||||
return None;
|
||||
}
|
||||
|
||||
|
|
|
@ -17,7 +17,8 @@ use super::IndexerConfig;
|
|||
use crate::criterion::Criterion;
|
||||
use crate::error::UserError;
|
||||
use crate::index::{
|
||||
IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
|
||||
IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO,
|
||||
DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
|
||||
};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
use crate::prompt::default_max_bytes;
|
||||
|
@ -177,6 +178,8 @@ pub struct Settings<'a, 't, 'i> {
|
|||
embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
|
||||
search_cutoff: Setting<u64>,
|
||||
localized_attributes_rules: Setting<Vec<LocalizedAttributesRule>>,
|
||||
prefix_search: Setting<PrefixSearch>,
|
||||
facet_search: Setting<bool>,
|
||||
}
|
||||
|
||||
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
||||
|
@ -212,6 +215,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
embedder_settings: Setting::NotSet,
|
||||
search_cutoff: Setting::NotSet,
|
||||
localized_attributes_rules: Setting::NotSet,
|
||||
prefix_search: Setting::NotSet,
|
||||
facet_search: Setting::NotSet,
|
||||
indexer_config,
|
||||
}
|
||||
}
|
||||
|
@ -418,6 +423,22 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
self.localized_attributes_rules = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_prefix_search(&mut self, value: PrefixSearch) {
|
||||
self.prefix_search = Setting::Set(value);
|
||||
}
|
||||
|
||||
pub fn reset_prefix_search(&mut self) {
|
||||
self.prefix_search = Setting::Reset;
|
||||
}
|
||||
|
||||
pub fn set_facet_search(&mut self, value: bool) {
|
||||
self.facet_search = Setting::Set(value);
|
||||
}
|
||||
|
||||
pub fn reset_facet_search(&mut self) {
|
||||
self.facet_search = Setting::Reset;
|
||||
}
|
||||
|
||||
#[tracing::instrument(
|
||||
level = "trace"
|
||||
skip(self, progress_callback, should_abort, settings_diff),
|
||||
|
@ -944,7 +965,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
false
|
||||
} else {
|
||||
self.index.put_proximity_precision(self.wtxn, new)?;
|
||||
true
|
||||
old.is_some() || new != ProximityPrecision::default()
|
||||
}
|
||||
}
|
||||
Setting::Reset => self.index.delete_proximity_precision(self.wtxn)?,
|
||||
|
@ -954,6 +975,42 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
Ok(changed)
|
||||
}
|
||||
|
||||
fn update_prefix_search(&mut self) -> Result<bool> {
|
||||
let changed = match self.prefix_search {
|
||||
Setting::Set(new) => {
|
||||
let old = self.index.prefix_search(self.wtxn)?;
|
||||
if old == Some(new) {
|
||||
false
|
||||
} else {
|
||||
self.index.put_prefix_search(self.wtxn, new)?;
|
||||
old.is_some() || new != PrefixSearch::default()
|
||||
}
|
||||
}
|
||||
Setting::Reset => self.index.delete_prefix_search(self.wtxn)?,
|
||||
Setting::NotSet => false,
|
||||
};
|
||||
|
||||
Ok(changed)
|
||||
}
|
||||
|
||||
fn update_facet_search(&mut self) -> Result<bool> {
|
||||
let changed = match self.facet_search {
|
||||
Setting::Set(new) => {
|
||||
let old = self.index.facet_search(self.wtxn)?;
|
||||
if old == new {
|
||||
false
|
||||
} else {
|
||||
self.index.put_facet_search(self.wtxn, new)?;
|
||||
true
|
||||
}
|
||||
}
|
||||
Setting::Reset => self.index.delete_facet_search(self.wtxn)?,
|
||||
Setting::NotSet => false,
|
||||
};
|
||||
|
||||
Ok(changed)
|
||||
}
|
||||
|
||||
fn update_embedding_configs(&mut self) -> Result<BTreeMap<String, EmbedderAction>> {
|
||||
match std::mem::take(&mut self.embedder_settings) {
|
||||
Setting::Set(configs) => self.update_embedding_configs_set(configs),
|
||||
|
@ -1203,6 +1260,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
|
|||
self.update_searchable()?;
|
||||
self.update_exact_attributes()?;
|
||||
self.update_proximity_precision()?;
|
||||
self.update_prefix_search()?;
|
||||
self.update_facet_search()?;
|
||||
self.update_localized_attributes_rules()?;
|
||||
|
||||
let embedding_config_updates = self.update_embedding_configs()?;
|
||||
|
@ -1282,6 +1341,7 @@ impl InnerIndexSettingsDiff {
|
|||
|| old_settings.allowed_separators != new_settings.allowed_separators
|
||||
|| old_settings.dictionary != new_settings.dictionary
|
||||
|| old_settings.proximity_precision != new_settings.proximity_precision
|
||||
|| old_settings.prefix_search != new_settings.prefix_search
|
||||
|| old_settings.localized_searchable_fields_ids
|
||||
!= new_settings.localized_searchable_fields_ids
|
||||
};
|
||||
|
@ -1372,7 +1432,7 @@ impl InnerIndexSettingsDiff {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn reindex_facets(&self) -> bool {
|
||||
pub fn facet_fids_changed(&self) -> bool {
|
||||
let existing_fields = &self.new.existing_fields;
|
||||
if existing_fields.iter().any(|field| field.contains('.')) {
|
||||
return true;
|
||||
|
@ -1392,7 +1452,15 @@ impl InnerIndexSettingsDiff {
|
|||
}
|
||||
|
||||
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
|
||||
|| self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
|
||||
}
|
||||
|
||||
pub fn global_facet_settings_changed(&self) -> bool {
|
||||
self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
|
||||
|| self.old.facet_search != self.new.facet_search
|
||||
}
|
||||
|
||||
pub fn reindex_facets(&self) -> bool {
|
||||
self.facet_fids_changed() || self.global_facet_settings_changed()
|
||||
}
|
||||
|
||||
pub fn reindex_vectors(&self) -> bool {
|
||||
|
@ -1432,6 +1500,8 @@ pub(crate) struct InnerIndexSettings {
|
|||
pub non_faceted_fields_ids: Vec<FieldId>,
|
||||
pub localized_searchable_fields_ids: LocalizedFieldIds,
|
||||
pub localized_faceted_fields_ids: LocalizedFieldIds,
|
||||
pub prefix_search: PrefixSearch,
|
||||
pub facet_search: bool,
|
||||
}
|
||||
|
||||
impl InnerIndexSettings {
|
||||
|
@ -1457,6 +1527,8 @@ impl InnerIndexSettings {
|
|||
Some(embedding_configs) => embedding_configs,
|
||||
None => embedders(index.embedding_configs(rtxn)?)?,
|
||||
};
|
||||
let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default();
|
||||
let facet_search = index.facet_search(rtxn)?;
|
||||
let existing_fields: HashSet<_> = index
|
||||
.field_distribution(rtxn)?
|
||||
.into_iter()
|
||||
|
@ -1514,6 +1586,8 @@ impl InnerIndexSettings {
|
|||
non_faceted_fields_ids: vectors_fids.clone(),
|
||||
localized_searchable_fields_ids,
|
||||
localized_faceted_fields_ids,
|
||||
prefix_search,
|
||||
facet_search,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -2721,6 +2795,8 @@ mod tests {
|
|||
embedder_settings,
|
||||
search_cutoff,
|
||||
localized_attributes_rules,
|
||||
prefix_search,
|
||||
facet_search,
|
||||
} = settings;
|
||||
assert!(matches!(searchable_fields, Setting::NotSet));
|
||||
assert!(matches!(displayed_fields, Setting::NotSet));
|
||||
|
@ -2746,6 +2822,8 @@ mod tests {
|
|||
assert!(matches!(embedder_settings, Setting::NotSet));
|
||||
assert!(matches!(search_cutoff, Setting::NotSet));
|
||||
assert!(matches!(localized_attributes_rules, Setting::NotSet));
|
||||
assert!(matches!(prefix_search, Setting::NotSet));
|
||||
assert!(matches!(facet_search, Setting::NotSet));
|
||||
})
|
||||
.unwrap();
|
||||
}
|
||||
|
|
|
@ -9,7 +9,7 @@ use crate::{Index, Result, SmallString32};
|
|||
pub struct WordsPrefixesFst<'t, 'i> {
|
||||
wtxn: &'t mut RwTxn<'i>,
|
||||
index: &'i Index,
|
||||
threshold: u32,
|
||||
threshold: usize,
|
||||
max_prefix_length: usize,
|
||||
}
|
||||
|
||||
|
@ -24,8 +24,8 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
|
|||
///
|
||||
/// Default value is 100. This value must be higher than 50 and will be clamped
|
||||
/// to this bound otherwise.
|
||||
pub fn threshold(&mut self, value: u32) -> &mut Self {
|
||||
self.threshold = value.max(50);
|
||||
pub fn threshold(&mut self, value: usize) -> &mut Self {
|
||||
self.threshold = value;
|
||||
self
|
||||
}
|
||||
|
||||
|
@ -34,7 +34,7 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
|
|||
/// Default value is `4` bytes. This value must be between 1 and 25 will be clamped
|
||||
/// to these bounds, otherwise.
|
||||
pub fn max_prefix_length(&mut self, value: usize) -> &mut Self {
|
||||
self.max_prefix_length = value.clamp(1, 25);
|
||||
self.max_prefix_length = value;
|
||||
self
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue