mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
Merge #5091
5091: Settings opt out r=Kerollmops a=ManyTheFish # Pull Request Related PRD: https://www.notion.so/meilisearch/API-usage-Settings-to-opt-out-indexing-features-fff4b06b651f8108ade3f858aeb16b14?pvs=4 ## Related issue Fixes #4979 - [x] Add setting opt-out - [x] Add analytics - [x] Add tests Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Many the fish <many@meilisearch.com>
This commit is contained in:
commit
d0b2c0a523
36 changed files with 1032 additions and 114 deletions
|
@ -34,10 +34,12 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||
extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
|
||||
} else {
|
||||
let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
|
||||
let facet_search = settings_diff.new.facet_search;
|
||||
extract_facet_string_docids_document_update(
|
||||
docid_fid_facet_string,
|
||||
indexer,
|
||||
localized_field_ids,
|
||||
facet_search,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
@ -51,6 +53,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
|
|||
docid_fid_facet_string: grenad::Reader<R>,
|
||||
indexer: GrenadParameters,
|
||||
localized_field_ids: &LocalizedFieldIds,
|
||||
facet_search: bool,
|
||||
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
|
||||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
|
@ -96,7 +99,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
|
|||
let normalized_value = str::from_utf8(normalized_value_bytes)?;
|
||||
|
||||
// Facet search normalization
|
||||
{
|
||||
if facet_search {
|
||||
let locales = localized_field_ids.locales(field_id);
|
||||
let hyper_normalized_value = normalize_facet_string(normalized_value, locales);
|
||||
|
||||
|
@ -179,8 +182,10 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
|
|||
let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
|
||||
|
||||
let are_same_locales = old_locales == new_locales;
|
||||
let reindex_facet_search =
|
||||
settings_diff.new.facet_search && !settings_diff.old.facet_search;
|
||||
|
||||
if is_same_value && are_same_locales {
|
||||
if is_same_value && are_same_locales && !reindex_facet_search {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -191,18 +196,26 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
|
|||
let normalized_value = str::from_utf8(normalized_value_bytes)?;
|
||||
|
||||
// Facet search normalization
|
||||
{
|
||||
let old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales);
|
||||
let new_hyper_normalized_value = if are_same_locales {
|
||||
&old_hyper_normalized_value
|
||||
if settings_diff.new.facet_search {
|
||||
let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales);
|
||||
let old_hyper_normalized_value;
|
||||
let old_hyper_normalized_value = if !settings_diff.old.facet_search
|
||||
|| deladd_reader.get(DelAdd::Deletion).is_none()
|
||||
{
|
||||
// if the facet search is disabled in the old settings or if no facet string is deleted,
|
||||
// we don't need to normalize the facet string.
|
||||
None
|
||||
} else if are_same_locales {
|
||||
Some(&new_hyper_normalized_value)
|
||||
} else {
|
||||
&normalize_facet_string(normalized_value, new_locales)
|
||||
old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales);
|
||||
Some(&old_hyper_normalized_value)
|
||||
};
|
||||
|
||||
let set = BTreeSet::from_iter(std::iter::once(normalized_value));
|
||||
|
||||
// if the facet string is the same, we can put the deletion and addition in the same obkv.
|
||||
if old_hyper_normalized_value == new_hyper_normalized_value.as_str() {
|
||||
if old_hyper_normalized_value == Some(&new_hyper_normalized_value) {
|
||||
// nothing to do if we delete and re-add the value.
|
||||
if is_same_value {
|
||||
continue;
|
||||
|
@ -222,7 +235,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
|
|||
} else {
|
||||
// if the facet string is different, we need to insert the deletion and addition in different obkv because the related key is different.
|
||||
// deletion
|
||||
if deladd_reader.get(DelAdd::Deletion).is_some() {
|
||||
if let Some(old_hyper_normalized_value) = old_hyper_normalized_value {
|
||||
// insert old value
|
||||
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
|
||||
buffer.clear();
|
||||
|
|
|
@ -80,7 +80,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||
let new_faceted_fids: BTreeSet<_> =
|
||||
settings_diff.new.faceted_fields_ids.iter().copied().collect();
|
||||
|
||||
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
|
||||
if !settings_diff.settings_update_only || settings_diff.reindex_facets() {
|
||||
let mut cursor = obkv_documents.into_cursor()?;
|
||||
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
|
||||
let obkv = obkv::KvReader::from_slice(value);
|
||||
|
@ -112,8 +112,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||
(field_id, None, add_value)
|
||||
}
|
||||
EitherOrBoth::Both(&field_id, _) => {
|
||||
// during settings update, recompute the changing settings only.
|
||||
if settings_diff.settings_update_only {
|
||||
// during settings update, recompute the changing settings only unless a global change is detected.
|
||||
if settings_diff.settings_update_only
|
||||
&& !settings_diff.global_facet_settings_changed()
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,7 @@ pub use self::transform::{Transform, TransformOutput};
|
|||
use super::new::StdResult;
|
||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{PrefixSearch, PrefixSettings};
|
||||
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
|
||||
pub use crate::update::index_documents::helpers::CursorClonableMmap;
|
||||
use crate::update::{
|
||||
|
@ -82,8 +83,6 @@ pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
|
|||
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct IndexDocumentsConfig {
|
||||
pub words_prefix_threshold: Option<u32>,
|
||||
pub max_prefix_length: Option<usize>,
|
||||
pub words_positions_level_group_size: Option<NonZeroU32>,
|
||||
pub words_positions_min_level_size: Option<NonZeroU32>,
|
||||
pub update_method: IndexDocumentsMethod,
|
||||
|
@ -565,14 +564,32 @@ where
|
|||
self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?;
|
||||
|
||||
// Run the words prefixes update operation.
|
||||
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
|
||||
if let Some(value) = self.config.words_prefix_threshold {
|
||||
builder.threshold(value);
|
||||
let PrefixSettings { prefix_count_threshold, max_prefix_length, compute_prefixes } =
|
||||
self.index.prefix_settings(self.wtxn)?;
|
||||
|
||||
// If the prefix search is enabled at indexing time, we compute the prefixes.
|
||||
if compute_prefixes == PrefixSearch::IndexingTime {
|
||||
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
|
||||
builder.threshold(prefix_count_threshold);
|
||||
builder.max_prefix_length(max_prefix_length);
|
||||
builder.execute()?;
|
||||
} else {
|
||||
// If the prefix search is disabled at indexing time, we delete the previous words prefixes fst.
|
||||
// And all the associated docids databases.
|
||||
self.index.delete_words_prefixes_fst(self.wtxn)?;
|
||||
self.index.word_prefix_docids.clear(self.wtxn)?;
|
||||
self.index.exact_word_prefix_docids.clear(self.wtxn)?;
|
||||
self.index.word_prefix_position_docids.clear(self.wtxn)?;
|
||||
self.index.word_prefix_fid_docids.clear(self.wtxn)?;
|
||||
|
||||
databases_seen += 3;
|
||||
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
|
||||
databases_seen,
|
||||
total_databases: TOTAL_POSTING_DATABASE_COUNT,
|
||||
});
|
||||
|
||||
return Ok(());
|
||||
}
|
||||
if let Some(value) = self.config.max_prefix_length {
|
||||
builder.max_prefix_length(value);
|
||||
}
|
||||
builder.execute()?;
|
||||
|
||||
if (self.should_abort)() {
|
||||
return Err(Error::InternalError(InternalError::AbortedIndexation));
|
||||
|
|
|
@ -667,14 +667,23 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
|
||||
|
||||
// If only a faceted field has been added, keep only this field.
|
||||
let must_reindex_facets = settings_diff.reindex_facets();
|
||||
let necessary_faceted_field = |id: FieldId| -> bool {
|
||||
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
|
||||
must_reindex_facets
|
||||
&& modified_faceted_fields
|
||||
.iter()
|
||||
.any(|long| is_faceted_by(long, field_name) || is_faceted_by(field_name, long))
|
||||
};
|
||||
let global_facet_settings_changed = settings_diff.global_facet_settings_changed();
|
||||
let facet_fids_changed = settings_diff.facet_fids_changed();
|
||||
let necessary_faceted_field =
|
||||
|id: FieldId| -> bool {
|
||||
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
|
||||
if global_facet_settings_changed {
|
||||
settings_diff.new.user_defined_faceted_fields.iter().any(|long| {
|
||||
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
|
||||
})
|
||||
} else if facet_fids_changed {
|
||||
modified_faceted_fields.iter().any(|long| {
|
||||
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
|
||||
})
|
||||
} else {
|
||||
false
|
||||
}
|
||||
};
|
||||
|
||||
// Alway provide all fields when vectors are involved because
|
||||
// we need the fields for the prompt/templating.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue