Merge pull request #5509 from meilisearch/release-v1.14.0-tmp

Bring back changes from v1.14.0 to main
Louis Dureuil 2025-04-14 13:59:23 +00:00 committed by GitHub
commit a500fa053c
43 changed files with 1047 additions and 508 deletions

View file

@ -1,8 +1,13 @@
use heed::types::Bytes;
use std::mem;
use heed::Database;
use heed::DatabaseStat;
use heed::RoTxn;
use heed::Unspecified;
use serde::{Deserialize, Serialize};
use crate::BEU32;
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
/// The stats of a database.
@ -20,58 +25,24 @@ impl DatabaseStats {
///
/// This function iterates over the whole database and computes the stats.
/// It is not efficient and should be cached somewhere.
pub(crate) fn new(database: Database<Bytes, Bytes>, rtxn: &RoTxn<'_>) -> heed::Result<Self> {
let mut database_stats =
Self { number_of_entries: 0, total_key_size: 0, total_value_size: 0 };
pub(crate) fn new(
database: Database<BEU32, Unspecified>,
rtxn: &RoTxn<'_>,
) -> heed::Result<Self> {
let DatabaseStat { page_size, depth: _, branch_pages, leaf_pages, overflow_pages, entries } =
database.stat(rtxn)?;
let mut iter = database.iter(rtxn)?;
while let Some((key, value)) = iter.next().transpose()? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
database_stats.total_key_size += key_size;
database_stats.total_value_size += value_size;
}
// We first take the total size across branch, leaf, and overflow pages; the overflow pages contain the values and only the values.
let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size as usize;
// We compute an estimated size for the keys.
let total_key_size = entries * (mem::size_of::<u32>() + 4);
let total_value_size = total_size - total_key_size;
database_stats.number_of_entries = database.len(rtxn)?;
Ok(database_stats)
}
/// Recomputes the stats of the database and returns the new stats.
///
/// This function is used to update the stats of the database when some keys are modified.
/// It is more efficient than the `new` function because it does not iterate over the whole database but only over the modified keys, comparing the before and after states.
pub(crate) fn recompute<I, K>(
mut stats: Self,
database: Database<Bytes, Bytes>,
before_rtxn: &RoTxn<'_>,
after_rtxn: &RoTxn<'_>,
modified_keys: I,
) -> heed::Result<Self>
where
I: IntoIterator<Item = K>,
K: AsRef<[u8]>,
{
for key in modified_keys {
let key = key.as_ref();
if let Some(value) = database.get(after_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_add(key_size);
stats.total_value_size = stats.total_value_size.saturating_add(value_size);
}
if let Some(value) = database.get(before_rtxn, key)? {
let key_size = key.len() as u64;
let value_size = value.len() as u64;
stats.total_key_size = stats.total_key_size.saturating_sub(key_size);
stats.total_value_size = stats.total_value_size.saturating_sub(value_size);
}
}
stats.number_of_entries = database.len(after_rtxn)?;
Ok(stats)
Ok(Self {
number_of_entries: entries as u64,
total_key_size: total_key_size as u64,
total_value_size: total_value_size as u64,
})
}
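For intuition, here is a small standalone sketch (not part of the diff; the page counts are made up) of the estimate that the new `DatabaseStats::new` derives from `DatabaseStat`:

fn main() {
    // Hypothetical LMDB statistics, for illustration only.
    let (page_size, branch_pages, leaf_pages, overflow_pages, entries) =
        (4096u32, 3usize, 120usize, 10usize, 50_000usize);
    // Total size of the documents database, counting branch, leaf and overflow pages.
    let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size as usize;
    // Keys are BEU32 document ids: 4 bytes of key plus roughly 4 bytes of per-entry overhead.
    let total_key_size = entries * (std::mem::size_of::<u32>() + 4);
    // Everything not attributed to keys is counted as value bytes.
    let total_value_size = total_size.saturating_sub(total_key_size);
    println!("entries: {entries}, keys: {total_key_size} B, values: {total_value_size} B");
}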
pub fn average_key_size(&self) -> u64 {
@ -86,6 +57,10 @@ impl DatabaseStats {
self.number_of_entries
}
pub fn total_size(&self) -> u64 {
self.total_key_size + self.total_value_size
}
pub fn total_key_size(&self) -> u64 {
self.total_key_size
}

View file

@ -154,6 +154,14 @@ and can not be more than 511 bytes.", .document_id.to_string()
InvalidGeoField(#[from] Box<GeoError>),
#[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
InvalidVectorDimensions { expected: usize, found: usize },
#[error("Invalid vector dimensions in document with id `{document_id}` in `._vectors.{embedder_name}`.\n - note: embedding #{embedding_index} has dimensions {found}\n - note: embedder `{embedder_name}` requires {expected}")]
InvalidIndexingVectorDimensions {
embedder_name: String,
document_id: String,
embedding_index: usize,
expected: usize,
found: usize,
},
#[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
InvalidVectorsMapType { document_id: String, value: Value },
#[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]

View file

@ -3,8 +3,9 @@ use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
use std::fs::File;
use std::path::Path;
use heed::{types::*, WithoutTls};
use heed::{types::*, DatabaseStat, WithoutTls};
use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
use indexmap::IndexMap;
use roaring::RoaringBitmap;
use rstar::RTree;
use serde::{Deserialize, Serialize};
@ -410,38 +411,6 @@ impl Index {
Ok(count.unwrap_or_default())
}
/// Updates the stats of the documents database based on the previous stats and the modified docids.
pub fn update_documents_stats(
&self,
wtxn: &mut RwTxn<'_>,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
let before_rtxn = self.read_txn()?;
let document_stats = match self.documents_stats(&before_rtxn)? {
Some(before_stats) => DatabaseStats::recompute(
before_stats,
self.documents.remap_types(),
&before_rtxn,
wtxn,
modified_docids.iter().map(|docid| docid.to_be_bytes()),
)?,
None => {
// This should never happen when there are already documents in the index; the documents stats should be present.
// If it happens, it means that the index was not properly initialized/upgraded.
debug_assert_eq!(
self.documents.len(&before_rtxn)?,
0,
"The documents stats should be present when there are documents in the index"
);
tracing::warn!("No documents stats found, creating new ones");
DatabaseStats::new(self.documents.remap_types(), &*wtxn)?
}
};
self.put_documents_stats(wtxn, document_stats)?;
Ok(())
}
/// Writes the stats of the documents database.
pub fn put_documents_stats(
&self,
@ -1755,6 +1724,122 @@ impl Index {
}
Ok(stats)
}
/// Check if the word is indexed in the index.
///
/// This function checks whether the word is indexed by looking at the word_docids and exact_word_docids databases.
///
/// # Arguments
///
/// * `rtxn`: The read transaction.
/// * `word`: The word to check.
pub fn contains_word(&self, rtxn: &RoTxn<'_>, word: &str) -> Result<bool> {
Ok(self.word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some()
|| self.exact_word_docids.remap_data_type::<DecodeIgnore>().get(rtxn, word)?.is_some())
}
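A hedged usage sketch of the new helper (assuming an already opened `milli::Index`; propagating errors through milli's `Result` here is an assumption):

fn is_word_indexed(index: &milli::Index, word: &str) -> milli::Result<bool> {
    let rtxn = index.read_txn()?;
    // True if the word appears in either word_docids or exact_word_docids.
    index.contains_word(&rtxn, word)
}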
/// Returns the sizes in bytes of each of the index databases at the given rtxn.
pub fn database_sizes(&self, rtxn: &RoTxn<'_>) -> heed::Result<IndexMap<&'static str, usize>> {
let Self {
env: _,
main,
external_documents_ids,
word_docids,
exact_word_docids,
word_prefix_docids,
exact_word_prefix_docids,
word_pair_proximity_docids,
word_position_docids,
word_fid_docids,
word_prefix_position_docids,
word_prefix_fid_docids,
field_id_word_count_docids,
facet_id_f64_docids,
facet_id_string_docids,
facet_id_normalized_string_strings,
facet_id_string_fst,
facet_id_exists_docids,
facet_id_is_null_docids,
facet_id_is_empty_docids,
field_id_docid_facet_f64s,
field_id_docid_facet_strings,
vector_arroy,
embedder_category_id,
documents,
} = self;
fn compute_size(stats: DatabaseStat) -> usize {
let DatabaseStat {
page_size,
depth: _,
branch_pages,
leaf_pages,
overflow_pages,
entries: _,
} = stats;
(branch_pages + leaf_pages + overflow_pages) * page_size as usize
}
let mut sizes = IndexMap::new();
sizes.insert("main", main.stat(rtxn).map(compute_size)?);
sizes
.insert("external_documents_ids", external_documents_ids.stat(rtxn).map(compute_size)?);
sizes.insert("word_docids", word_docids.stat(rtxn).map(compute_size)?);
sizes.insert("exact_word_docids", exact_word_docids.stat(rtxn).map(compute_size)?);
sizes.insert("word_prefix_docids", word_prefix_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"exact_word_prefix_docids",
exact_word_prefix_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"word_pair_proximity_docids",
word_pair_proximity_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert("word_position_docids", word_position_docids.stat(rtxn).map(compute_size)?);
sizes.insert("word_fid_docids", word_fid_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"word_prefix_position_docids",
word_prefix_position_docids.stat(rtxn).map(compute_size)?,
);
sizes
.insert("word_prefix_fid_docids", word_prefix_fid_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"field_id_word_count_docids",
field_id_word_count_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert("facet_id_f64_docids", facet_id_f64_docids.stat(rtxn).map(compute_size)?);
sizes
.insert("facet_id_string_docids", facet_id_string_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"facet_id_normalized_string_strings",
facet_id_normalized_string_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("facet_id_string_fst", facet_id_string_fst.stat(rtxn).map(compute_size)?);
sizes
.insert("facet_id_exists_docids", facet_id_exists_docids.stat(rtxn).map(compute_size)?);
sizes.insert(
"facet_id_is_null_docids",
facet_id_is_null_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"facet_id_is_empty_docids",
facet_id_is_empty_docids.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"field_id_docid_facet_f64s",
field_id_docid_facet_f64s.stat(rtxn).map(compute_size)?,
);
sizes.insert(
"field_id_docid_facet_strings",
field_id_docid_facet_strings.stat(rtxn).map(compute_size)?,
);
sizes.insert("vector_arroy", vector_arroy.stat(rtxn).map(compute_size)?);
sizes.insert("embedder_category_id", embedder_category_id.stat(rtxn).map(compute_size)?);
sizes.insert("documents", documents.stat(rtxn).map(compute_size)?);
Ok(sizes)
}
}
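A hedged sketch of how `database_sizes` might be consumed to report per-database disk usage (the surrounding function and its error handling are assumptions, not part of the diff):

fn print_database_sizes(index: &milli::Index) -> milli::Result<()> {
    let rtxn = index.read_txn()?;
    let sizes = index.database_sizes(&rtxn)?;
    let total: usize = sizes.values().sum();
    for (name, size) in &sizes {
        // Each internal database with its share of the on-disk pages.
        println!("{name}: {size} bytes ({:.1}%)", *size as f64 / total.max(1) as f64 * 100.0);
    }
    Ok(())
}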
#[derive(Debug, Deserialize, Serialize)]

View file

@ -190,8 +190,18 @@ macro_rules! make_atomic_progress {
};
}
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );
make_atomic_progress!(Document alias AtomicDocumentStep => "document");
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload");
make_enum_progress! {
pub enum MergingWordCache {
WordDocids,
WordFieldIdDocids,
ExactWordDocids,
WordPositionDocids,
FieldIdWordCountDocids,
}
}
#[derive(Debug, Serialize, Clone, ToSchema)]
#[serde(rename_all = "camelCase")]

View file

@ -173,16 +173,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ranking_rule_scores.push(ScoreDetails::Skipped);
// remove candidates from the universe without adding them to the results if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
let is_below_threshold =
ranking_score_threshold.is_some_and(|ranking_score_threshold| {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
current_score < ranking_score_threshold
});
maybe_add_to_results!(bucket);
if is_below_threshold {
all_candidates -= &bucket;
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
} else {
maybe_add_to_results!(bucket);
}
ranking_rule_scores.pop();
@ -237,23 +239,24 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
);
// remove candidates from the universe without adding them to the results if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let is_below_threshold = ranking_score_threshold.is_some_and(|ranking_score_threshold| {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -=
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
current_score < ranking_score_threshold
});
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1
|| (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
|| cur_offset + (next_bucket.candidates.len() as usize) < from
|| is_below_threshold
{
maybe_add_to_results!(next_bucket.candidates);
if is_below_threshold {
all_candidates -= &next_bucket.candidates;
all_candidates -= &ranking_rule_universes[cur_ranking_rule_index];
} else {
maybe_add_to_results!(next_bucket.candidates);
}
ranking_rule_scores.pop();
continue;
}
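Both hunks replace the early `continue` with a boolean computed up front, so a bucket is either folded into the results or dropped from the candidate set. A minimal, self-contained sketch of that control-flow pattern, with a plain `f64` standing in for `ScoreDetails::global_score`:

fn main() {
    let ranking_score_threshold: Option<f64> = Some(0.5);
    let current_score = 0.3;

    // `is_some_and` yields false when no threshold is configured, so the bucket is kept.
    let is_below_threshold =
        ranking_score_threshold.is_some_and(|threshold| current_score < threshold);

    if is_below_threshold {
        println!("remove bucket from the candidates");
    } else {
        println!("add bucket to the results");
    }
}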

View file

@ -1,10 +1,12 @@
use std::borrow::Cow;
use std::cmp::Ordering;
use std::collections::BTreeSet;
use std::ops::ControlFlow;
use fst::automaton::Str;
use fst::{Automaton, IntoStreamer, Streamer};
use fst::{IntoStreamer, Streamer};
use heed::types::DecodeIgnore;
use itertools::{merge_join_by, EitherOrBoth};
use super::{OneTypoTerm, Phrase, QueryTerm, ZeroTypoTerm};
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
@ -16,16 +18,10 @@ use crate::{Result, MAX_WORD_LENGTH};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberOfTypos {
Zero,
One,
Two,
}
pub enum ZeroOrOneTypo {
Zero,
One,
}
impl Interned<QueryTerm> {
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> {
let s = ctx.term_interner.get_mut(self);
@ -47,34 +43,45 @@ impl Interned<QueryTerm> {
}
fn find_zero_typo_prefix_derivations(
ctx: &mut SearchContext<'_>,
word_interned: Interned<String>,
fst: fst::Set<Cow<'_, [u8]>>,
word_interner: &mut DedupInterner<String>,
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
) -> Result<()> {
let word = word_interner.get(word_interned).to_owned();
let word = ctx.word_interner.get(word_interned).to_owned();
let word = word.as_str();
let prefix = Str::new(word).starts_with();
let mut stream = fst.search(prefix).into_stream();
while let Some(derived_word) = stream.next() {
let derived_word = std::str::from_utf8(derived_word)?.to_owned();
let derived_word_interned = word_interner.insert(derived_word);
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned)?;
if cf.is_break() {
break;
let words =
ctx.index.word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?;
let exact_words =
ctx.index.exact_word_docids.remap_data_type::<DecodeIgnore>().prefix_iter(ctx.txn, word)?;
for eob in merge_join_by(words, exact_words, |lhs, rhs| match (lhs, rhs) {
(Ok((word, _)), Ok((exact_word, _))) => word.cmp(exact_word),
(Err(_), _) | (_, Err(_)) => Ordering::Equal,
}) {
match eob {
EitherOrBoth::Both(kv, _) | EitherOrBoth::Left(kv) | EitherOrBoth::Right(kv) => {
let (derived_word, _) = kv?;
let derived_word = derived_word.to_string();
let derived_word_interned = ctx.word_interner.insert(derived_word);
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned)?;
if cf.is_break() {
break;
}
}
}
}
}
Ok(())
}
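The rewritten `find_zero_typo_prefix_derivations` walks `word_docids` and `exact_word_docids` in parallel with `itertools::merge_join_by`, visiting each derived word once even when it exists in both databases. A small standalone sketch of that merging pattern on two sorted, made-up word lists (requires the `itertools` crate):

use itertools::{merge_join_by, EitherOrBoth};

fn main() {
    let words = ["hell", "hello", "help"];
    let exact_words = ["hello", "helm"];

    for eob in merge_join_by(words, exact_words, |lhs, rhs| lhs.cmp(rhs)) {
        // Both / Left / Right each carry one word; a word present in both lists is visited once.
        let derived = match eob {
            EitherOrBoth::Both(w, _) | EitherOrBoth::Left(w) | EitherOrBoth::Right(w) => w,
        };
        println!("derived word: {derived}");
    }
}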
fn find_zero_one_typo_derivations(
fn find_one_typo_derivations(
ctx: &mut SearchContext<'_>,
word_interned: Interned<String>,
is_prefix: bool,
mut visit: impl FnMut(Interned<String>, ZeroOrOneTypo) -> Result<ControlFlow<()>>,
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
) -> Result<()> {
let fst = ctx.get_words_fst()?;
let word = ctx.word_interner.get(word_interned).to_owned();
@ -89,16 +96,9 @@ fn find_zero_one_typo_derivations(
let derived_word = ctx.word_interner.insert(derived_word.to_owned());
let d = dfa.distance(state.1);
match d.to_u8() {
0 => {
if derived_word != word_interned {
let cf = visit(derived_word, ZeroOrOneTypo::Zero)?;
if cf.is_break() {
break;
}
}
}
0 => (),
1 => {
let cf = visit(derived_word, ZeroOrOneTypo::One)?;
let cf = visit(derived_word)?;
if cf.is_break() {
break;
}
@ -111,7 +111,7 @@ fn find_zero_one_typo_derivations(
Ok(())
}
fn find_zero_one_two_typo_derivations(
fn find_one_two_typo_derivations(
word_interned: Interned<String>,
is_prefix: bool,
fst: fst::Set<Cow<'_, [u8]>>,
@ -144,14 +144,7 @@ fn find_zero_one_two_typo_derivations(
// correct distance
let d = second_dfa.distance((state.1).0);
match d.to_u8() {
0 => {
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned, NumberOfTypos::Zero)?;
if cf.is_break() {
break;
}
}
}
0 => (),
1 => {
let cf = visit(derived_word_interned, NumberOfTypos::One)?;
if cf.is_break() {
@ -194,8 +187,6 @@ pub fn partially_initialized_term_from_word(
});
}
let fst = ctx.index.words_fst(ctx.txn)?;
let use_prefix_db = is_prefix
&& (ctx
.index
@ -215,24 +206,19 @@ pub fn partially_initialized_term_from_word(
let mut zero_typo = None;
let mut prefix_of = BTreeSet::new();
if fst.contains(word) || ctx.index.exact_word_docids.get(ctx.txn, word)?.is_some() {
if ctx.index.contains_word(ctx.txn, word)? {
zero_typo = Some(word_interned);
}
if is_prefix && use_prefix_db.is_none() {
find_zero_typo_prefix_derivations(
word_interned,
fst,
&mut ctx.word_interner,
|derived_word| {
if prefix_of.len() < limits::MAX_PREFIX_COUNT {
prefix_of.insert(derived_word);
Ok(ControlFlow::Continue(()))
} else {
Ok(ControlFlow::Break(()))
}
},
)?;
find_zero_typo_prefix_derivations(ctx, word_interned, |derived_word| {
if prefix_of.len() < limits::MAX_PREFIX_COUNT {
prefix_of.insert(derived_word);
Ok(ControlFlow::Continue(()))
} else {
Ok(ControlFlow::Break(()))
}
})?;
}
let synonyms = ctx.index.synonyms(ctx.txn)?;
let mut synonym_word_count = 0;
@ -295,18 +281,13 @@ impl Interned<QueryTerm> {
let mut one_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
match nbr_typos {
ZeroOrOneTypo::Zero => {}
ZeroOrOneTypo::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
}
}
find_one_typo_derivations(ctx, original, is_prefix, |derived_word| {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
Ok(ControlFlow::Continue(()))
} else {
Ok(ControlFlow::Break(()))
}
Ok(ControlFlow::Continue(()))
})?;
}
@ -357,7 +338,7 @@ impl Interned<QueryTerm> {
let mut two_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
find_zero_one_two_typo_derivations(
find_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
@ -370,7 +351,6 @@ impl Interned<QueryTerm> {
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);

View file

@ -28,6 +28,7 @@ pub use self::helpers::*;
pub use self::transform::{Transform, TransformOutput};
use super::facet::clear_facet_levels_based_on_settings_diff;
use super::new::StdResult;
use crate::database_stats::DatabaseStats;
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError};
use crate::index::{PrefixSearch, PrefixSettings};
@ -476,7 +477,8 @@ where
if !settings_diff.settings_update_only {
// Update the stats of the documents database when there is a document update.
self.index.update_documents_stats(self.wtxn, modified_docids)?;
let stats = DatabaseStats::new(self.index.documents.remap_data_type(), self.wtxn)?;
self.index.put_documents_stats(self.wtxn, stats)?;
}
// We write the field distribution into the main database
self.index.put_field_distribution(self.wtxn, &field_distribution)?;

View file

@ -1,5 +1,6 @@
use bumpalo::Bump;
use heed::RoTxn;
use serde_json::Value;
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
@ -10,7 +11,7 @@ use super::vector_document::{
use crate::attribute_patterns::PatternMatch;
use crate::documents::FieldIdMapper;
use crate::vector::EmbeddingConfigs;
use crate::{DocumentId, Index, Result};
use crate::{DocumentId, Index, InternalError, Result};
pub enum DocumentChange<'doc> {
Deletion(Deletion<'doc>),
@ -243,6 +244,29 @@ impl<'doc> Update<'doc> {
Ok(has_deleted_fields)
}
/// Returns `true` if the geo fields have changed.
pub fn has_changed_for_geo_fields<'t, Mapper: FieldIdMapper>(
&self,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
) -> Result<bool> {
let current = self.current(rtxn, index, mapper)?;
let current_geo = current.geo_field()?;
let updated_geo = self.only_changed_fields().geo_field()?;
match (current_geo, updated_geo) {
(Some(current_geo), Some(updated_geo)) => {
let current: Value =
serde_json::from_str(current_geo.get()).map_err(InternalError::SerdeJson)?;
let updated: Value =
serde_json::from_str(updated_geo.get()).map_err(InternalError::SerdeJson)?;
Ok(current != updated)
}
(None, None) => Ok(false),
_ => Ok(true),
}
}
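The new check relies on `serde_json::Value` equality, so key order or formatting differences inside `_geo` are not reported as a change. A tiny standalone illustration of that property (the coordinates are made up):

use serde_json::Value;

fn main() -> Result<(), serde_json::Error> {
    // Same coordinates, different key order and whitespace.
    let current: Value = serde_json::from_str(r#"{ "lat": 48.85, "lng": 2.35 }"#)?;
    let updated: Value = serde_json::from_str(r#"{"lng":2.35,"lat":48.85}"#)?;
    // `Value` equality is structural, so this is not treated as a geo change.
    assert_eq!(current, updated);
    Ok(())
}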
pub fn only_changed_vectors(
&self,
doc_alloc: &'doc Bump,

View file

@ -117,7 +117,7 @@ impl FacetedDocidsExtractor {
},
),
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
let has_changed = inner.has_changed_for_fields(
&mut |field_name| {
match_faceted_field(
field_name,
@ -130,7 +130,10 @@ impl FacetedDocidsExtractor {
rtxn,
index,
context.db_fields_ids_map,
)? {
)?;
let has_changed_for_geo_fields =
inner.has_changed_for_geo_fields(rtxn, index, context.db_fields_ids_map)?;
if !has_changed && !has_changed_for_geo_fields {
return Ok(());
}

View file

@ -121,6 +121,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
// do we have set embeddings?
if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors(
update.external_document_id(),
update.docid(),
embeddings
.into_vec(&context.doc_alloc, embedder_name)
@ -128,7 +129,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
document_id: update.external_document_id().to_string(),
error: error.to_string(),
})?,
);
)?;
} else if new_vectors.regenerate {
let new_rendered = prompt.render_document(
update.external_document_id(),
@ -209,6 +210,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
chunks.set_regenerate(insertion.docid(), new_vectors.regenerate);
if let Some(embeddings) = new_vectors.embeddings {
chunks.set_vectors(
insertion.external_document_id(),
insertion.docid(),
embeddings
.into_vec(&context.doc_alloc, embedder_name)
@ -218,7 +220,7 @@ impl<'extractor> Extractor<'extractor> for EmbeddingExtractor<'_, '_> {
.to_string(),
error: error.to_string(),
})?,
);
)?;
} else if new_vectors.regenerate {
let rendered = prompt.render_document(
insertion.external_document_id(),
@ -273,6 +275,7 @@ struct Chunks<'a, 'b, 'extractor> {
embedder: &'a Embedder,
embedder_id: u8,
embedder_name: &'a str,
dimensions: usize,
prompt: &'a Prompt,
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
@ -297,6 +300,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
let texts = BVec::with_capacity_in(capacity, doc_alloc);
let ids = BVec::with_capacity_in(capacity, doc_alloc);
let dimensions = embedder.dimensions();
Self {
texts,
ids,
@ -309,6 +313,7 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
embedder_name,
user_provided,
has_manual_generation: None,
dimensions,
}
}
@ -490,7 +495,25 @@ impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
}
}
fn set_vectors(&self, docid: DocumentId, embeddings: Vec<Embedding>) {
fn set_vectors(
&self,
external_docid: &'a str,
docid: DocumentId,
embeddings: Vec<Embedding>,
) -> Result<()> {
for (embedding_index, embedding) in embeddings.iter().enumerate() {
if embedding.len() != self.dimensions {
return Err(UserError::InvalidIndexingVectorDimensions {
expected: self.dimensions,
found: embedding.len(),
embedder_name: self.embedder_name.to_string(),
document_id: external_docid.to_string(),
embedding_index,
}
.into());
}
}
self.sender.set_vectors(docid, self.embedder_id, embeddings).unwrap();
Ok(())
}
}
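The new `set_vectors` validates every embedding's length against the embedder's configured dimension count before handing the vectors to the writer. A reduced sketch of the same validation, with a plain error string standing in for `UserError::InvalidIndexingVectorDimensions`:

fn check_dimensions(embeddings: &[Vec<f32>], expected: usize) -> Result<(), String> {
    for (embedding_index, embedding) in embeddings.iter().enumerate() {
        if embedding.len() != expected {
            return Err(format!(
                "embedding #{embedding_index} has dimensions {}, expected {expected}",
                embedding.len()
            ));
        }
    }
    Ok(())
}

fn main() {
    // One valid 3-dimensional vector and one 2-dimensional one that must be rejected.
    let embeddings = vec![vec![0.1, 0.2, 0.3], vec![0.4, 0.5]];
    assert!(check_dimensions(&embeddings, 3).is_err());
}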

View file

@ -13,6 +13,7 @@ use super::super::thread_local::{FullySend, ThreadLocal};
use super::super::FacetFieldIdsDelta;
use super::document_changes::{extract, DocumentChanges, IndexingContext};
use crate::index::IndexEmbeddingConfig;
use crate::progress::MergingWordCache;
use crate::proximity::ProximityPrecision;
use crate::update::new::extract::EmbeddingExtractor;
use crate::update::new::merger::merge_and_send_rtree;
@ -96,6 +97,7 @@ where
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
let _entered = span.enter();
indexing_context.progress.update_progress(IndexingStep::MergingFacetCaches);
facet_field_ids_delta = merge_and_send_facet_docids(
caches,
@ -117,7 +119,6 @@ where
} = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
WordDocidsExtractors::run_extraction(
document_changes,
indexing_context,
@ -126,9 +127,13 @@ where
)?
};
indexing_context.progress.update_progress(IndexingStep::MergingWordCaches);
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordDocids);
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
@ -142,6 +147,8 @@ where
let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordFieldIdDocids);
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
@ -155,6 +162,8 @@ where
let span =
tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::ExactWordDocids);
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
@ -168,6 +177,8 @@ where
let span =
tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::WordPositionDocids);
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
@ -181,6 +192,8 @@ where
let span =
tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(MergingWordCache::FieldIdWordCountDocids);
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
@ -210,6 +223,7 @@ where
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
let _entered = span.enter();
indexing_context.progress.update_progress(IndexingStep::MergingWordProximity);
merge_and_send_docids(
caches,

View file

@ -234,7 +234,6 @@ where
embedders,
field_distribution,
document_ids,
modified_docids,
)?;
Ok(congestion)

View file

@ -7,12 +7,13 @@ use itertools::{merge_join_by, EitherOrBoth};
use super::document_changes::IndexingContext;
use crate::facet::FacetType;
use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
use crate::progress::Progress;
use crate::update::del_add::DelAdd;
use crate::update::facet::new_incremental::FacetsUpdateIncremental;
use crate::update::facet::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::update::new::facet_search_builder::FacetSearchBuilder;
use crate::update::new::merger::FacetFieldIdDelta;
use crate::update::new::steps::IndexingStep;
use crate::update::new::steps::{IndexingStep, PostProcessingFacets, PostProcessingWords};
use crate::update::new::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
use crate::update::new::words_prefix_docids::{
compute_exact_word_prefix_docids, compute_word_prefix_docids, compute_word_prefix_fid_docids,
@ -33,11 +34,23 @@ where
{
let index = indexing_context.index;
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
compute_facet_level_database(index, wtxn, facet_field_ids_delta, &mut global_fields_ids_map)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
compute_facet_level_database(
index,
wtxn,
facet_field_ids_delta,
&mut global_fields_ids_map,
indexing_context.progress,
)?;
compute_facet_search_database(index, wtxn, global_fields_ids_map, indexing_context.progress)?;
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, indexing_context.grenad_parameters)?;
if let Some(prefix_delta) = compute_word_fst(index, wtxn, indexing_context.progress)? {
compute_prefix_database(
index,
wtxn,
prefix_delta,
indexing_context.grenad_parameters,
indexing_context.progress,
)?;
};
Ok(())
}
@ -48,21 +61,32 @@ fn compute_prefix_database(
wtxn: &mut RwTxn,
prefix_delta: PrefixDelta,
grenad_parameters: &GrenadParameters,
progress: &Progress,
) -> Result<()> {
let PrefixDelta { modified, deleted } = prefix_delta;
// Compute word prefix docids
progress.update_progress(PostProcessingWords::WordPrefixDocids);
compute_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
// Compute exact word prefix docids
progress.update_progress(PostProcessingWords::ExactWordPrefixDocids);
compute_exact_word_prefix_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
// Compute word prefix fid docids
progress.update_progress(PostProcessingWords::WordPrefixFieldIdDocids);
compute_word_prefix_fid_docids(wtxn, index, &modified, &deleted, grenad_parameters)?;
// Compute word prefix position docids
progress.update_progress(PostProcessingWords::WordPrefixPositionDocids);
compute_word_prefix_position_docids(wtxn, index, &modified, &deleted, grenad_parameters)
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing")]
fn compute_word_fst(index: &Index, wtxn: &mut RwTxn) -> Result<Option<PrefixDelta>> {
fn compute_word_fst(
index: &Index,
wtxn: &mut RwTxn,
progress: &Progress,
) -> Result<Option<PrefixDelta>> {
let rtxn = index.read_txn()?;
progress.update_progress(PostProcessingWords::WordFst);
let words_fst = index.words_fst(&rtxn)?;
let mut word_fst_builder = WordFstBuilder::new(&words_fst)?;
let prefix_settings = index.prefix_settings(&rtxn)?;
@ -112,8 +136,10 @@ fn compute_facet_search_database(
index: &Index,
wtxn: &mut RwTxn,
global_fields_ids_map: GlobalFieldsIdsMap,
progress: &Progress,
) -> Result<()> {
let rtxn = index.read_txn()?;
progress.update_progress(PostProcessingFacets::FacetSearch);
// if the facet search is not enabled, we can skip the rest of the function
if !index.facet_search(wtxn)? {
@ -171,10 +197,16 @@ fn compute_facet_level_database(
wtxn: &mut RwTxn,
mut facet_field_ids_delta: FacetFieldIdsDelta,
global_fields_ids_map: &mut GlobalFieldsIdsMap,
progress: &Progress,
) -> Result<()> {
let rtxn = index.read_txn()?;
let filterable_attributes_rules = index.filterable_attributes_rules(&rtxn)?;
for (fid, delta) in facet_field_ids_delta.consume_facet_string_delta() {
let mut deltas: Vec<_> = facet_field_ids_delta.consume_facet_string_delta().collect();
// We move all bulk deltas to the front and the incremental ones (the others) to the end.
deltas.sort_by_key(|(_, delta)| if let FacetFieldIdDelta::Bulk = delta { 0 } else { 1 });
for (fid, delta) in deltas {
// skip field ids that should not be facet leveled
let Some(metadata) = global_fields_ids_map.metadata(fid) else {
continue;
@ -187,11 +219,13 @@ fn compute_facet_level_database(
let _entered = span.enter();
match delta {
FacetFieldIdDelta::Bulk => {
progress.update_progress(PostProcessingFacets::StringsBulk);
tracing::debug!(%fid, "bulk string facet processing");
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::String)
.execute(wtxn)?
}
FacetFieldIdDelta::Incremental(delta_data) => {
progress.update_progress(PostProcessingFacets::StringsIncremental);
tracing::debug!(%fid, len=%delta_data.len(), "incremental string facet processing");
FacetsUpdateIncremental::new(
index,
@ -207,16 +241,22 @@ fn compute_facet_level_database(
}
}
for (fid, delta) in facet_field_ids_delta.consume_facet_number_delta() {
let mut deltas: Vec<_> = facet_field_ids_delta.consume_facet_number_delta().collect();
// We move all bulk deltas to the front and the incremental ones (the others) to the end.
deltas.sort_by_key(|(_, delta)| if let FacetFieldIdDelta::Bulk = delta { 0 } else { 1 });
for (fid, delta) in deltas {
let span = tracing::trace_span!(target: "indexing::facet_field_ids", "number");
let _entered = span.enter();
match delta {
FacetFieldIdDelta::Bulk => {
progress.update_progress(PostProcessingFacets::NumbersBulk);
tracing::debug!(%fid, "bulk number facet processing");
FacetsUpdateBulk::new_not_updating_level_0(index, vec![fid], FacetType::Number)
.execute(wtxn)?
}
FacetFieldIdDelta::Incremental(delta_data) => {
progress.update_progress(PostProcessingFacets::NumbersIncremental);
tracing::debug!(%fid, len=%delta_data.len(), "incremental number facet processing");
FacetsUpdateIncremental::new(
index,

View file

@ -7,6 +7,7 @@ use rand::SeedableRng as _;
use time::OffsetDateTime;
use super::super::channel::*;
use crate::database_stats::DatabaseStats;
use crate::documents::PrimaryKey;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::index::IndexEmbeddingConfig;
@ -142,7 +143,6 @@ pub(super) fn update_index(
embedders: EmbeddingConfigs,
field_distribution: std::collections::BTreeMap<String, u64>,
document_ids: roaring::RoaringBitmap,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?;
if let Some(new_primary_key) = new_primary_key {
@ -153,7 +153,8 @@ pub(super) fn update_index(
index.put_field_distribution(wtxn, &field_distribution)?;
index.put_documents_ids(wtxn, &document_ids)?;
index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
index.update_documents_stats(wtxn, modified_docids)?;
let stats = DatabaseStats::new(index.documents.remap_data_type(), wtxn)?;
index.put_documents_stats(wtxn, stats)?;
Ok(())
}

View file

@ -82,14 +82,8 @@ where
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
docids_sender.write(key, &bitmap)?;
Ok(())
}
Operation::Delete => {
docids_sender.delete(key)?;
Ok(())
}
Operation::Write(bitmap) => docids_sender.write(key, &bitmap),
Operation::Delete => docids_sender.delete(key),
Operation::Ignore => Ok(()),
}
})
@ -130,7 +124,6 @@ pub fn merge_and_send_facet_docids<'extractor>(
Operation::Ignore => Ok(()),
}
})?;
Ok(facet_field_ids_delta)
})
.reduce(

View file

@ -1,52 +1,42 @@
use std::borrow::Cow;
use crate::make_enum_progress;
use enum_iterator::Sequence;
use crate::progress::Step;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[repr(u8)]
pub enum IndexingStep {
PreparingPayloads,
ExtractingDocuments,
ExtractingFacets,
ExtractingWords,
ExtractingWordProximity,
ExtractingEmbeddings,
WritingGeoPoints,
WaitingForDatabaseWrites,
WaitingForExtractors,
WritingEmbeddingsToDatabase,
PostProcessingFacets,
PostProcessingWords,
Finalizing,
}
impl Step for IndexingStep {
fn name(&self) -> Cow<'static, str> {
match self {
IndexingStep::PreparingPayloads => "preparing update file",
IndexingStep::ExtractingDocuments => "extracting documents",
IndexingStep::ExtractingFacets => "extracting facets",
IndexingStep::ExtractingWords => "extracting words",
IndexingStep::ExtractingWordProximity => "extracting word proximity",
IndexingStep::ExtractingEmbeddings => "extracting embeddings",
IndexingStep::WritingGeoPoints => "writing geo points",
IndexingStep::WaitingForDatabaseWrites => "waiting for database writes",
IndexingStep::WaitingForExtractors => "waiting for extractors",
IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
IndexingStep::PostProcessingFacets => "post-processing facets",
IndexingStep::PostProcessingWords => "post-processing words",
IndexingStep::Finalizing => "finalizing",
}
.into()
}
fn current(&self) -> u32 {
*self as u32
}
fn total(&self) -> u32 {
Self::CARDINALITY as u32
make_enum_progress! {
pub enum IndexingStep {
PreparingPayloads,
ExtractingDocuments,
ExtractingFacets,
ExtractingWords,
ExtractingWordProximity,
ExtractingEmbeddings,
MergingFacetCaches,
MergingWordCaches,
MergingWordProximity,
WritingGeoPoints,
WaitingForDatabaseWrites,
WaitingForExtractors,
WritingEmbeddingsToDatabase,
PostProcessingFacets,
PostProcessingWords,
Finalizing,
}
}
make_enum_progress! {
pub enum PostProcessingFacets {
StringsBulk,
StringsIncremental,
NumbersBulk,
NumbersIncremental,
FacetSearch,
}
}
make_enum_progress! {
pub enum PostProcessingWords {
WordFst,
WordPrefixDocids,
ExactWordPrefixDocids,
WordPrefixFieldIdDocids,
WordPrefixPositionDocids,
}
}
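The hand-written `Step` implementation removed above leaned on `enum_iterator::Sequence` for the step count; the `make_enum_progress!` macro presumably generates an equivalent shape. A reduced standalone sketch of that pattern, with a local `Step` trait standing in for `crate::progress::Step`:

use enum_iterator::Sequence;

trait Step {
    fn current(&self) -> u32;
    fn total(&self) -> u32;
}

#[derive(Debug, Clone, Copy, Sequence)]
#[repr(u8)]
enum IndexingStep {
    PreparingPayloads,
    ExtractingDocuments,
    Finalizing,
}

impl Step for IndexingStep {
    fn current(&self) -> u32 {
        // The discriminant doubles as the zero-based step index.
        *self as u32
    }
    fn total(&self) -> u32 {
        Self::CARDINALITY as u32
    }
}

fn main() {
    let step = IndexingStep::ExtractingDocuments;
    println!("step {} of {}", step.current(), step.total());
}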

View file

@ -1331,8 +1331,21 @@ impl InnerIndexSettingsDiff {
let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
let cache_user_defined_searchables = old_settings.user_defined_searchable_attributes
!= new_settings.user_defined_searchable_attributes;
// Check if any searchable field has been added to or removed from the list;
// changing the order should not be considered a change that requires reindexing.
let cache_user_defined_searchables = match (
&old_settings.user_defined_searchable_attributes,
&new_settings.user_defined_searchable_attributes,
) {
(Some(old), Some(new)) => {
let old: BTreeSet<_> = old.iter().collect();
let new: BTreeSet<_> = new.iter().collect();
old != new
}
(None, None) => false,
_otherwise => true,
};
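Comparing the attribute lists as `BTreeSet`s makes the check order-insensitive, so merely reordering the searchable attributes no longer triggers a reindex. A standalone sketch of that comparison (the attribute names are made up):

use std::collections::BTreeSet;

fn main() {
    let old = vec!["title", "overview", "genres"];
    let new = vec!["genres", "title", "overview"];

    // Same attributes in a different order: the sets compare equal, so no reindex is needed.
    let old_set: BTreeSet<_> = old.iter().collect();
    let new_set: BTreeSet<_> = new.iter().collect();
    assert_eq!(old_set, new_set);
}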
// if the user-defined searchables changed, then we need to reindex prompts.
if cache_user_defined_searchables {