mirror of https://github.com/meilisearch/MeiliSearch (synced 2025-07-04 04:17:10 +02:00)

Merge branch 'main' into all-cpus-in-import-dump
Commit 75a7e40a27, 74 changed files with 2402 additions and 1726 deletions
crates/milli/src/constants.rs
@@ -1,6 +1,13 @@
-pub static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
-pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
-pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
+pub const VERSION_MAJOR: u32 = parse_u32(env!("CARGO_PKG_VERSION_MAJOR"));
+pub const VERSION_MINOR: u32 = parse_u32(env!("CARGO_PKG_VERSION_MINOR"));
+pub const VERSION_PATCH: u32 = parse_u32(env!("CARGO_PKG_VERSION_PATCH"));
+
+const fn parse_u32(s: &str) -> u32 {
+    match u32::from_str_radix(s, 10) {
+        Ok(version) => version,
+        Err(_) => panic!("could not parse as u32"),
+    }
+}
 
 pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
 pub const RESERVED_GEO_FIELD_NAME: &str = "_geo";
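A note on the hunk above: moving from string statics to u32 consts pushes version parsing to compile time. A minimal standalone sketch of the behavior, assuming a toolchain where u32::from_str_radix is callable in const contexts (const-stable since Rust 1.82):

    const fn parse_u32(s: &str) -> u32 {
        match u32::from_str_radix(s, 10) {
            Ok(version) => version,
            Err(_) => panic!("could not parse as u32"),
        }
    }

    // Evaluated at compile time: a malformed version string fails the build
    // instead of panicking at runtime.
    const MAJOR: u32 = parse_u32("1");

    fn main() {
        assert_eq!(MAJOR, 1);
    }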
crates/milli/src/disabled_typos_terms.rs (new file, +50)
@@ -0,0 +1,50 @@
+use heed::{
+    types::{SerdeJson, Str},
+    RoTxn, RwTxn,
+};
+use serde::{Deserialize, Serialize};
+
+use crate::{index::main_key, Index};
+
+#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
+#[serde(rename_all = "camelCase")]
+pub struct DisabledTyposTerms {
+    pub disable_on_numbers: bool,
+}
+
+impl Index {
+    pub fn disabled_typos_terms(&self, txn: &RoTxn<'_>) -> heed::Result<DisabledTyposTerms> {
+        self.main
+            .remap_types::<Str, SerdeJson<DisabledTyposTerms>>()
+            .get(txn, main_key::DISABLED_TYPOS_TERMS)
+            .map(|option| option.unwrap_or_default())
+    }
+
+    pub(crate) fn put_disabled_typos_terms(
+        &self,
+        txn: &mut RwTxn<'_>,
+        disabled_typos_terms: &DisabledTyposTerms,
+    ) -> heed::Result<()> {
+        self.main.remap_types::<Str, SerdeJson<DisabledTyposTerms>>().put(
+            txn,
+            main_key::DISABLED_TYPOS_TERMS,
+            disabled_typos_terms,
+        )?;
+
+        Ok(())
+    }
+
+    pub(crate) fn delete_disabled_typos_terms(&self, txn: &mut RwTxn<'_>) -> heed::Result<()> {
+        self.main
+            .remap_types::<Str, SerdeJson<DisabledTyposTerms>>()
+            .delete(txn, main_key::DISABLED_TYPOS_TERMS)?;
+        Ok(())
+    }
+}
+
+impl DisabledTyposTerms {
+    pub fn is_exact(&self, word: &str) -> bool {
+        // If disable_on_numbers is true, we disable the word if it contains only numbers or punctuation
+        self.disable_on_numbers && word.chars().all(|c| c.is_numeric() || c.is_ascii_punctuation())
+    }
+}
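Given the is_exact predicate above, a quick sketch of which tokens it flags when disable_on_numbers is enabled (the values are illustrative):

    let terms = DisabledTyposTerms { disable_on_numbers: true };
    assert!(terms.is_exact("2025"));    // digits only: typo tolerance disabled
    assert!(terms.is_exact("3.14"));    // digits plus punctuation: disabled too
    assert!(!terms.is_exact("hello"));  // ordinary word: typos still tolerated
    assert!(!terms.is_exact("v1"));     // contains a letter, so not flagged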
File diff suppressed because it is too large
@@ -12,6 +12,7 @@ mod asc_desc;
 mod attribute_patterns;
 mod criterion;
 pub mod database_stats;
+pub mod disabled_typos_terms;
 mod error;
 mod external_documents_ids;
 pub mod facet;
@@ -1,10 +1,11 @@
 use std::collections::BTreeSet;
 use std::fmt::{Debug, Display};
-use std::ops::Bound::{self, Excluded, Included};
+use std::ops::Bound::{self, Excluded, Included, Unbounded};
 
 use either::Either;
 pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
 use heed::types::LazyDecode;
+use heed::BytesEncode;
 use memchr::memmem::Finder;
 use roaring::{MultiOps, RoaringBitmap};
 use serde_json::Value;
@@ -14,7 +15,7 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
 use crate::error::{Error, UserError};
 use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
 use crate::heed_codec::facet::{
-    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
+    FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
 };
 use crate::index::db_name::FACET_ID_STRING_DOCIDS;
 use crate::{
@@ -271,7 +272,7 @@ impl<'a> Filter<'a> {
         // as the facets values are all in the same database and prefixed by the
         // field id and the level.
 
-        let (left, right) = match operator {
+        let (number_bounds, (left_str, right_str)) = match operator {
             // return an error if the filter is not allowed for this field
             Condition::GreaterThan(_)
             | Condition::GreaterThanOrEqual(_)
@@ -305,17 +306,37 @@
             ));
         }
         Condition::GreaterThan(val) => {
-            (Excluded(val.parse_finite_float()?), Included(f64::MAX))
+            let number = val.parse_finite_float().ok();
+            let number_bounds = number.map(|number| (Excluded(number), Included(f64::MAX)));
+            let str_bounds = (Excluded(val.value()), Unbounded);
+            (number_bounds, str_bounds)
         }
         Condition::GreaterThanOrEqual(val) => {
-            (Included(val.parse_finite_float()?), Included(f64::MAX))
+            let number = val.parse_finite_float().ok();
+            let number_bounds = number.map(|number| (Included(number), Included(f64::MAX)));
+            let str_bounds = (Included(val.value()), Unbounded);
+            (number_bounds, str_bounds)
         }
+        Condition::LowerThan(val) => {
+            let number = val.parse_finite_float().ok();
+            let number_bounds = number.map(|number| (Included(f64::MIN), Excluded(number)));
+            let str_bounds = (Unbounded, Excluded(val.value()));
+            (number_bounds, str_bounds)
+        }
-        Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.parse_finite_float()?)),
         Condition::LowerThanOrEqual(val) => {
-            (Included(f64::MIN), Included(val.parse_finite_float()?))
+            let number = val.parse_finite_float().ok();
+            let number_bounds = number.map(|number| (Included(f64::MIN), Included(number)));
+            let str_bounds = (Unbounded, Included(val.value()));
+            (number_bounds, str_bounds)
         }
         Condition::Between { from, to } => {
-            (Included(from.parse_finite_float()?), Included(to.parse_finite_float()?))
+            let from_number = from.parse_finite_float().ok();
+            let to_number = to.parse_finite_float().ok();
+
+            let number_bounds =
+                from_number.zip(to_number).map(|(from, to)| (Included(from), Included(to)));
+            let str_bounds = (Included(from.value()), Included(to.value()));
+            (number_bounds, str_bounds)
         }
         Condition::Null => {
             let is_null = index.null_faceted_documents_ids(rtxn, field_id)?;
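The net effect of the rewritten arms: a range operand that fails to parse as a finite float no longer aborts with an error; the filter simply skips the number database and keeps the lexicographic string bounds. A standalone sketch of the LowerThan logic (names hypothetical, parse_finite_float approximated with str::parse plus a finiteness check):

    use std::ops::Bound::{self, Excluded, Included, Unbounded};

    fn lower_than_bounds(raw: &str) -> (Option<(Bound<f64>, Bound<f64>)>, (Bound<&str>, Bound<&str>)) {
        let number = raw.parse::<f64>().ok().filter(|n| n.is_finite());
        let number_bounds = number.map(|n| (Included(f64::MIN), Excluded(n)));
        let str_bounds = (Unbounded, Excluded(raw));
        (number_bounds, str_bounds)
    }

    fn main() {
        // "100" parses: both the number and the string databases get queried.
        assert!(lower_than_bounds("100").0.is_some());
        // "inf" parses to f64::INFINITY, which is not finite: only the string
        // bounds remain, so `price < inf` matches string facet values instead
        // of erroring.
        assert!(lower_than_bounds("inf").0.is_none());
    }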
@@ -415,29 +436,47 @@
         };
 
         let mut output = RoaringBitmap::new();
-        Self::explore_facet_number_levels(
-            rtxn,
-            numbers_db,
-            field_id,
-            left,
-            right,
-            universe,
-            &mut output,
-        )?;
+
+        if let Some((left_number, right_number)) = number_bounds {
+            Self::explore_facet_levels(
+                rtxn,
+                numbers_db,
+                field_id,
+                &left_number,
+                &right_number,
+                universe,
+                &mut output,
+            )?;
+        }
+
+        Self::explore_facet_levels(
+            rtxn,
+            strings_db,
+            field_id,
+            &left_str,
+            &right_str,
+            universe,
+            &mut output,
+        )?;
 
         Ok(output)
     }
 
     /// Aggregates the documents ids that are part of the specified range automatically
     /// going deeper through the levels.
-    fn explore_facet_number_levels(
-        rtxn: &heed::RoTxn<'_>,
-        db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
+    fn explore_facet_levels<'data, BoundCodec>(
+        rtxn: &'data heed::RoTxn<'data>,
+        db: heed::Database<FacetGroupKeyCodec<BoundCodec>, FacetGroupValueCodec>,
         field_id: FieldId,
-        left: Bound<f64>,
-        right: Bound<f64>,
+        left: &'data Bound<<BoundCodec as heed::BytesEncode<'data>>::EItem>,
+        right: &'data Bound<<BoundCodec as heed::BytesEncode<'data>>::EItem>,
         universe: Option<&RoaringBitmap>,
         output: &mut RoaringBitmap,
-    ) -> Result<()> {
+    ) -> Result<()>
+    where
+        BoundCodec: for<'b> BytesEncode<'b>,
+        for<'b> <BoundCodec as BytesEncode<'b>>::EItem: Sized + PartialOrd,
+    {
         match (left, right) {
             // lower TO upper when lower > upper must return no result
             (Included(l), Included(r)) if l > r => return Ok(()),
@@ -446,8 +485,8 @@
             (Excluded(l), Included(r)) if l >= r => return Ok(()),
             (_, _) => (),
         }
-        facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
-            rtxn, db, field_id, &left, &right, universe, output,
+        facet_range_search::find_docids_of_facet_within_bounds::<BoundCodec>(
+            rtxn, db, field_id, left, right, universe, output,
         )?;
 
         Ok(())
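In short, explore_facet_number_levels becomes the codec-generic explore_facet_levels, so the same level-descending traversal can serve both the f64-keyed facet number database and the string-keyed facet string database. That is also why the OrderedF64Codec import disappears from this file and the hard-coded codec in find_docids_of_facet_within_bounds gives way to the BoundCodec type parameter.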
@@ -1249,28 +1288,24 @@ mod tests {
         let result = filter.evaluate(&rtxn, &index).unwrap();
         assert!(result.contains(0));
         let filter = Filter::from_str("price < inf").unwrap().unwrap();
-        assert!(matches!(
-            filter.evaluate(&rtxn, &index),
-            Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
-        ));
+        let result = filter.evaluate(&rtxn, &index).unwrap();
+        // this is allowed due to filters with strings
+        assert!(result.contains(1));
 
         let filter = Filter::from_str("price = NaN").unwrap().unwrap();
         let result = filter.evaluate(&rtxn, &index).unwrap();
         assert!(result.is_empty());
         let filter = Filter::from_str("price < NaN").unwrap().unwrap();
-        assert!(matches!(
-            filter.evaluate(&rtxn, &index),
-            Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
-        ));
+        let result = filter.evaluate(&rtxn, &index).unwrap();
+        assert!(result.contains(1));
 
         let filter = Filter::from_str("price = infinity").unwrap().unwrap();
         let result = filter.evaluate(&rtxn, &index).unwrap();
         assert!(result.contains(2));
         let filter = Filter::from_str("price < infinity").unwrap().unwrap();
-        assert!(matches!(
-            filter.evaluate(&rtxn, &index),
-            Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
-        ));
+        let result = filter.evaluate(&rtxn, &index).unwrap();
+        assert!(result.contains(0));
+        assert!(result.contains(1));
     }
 
     #[test]
@@ -8,7 +8,7 @@ use roaring::bitmap::RoaringBitmap;
 
 pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
 pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
-use self::new::{execute_vector_search, PartialSearchResult};
+use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
 use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
 use crate::score_details::{ScoreDetails, ScoringStrategy};
 use crate::vector::Embedder;
@@ -269,6 +269,12 @@ impl<'a> Search<'a> {
             )?,
         };
 
+        if let Some(VectorStoreStats { total_time, total_queries, total_results }) =
+            ctx.vector_store_stats
+        {
+            tracing::debug!("Vector store stats: total_time={total_time:.02?}, total_queries={total_queries}, total_results={total_results}");
+        }
+
         // consume context and located_query_terms to build MatchingWords.
         let matching_words = match located_query_terms {
             Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
@@ -22,6 +22,8 @@ mod vector_sort;
 mod tests;
 
 use std::collections::HashSet;
+use std::ops::AddAssign;
+use std::time::Duration;
 
 use bucket_sort::{bucket_sort, BucketSortOutput};
 use charabia::{Language, TokenizerBuilder};
@@ -72,6 +74,7 @@ pub struct SearchContext<'ctx> {
     pub phrase_docids: PhraseDocIdsCache,
     pub restricted_fids: Option<RestrictedFids>,
     pub prefix_search: PrefixSearch,
+    pub vector_store_stats: Option<VectorStoreStats>,
 }
 
 impl<'ctx> SearchContext<'ctx> {
@@ -101,6 +104,7 @@ impl<'ctx> SearchContext<'ctx> {
             phrase_docids: <_>::default(),
             restricted_fids: None,
             prefix_search,
+            vector_store_stats: None,
         })
     }
 
@@ -166,6 +170,25 @@ impl<'ctx> SearchContext<'ctx> {
     }
 }
 
+#[derive(Debug, Default)]
+pub struct VectorStoreStats {
+    /// The total time spent on vector search.
+    pub total_time: Duration,
+    /// The number of searches performed.
+    pub total_queries: usize,
+    /// The number of nearest neighbors found.
+    pub total_results: usize,
+}
+
+impl AddAssign for VectorStoreStats {
+    fn add_assign(&mut self, other: Self) {
+        let Self { total_time, total_queries, total_results } = self;
+        *total_time += other.total_time;
+        *total_queries += other.total_queries;
+        *total_results += other.total_results;
+    }
+}
+
 #[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
 pub enum Word {
     Original(Interned<String>),
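Since VectorStoreStats derives Default and implements AddAssign, per-query measurements can be folded together with +=. A small sketch:

    use std::time::Duration;

    let mut total = VectorStoreStats::default();
    total += VectorStoreStats {
        total_time: Duration::from_millis(3),
        total_queries: 1,
        total_results: 20,
    };
    total += VectorStoreStats {
        total_time: Duration::from_millis(5),
        total_queries: 1,
        total_results: 15,
    };
    assert_eq!(total.total_queries, 2);
    assert_eq!(total.total_results, 35);
    assert_eq!(total.total_time, Duration::from_millis(8));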
@@ -1,8 +1,10 @@
 use std::iter::FromIterator;
+use std::time::Instant;
 
 use roaring::RoaringBitmap;
 
 use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
+use super::VectorStoreStats;
 use crate::score_details::{self, ScoreDetails};
 use crate::vector::{ArroyWrapper, DistributionShift, Embedder};
 use crate::{DocumentId, Result, SearchContext, SearchLogger};
@@ -53,9 +55,15 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
     ) -> Result<()> {
         let target = &self.target;
 
+        let before = Instant::now();
         let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized);
         let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
         self.cached_sorted_docids = results.into_iter();
+        *ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {
+            total_time: before.elapsed(),
+            total_queries: 1,
+            total_results: self.cached_sorted_docids.len(),
+        };
 
         Ok(())
     }
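The ctx.vector_store_stats.get_or_insert_default() call lazily initializes the Option<VectorStoreStats> on the first vector query, after which += (the AddAssign impl above) accumulates across ranking-rule invocations. Worth noting: Option::get_or_insert_default needs a reasonably recent toolchain, as it was only stabilized around Rust 1.83.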
@@ -1,4 +0,0 @@
----
-source: milli/src/index.rs
----
-[0, ]

@@ -1,4 +0,0 @@
----
-source: milli/src/index.rs
----
-[]

@@ -0,0 +1,4 @@
+---
+source: crates/milli/src/test_index.rs
+---
+[0, ]

@@ -0,0 +1,4 @@
+---
+source: crates/milli/src/test_index.rs
+---
+[]
crates/milli/src/test_index.rs (new file, +1399)
File diff suppressed because it is too large
@@ -127,7 +127,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
         // merge all deletions
         let obkv = KvReaderDelAdd::from_slice(value);
         if let Some(value) = obkv.get(DelAdd::Deletion) {
-            let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid);
+            let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid)
+                || settings_diff.old.disabled_typos_terms.is_exact(w);
             buffer.clear();
             let mut obkv = KvWriterDelAdd::new(&mut buffer);
             obkv.insert(DelAdd::Deletion, value)?;
@@ -139,7 +140,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
         }
         // merge all additions
         if let Some(value) = obkv.get(DelAdd::Addition) {
-            let add_in_exact = settings_diff.new.exact_attributes.contains(&fid);
+            let add_in_exact = settings_diff.new.exact_attributes.contains(&fid)
+                || settings_diff.new.disabled_typos_terms.is_exact(w);
             buffer.clear();
             let mut obkv = KvWriterDelAdd::new(&mut buffer);
             obkv.insert(DelAdd::Addition, value)?;
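The two added `|| settings_diff.{old,new}.disabled_typos_terms.is_exact(w)` clauses are what route purely numeric words into the exact-word database during incremental updates: such a word is added to (and deleted from) exact_word_docids exactly as if its field had been declared an exact attribute, which is the existing mechanism that exempts a word from typo matching.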
@@ -273,14 +273,11 @@ pub(crate) fn write_typed_chunk_into_index(
             unreachable!();
         };
         let clonable_word_docids = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
-        let clonable_exact_word_docids =
-            unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?;
 
         word_docids_builder.push(word_docids_reader.into_cursor()?);
         exact_word_docids_builder.push(exact_word_docids_reader.into_cursor()?);
         word_fid_docids_builder.push(word_fid_docids_reader.into_cursor()?);
         fst_merger_builder.push(clonable_word_docids.into_cursor()?);
-        fst_merger_builder.push(clonable_exact_word_docids.into_cursor()?);
     }
 
     let word_docids_merger = word_docids_builder.build();
@@ -319,8 +319,11 @@ impl WordDocidsExtractors {
         let doc_alloc = &context.doc_alloc;
 
         let exact_attributes = index.exact_attributes(rtxn)?;
-        let is_exact_attribute =
-            |fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr));
+        let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
+        let is_exact = |fname: &str, word: &str| {
+            exact_attributes.iter().any(|attr| contained_in(fname, attr))
+                || disabled_typos_terms.is_exact(word)
+        };
         match document_change {
             DocumentChange::Deletion(inner) => {
                 let mut token_fn = |fname: &str, fid, pos, word: &str| {
@@ -328,7 +331,7 @@ impl WordDocidsExtractors {
                         fid,
                         pos,
                         word,
-                        is_exact_attribute(fname),
+                        is_exact(fname, word),
                         inner.docid(),
                         doc_alloc,
                     )
@@ -356,7 +359,7 @@ impl WordDocidsExtractors {
                         fid,
                         pos,
                         word,
-                        is_exact_attribute(fname),
+                        is_exact(fname, word),
                         inner.docid(),
                         doc_alloc,
                     )
@@ -372,7 +375,7 @@ impl WordDocidsExtractors {
                         fid,
                         pos,
                         word,
-                        is_exact_attribute(fname),
+                        is_exact(fname, word),
                         inner.docid(),
                         doc_alloc,
                     )
@@ -389,7 +392,7 @@ impl WordDocidsExtractors {
                         fid,
                         pos,
                         word,
-                        is_exact_attribute(fname),
+                        is_exact(fname, word),
                         inner.docid(),
                         doc_alloc,
                     )
@@ -9,6 +9,7 @@ pub use document_operation::{DocumentOperation, PayloadStats};
 use hashbrown::HashMap;
 use heed::RwTxn;
 pub use partial_dump::PartialDump;
+pub use post_processing::recompute_word_fst_from_word_docids_database;
 pub use update_by_function::UpdateByFunction;
 pub use write::ChannelCongestion;
 use write::{build_vectors, update_index, write_to_db};
@@ -131,6 +131,20 @@ fn compute_word_fst(
     }
 }
 
+pub fn recompute_word_fst_from_word_docids_database(index: &Index, wtxn: &mut RwTxn) -> Result<()> {
+    let fst = fst::Set::default().map_data(std::borrow::Cow::Owned)?;
+    let mut word_fst_builder = WordFstBuilder::new(&fst)?;
+    let words = index.word_docids.iter(wtxn)?.remap_data_type::<DecodeIgnore>();
+    for res in words {
+        let (word, _) = res?;
+        word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?;
+    }
+    let (word_fst_mmap, _) = word_fst_builder.build(index, wtxn)?;
+    index.main.remap_types::<Str, Bytes>().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?;
+
+    Ok(())
+}
+
 #[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")]
 fn compute_facet_search_database(
     index: &Index,
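The helper iterates the word docids database keys only (via DecodeIgnore) and registers every word as an addition in a fresh FST. A standalone sketch of the same rebuild idea using the fst crate directly (assuming the fst 0.4 API; the real code goes through WordFstBuilder and LMDB):

    use fst::SetBuilder;

    fn main() -> Result<(), fst::Error> {
        let mut builder = SetBuilder::memory();
        // LMDB iterates keys in sorted order, which is exactly what an FST
        // builder requires.
        for word in ["1337", "apple", "banana"] {
            builder.insert(word)?;
        }
        let set = builder.into_set();
        assert!(set.contains("apple"));
        Ok(())
    }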
@@ -17,6 +17,7 @@ use super::IndexerConfig;
 use crate::attribute_patterns::PatternMatch;
 use crate::constants::RESERVED_GEO_FIELD_NAME;
 use crate::criterion::Criterion;
+use crate::disabled_typos_terms::DisabledTyposTerms;
 use crate::error::UserError;
 use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
 use crate::filterable_attributes_rules::match_faceted_field;
@@ -169,6 +170,7 @@ pub struct Settings<'a, 't, 'i> {
     synonyms: Setting<BTreeMap<String, Vec<String>>>,
     primary_key: Setting<String>,
     authorize_typos: Setting<bool>,
+    disable_on_numbers: Setting<bool>,
     min_word_len_two_typos: Setting<u8>,
     min_word_len_one_typo: Setting<u8>,
     exact_words: Setting<BTreeSet<String>>,
@@ -207,6 +209,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
             synonyms: Setting::NotSet,
             primary_key: Setting::NotSet,
             authorize_typos: Setting::NotSet,
+            disable_on_numbers: Setting::NotSet,
             exact_words: Setting::NotSet,
             min_word_len_two_typos: Setting::NotSet,
             min_word_len_one_typo: Setting::NotSet,
@@ -354,6 +357,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         self.min_word_len_one_typo = Setting::Reset;
     }
 
+    pub fn set_disable_on_numbers(&mut self, disable_on_numbers: bool) {
+        self.disable_on_numbers = Setting::Set(disable_on_numbers);
+    }
+
+    pub fn reset_disable_on_numbers(&mut self) {
+        self.disable_on_numbers = Setting::Reset;
+    }
+
     pub fn set_exact_words(&mut self, words: BTreeSet<String>) {
         self.exact_words = Setting::Set(words);
     }
@@ -866,6 +877,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         Ok(())
     }
 
+    fn update_disabled_typos_terms(&mut self) -> Result<()> {
+        let mut disabled_typos_terms = self.index.disabled_typos_terms(self.wtxn)?;
+        match self.disable_on_numbers {
+            Setting::Set(disable_on_numbers) => {
+                disabled_typos_terms.disable_on_numbers = disable_on_numbers;
+            }
+            Setting::Reset => {
+                self.index.delete_disabled_typos_terms(self.wtxn)?;
+                disabled_typos_terms.disable_on_numbers =
+                    DisabledTyposTerms::default().disable_on_numbers;
+            }
+            Setting::NotSet => (),
+        }
+
+        self.index.put_disabled_typos_terms(self.wtxn, &disabled_typos_terms)?;
+        Ok(())
+    }
+
     fn update_exact_words(&mut self) -> Result<()> {
         match self.exact_words {
             Setting::Set(ref mut words) => {
@@ -1246,6 +1275,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         self.update_prefix_search()?;
         self.update_facet_search()?;
         self.update_localized_attributes_rules()?;
+        self.update_disabled_typos_terms()?;
 
         let embedding_config_updates = self.update_embedding_configs()?;
 
@@ -1327,6 +1357,7 @@ impl InnerIndexSettingsDiff {
                 || old_settings.prefix_search != new_settings.prefix_search
                 || old_settings.localized_attributes_rules
                     != new_settings.localized_attributes_rules
+                || old_settings.disabled_typos_terms != new_settings.disabled_typos_terms
         };
 
         let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
@@ -1526,6 +1557,7 @@ pub(crate) struct InnerIndexSettings {
     pub user_defined_searchable_attributes: Option<Vec<String>>,
     pub sortable_fields: HashSet<String>,
     pub exact_attributes: HashSet<FieldId>,
+    pub disabled_typos_terms: DisabledTyposTerms,
     pub proximity_precision: ProximityPrecision,
     pub embedding_configs: EmbeddingConfigs,
     pub geo_fields_ids: Option<(FieldId, FieldId)>,
@@ -1574,7 +1606,7 @@ impl InnerIndexSettings {
             .map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
         let builder = MetadataBuilder::from_index(index, rtxn)?;
         let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
-
+        let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
         Ok(Self {
             stop_words,
             allowed_separators,
@@ -1592,6 +1624,7 @@ impl InnerIndexSettings {
             geo_fields_ids,
             prefix_search,
             facet_search,
+            disabled_typos_terms,
         })
     }
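End to end, the new disable_on_numbers knob flows from the builder into update_disabled_typos_terms and finally into the DISABLED_TYPOS_TERMS key of the main database. A hypothetical usage sketch (the exact Settings::execute signature varies between milli versions, so treat the callbacks as placeholders):

    let mut wtxn = index.write_txn()?;
    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);
    builder.set_disable_on_numbers(true);
    builder.execute(|_| (), || false)?; // progress callback, abort hook
    wtxn.commit()?;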
@@ -896,6 +896,7 @@ fn test_correct_settings_init() {
             localized_attributes_rules,
             prefix_search,
             facet_search,
+            disable_on_numbers,
         } = settings;
         assert!(matches!(searchable_fields, Setting::NotSet));
         assert!(matches!(displayed_fields, Setting::NotSet));
@@ -923,6 +924,7 @@ fn test_correct_settings_init() {
         assert!(matches!(localized_attributes_rules, Setting::NotSet));
         assert!(matches!(prefix_search, Setting::NotSet));
         assert!(matches!(facet_search, Setting::NotSet));
+        assert!(matches!(disable_on_numbers, Setting::NotSet));
     })
     .unwrap();
 }
@@ -1,12 +1,14 @@
 mod v1_12;
 mod v1_13;
 mod v1_14;
+mod v1_15;
 
 use heed::RwTxn;
 use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
 use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
 use v1_14::Latest_V1_13_To_Latest_V1_14;
+use v1_15::Latest_V1_14_To_Latest_V1_15;
 
 use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
 use crate::progress::{Progress, VariableNameStep};
 use crate::{Index, InternalError, Result};
@@ -23,12 +25,16 @@ trait UpgradeIndex {
 }
 
 /// Return true if the cached stats of the index must be regenerated
-pub fn upgrade(
+pub fn upgrade<MSP>(
     wtxn: &mut RwTxn,
     index: &Index,
     db_version: (u32, u32, u32),
+    must_stop_processing: MSP,
     progress: Progress,
-) -> Result<bool> {
+) -> Result<bool>
+where
+    MSP: Fn() -> bool + Sync,
+{
     let from = index.get_version(wtxn)?.unwrap_or(db_version);
     let upgrade_functions: &[&dyn UpgradeIndex] = &[
         &V1_12_To_V1_12_3 {},
@@ -36,6 +42,10 @@ pub fn upgrade(
         &V1_13_0_To_V1_13_1 {},
         &V1_13_1_To_Latest_V1_13 {},
         &Latest_V1_13_To_Latest_V1_14 {},
+        &Latest_V1_14_To_Latest_V1_15 {},
+        // This is the last upgrade function, it will be called when the index is up to date.
+        // any other upgrade function should be added before this one.
+        &ToCurrentNoOp {},
     ];
 
     let start = match from {
@@ -43,8 +53,9 @@ pub fn upgrade(
         (1, 12, 3..) => 1,
         (1, 13, 0) => 2,
         (1, 13, _) => 4,
+        (1, 14, _) => 5,
         // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
-        (1, 14, _) => 4,
+        (1, 15, _) => 6,
         (major, minor, patch) => {
             return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into())
         }
@@ -56,6 +67,9 @@ pub fn upgrade(
     let mut current_version = from;
     let mut regenerate_stats = false;
     for (i, upgrade) in upgrade_path.iter().enumerate() {
+        if (must_stop_processing)() {
+            return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
+        }
         let target = upgrade.target_version();
         progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
             format!(
@@ -77,3 +91,22 @@ pub fn upgrade(
 
     Ok(regenerate_stats)
 }
+
+#[allow(non_camel_case_types)]
+struct ToCurrentNoOp {}
+
+impl UpgradeIndex for ToCurrentNoOp {
+    fn upgrade(
+        &self,
+        _wtxn: &mut RwTxn,
+        _index: &Index,
+        _original: (u32, u32, u32),
+        _progress: Progress,
+    ) -> Result<bool> {
+        Ok(false)
+    }
+
+    fn target_version(&self) -> (u32, u32, u32) {
+        (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
+    }
+}
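To see why the match arms change, it helps to line the indices up against the new upgrade_functions slice. A self-contained sketch of the dispatch (the (1, 12, 0..=2) arm is an assumption from context, since the hunk starts below it):

    fn start_index(from: (u32, u32, u32)) -> Option<usize> {
        match from {
            (1, 12, 0..=2) => Some(0), // assumed arm, elided by the hunk above
            (1, 12, 3..) => Some(1),
            (1, 13, 0) => Some(2),
            (1, 13, _) => Some(4),
            (1, 14, _) => Some(5), // new: run Latest_V1_14_To_Latest_V1_15, then the no-op
            (1, 15, _) => Some(6), // new: only the trailing ToCurrentNoOp
            _ => None,             // CannotUpgradeToVersion in the real code
        }
    }

    fn main() {
        assert_eq!(start_index((1, 14, 2)), Some(5));
        assert_eq!(start_index((1, 15, 0)), Some(6));
    }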
@@ -1,7 +1,6 @@
 use heed::RwTxn;
 
 use super::UpgradeIndex;
-use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
 use crate::database_stats::DatabaseStats;
 use crate::progress::Progress;
 use crate::{make_enum_progress, Index, Result};
@@ -51,10 +50,6 @@ impl UpgradeIndex for V1_13_1_To_Latest_V1_13 {
     }
 
     fn target_version(&self) -> (u32, u32, u32) {
-        (
-            VERSION_MAJOR.parse().unwrap(),
-            VERSION_MINOR.parse().unwrap(),
-            VERSION_PATCH.parse().unwrap(),
-        )
+        (1, 13, 3)
     }
 }
crates/milli/src/update/upgrade/v1_15.rs (new file, +35)
@@ -0,0 +1,35 @@
+use heed::RwTxn;
+
+use super::UpgradeIndex;
+use crate::progress::Progress;
+use crate::update::new::indexer::recompute_word_fst_from_word_docids_database;
+use crate::{make_enum_progress, Index, Result};
+
+#[allow(non_camel_case_types)]
+pub(super) struct Latest_V1_14_To_Latest_V1_15();
+
+impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 {
+    fn upgrade(
+        &self,
+        wtxn: &mut RwTxn,
+        index: &Index,
+        _original: (u32, u32, u32),
+        progress: Progress,
+    ) -> Result<bool> {
+        // Recompute the word FST from the word docids database.
+        make_enum_progress! {
+            enum TypoTolerance {
+                RecomputeWordFst,
+            }
+        };
+
+        progress.update_progress(TypoTolerance::RecomputeWordFst);
+        recompute_word_fst_from_word_docids_database(index, wtxn)?;
+
+        Ok(false)
+    }
+
+    fn target_version(&self) -> (u32, u32, u32) {
+        (1, 15, 0)
+    }
+}
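This migration is the consumer of the recompute_word_fst_from_word_docids_database helper exported earlier in the diff: it rebuilds the words FST once at upgrade time, presumably so that pre-v1.15 indexes end up consistent with the new FST construction shown in the typed-chunk hunk, where exact word docids are no longer merged into the FST separately.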