5187: Bring back v1.12.0 of pre-release changes into `main` r=irevoire a=curquiza



Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
Co-authored-by: Many the fish <many@meilisearch.com>
meili-bors[bot] 2024-12-23 10:59:33 +00:00 committed by GitHub
commit d3491851bc
168 changed files with 5778 additions and 2556 deletions


@ -280,7 +280,7 @@ fn starts_with(selector: &str, key: &str) -> bool {
pub fn validate_document_id_str(document_id: &str) -> Option<&str> {
if document_id.is_empty()
|| document_id.len() > 512
|| document_id.len() >= 512
|| !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
{
None
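
The tightened bound (`>= 512` instead of `> 512`) means document ids may now be at most 511 bytes, matching the updated error message in the next file. A standalone sketch of the new boundary behavior, assuming the valid case (elided from the hunk) returns `Some(id)`:

fn validate_document_id_str(document_id: &str) -> Option<&str> {
    if document_id.is_empty()
        || document_id.len() >= 512
        || !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
    {
        None
    } else {
        Some(document_id) // assumed valid-case return, not shown in the hunk
    }
}

fn main() {
    assert!(validate_document_id_str(&"a".repeat(511)).is_some()); // 511 bytes: still valid
    assert!(validate_document_id_str(&"a".repeat(512)).is_none()); // 512 bytes: now rejected
}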


@ -3,6 +3,7 @@ use std::convert::Infallible;
use std::fmt::Write;
use std::{io, str};
use bstr::BString;
use heed::{Error as HeedError, MdbError};
use rayon::ThreadPoolBuildError;
use rhai::EvalAltResult;
@ -61,6 +62,10 @@ pub enum InternalError {
Serialization(#[from] SerializationError),
#[error(transparent)]
Store(#[from] MdbError),
#[error("Cannot delete {key:?} from database {database_name}: {error}")]
StoreDeletion { database_name: &'static str, key: BString, error: heed::Error },
#[error("Cannot insert {key:?} and value with length {value_length} into database {database_name}: {error}")]
StorePut { database_name: &'static str, key: BString, value_length: usize, error: heed::Error },
#[error(transparent)]
Utf8(#[from] str::Utf8Error),
#[error("An indexation process was explicitly aborted")]
@ -109,7 +114,7 @@ pub enum UserError {
"Document identifier `{}` is invalid. \
A document identifier can be of type integer or string, \
only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
and can not be more than 512 bytes.", .document_id.to_string()
and can not be more than 511 bytes.", .document_id.to_string()
)]
InvalidDocumentId { document_id: Value },
#[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]


@ -97,7 +97,7 @@ impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size];
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
CboRoaringBitmapCodec::serialize_into_vec(&value.bitmap, &mut v);
Ok(Cow::Owned(v))
}
}


@ -27,18 +27,27 @@ impl CboRoaringBitmapCodec {
}
}
pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
pub fn serialize_into_vec(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
Self::serialize_into_writer(roaring, vec).unwrap()
}
pub fn serialize_into_writer<W: io::Write>(
roaring: &RoaringBitmap,
mut writer: W,
) -> io::Result<()> {
if roaring.len() <= THRESHOLD as u64 {
// If the number of items (u32s) to encode is less than or equal to the threshold
// it means that it would weigh the same or less than the RoaringBitmap
// header, so we directly encode them using ByteOrder instead.
for integer in roaring {
vec.write_u32::<NativeEndian>(integer).unwrap();
writer.write_u32::<NativeEndian>(integer)?;
}
} else {
// Otherwise, we use the classic RoaringBitmapCodec that writes a header.
roaring.serialize_into(vec).unwrap();
roaring.serialize_into(writer)?;
}
Ok(())
}
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
@ -143,7 +152,7 @@ impl CboRoaringBitmapCodec {
return Ok(None);
}
Self::serialize_into(&previous, buffer);
Self::serialize_into_vec(&previous, buffer);
Ok(Some(&buffer[..]))
}
}
@ -169,7 +178,7 @@ impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec);
Self::serialize_into_vec(item, &mut vec);
Ok(Cow::Owned(vec))
}
}
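
The rename to serialize_into_vec and the new serialize_into_writer generalize the codec from Vec<u8> to any io::Write, and propagate I/O errors instead of unwrapping. A standalone sketch of the writer-based path; the THRESHOLD value here is an assumption (the real constant lives elsewhere in this file):

use std::io;
use byteorder::{NativeEndian, WriteBytesExt};
use roaring::RoaringBitmap;

const THRESHOLD: usize = 7; // assumed value, for illustration only

fn serialize_into_writer<W: io::Write>(roaring: &RoaringBitmap, mut writer: W) -> io::Result<()> {
    if roaring.len() <= THRESHOLD as u64 {
        // Small bitmaps: raw native-endian u32s, cheaper than a roaring header.
        for integer in roaring {
            writer.write_u32::<NativeEndian>(integer)?;
        }
    } else {
        // Large bitmaps: the classic roaring serialization with a header.
        roaring.serialize_into(writer)?;
    }
    Ok(())
}

fn main() -> io::Result<()> {
    let bitmap: RoaringBitmap = (0u32..3).collect();
    let mut buffer = Vec::new();
    serialize_into_writer(&bitmap, &mut buffer)?;
    assert_eq!(buffer.len(), 3 * 4); // three raw u32s, no header
    Ok(())
}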


@ -70,6 +70,8 @@ pub mod main_key {
pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
pub const SEARCH_CUTOFF: &str = "search_cutoff";
pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
pub const FACET_SEARCH: &str = "facet_search";
pub const PREFIX_SEARCH: &str = "prefix_search";
}
pub mod db_name {
@ -1233,6 +1235,10 @@ impl Index {
)
}
pub(crate) fn delete_words_prefixes_fst(&self, wtxn: &mut RwTxn<'_>) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(wtxn, main_key::WORDS_PREFIXES_FST_KEY)
}
/// Returns the FST which is the words prefixes dictionary of the engine.
pub fn words_prefixes_fst<'t>(&self, rtxn: &'t RoTxn<'t>) -> Result<fst::Set<Cow<'t, [u8]>>> {
match self.main.remap_types::<Str, Bytes>().get(rtxn, main_key::WORDS_PREFIXES_FST_KEY)? {
@ -1562,6 +1568,41 @@ impl Index {
self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
}
pub fn prefix_search(&self, txn: &RoTxn<'_>) -> heed::Result<Option<PrefixSearch>> {
self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().get(txn, main_key::PREFIX_SEARCH)
}
pub(crate) fn put_prefix_search(
&self,
txn: &mut RwTxn<'_>,
val: PrefixSearch,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<PrefixSearch>>().put(
txn,
main_key::PREFIX_SEARCH,
&val,
)
}
pub(crate) fn delete_prefix_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::PREFIX_SEARCH)
}
pub fn facet_search(&self, txn: &RoTxn<'_>) -> heed::Result<bool> {
self.main
.remap_types::<Str, SerdeBincode<bool>>()
.get(txn, main_key::FACET_SEARCH)
.map(|v| v.unwrap_or(true))
}
pub(crate) fn put_facet_search(&self, txn: &mut RwTxn<'_>, val: bool) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeBincode<bool>>().put(txn, main_key::FACET_SEARCH, &val)
}
pub(crate) fn delete_facet_search(&self, txn: &mut RwTxn<'_>) -> heed::Result<bool> {
self.main.remap_key_type::<Str>().delete(txn, main_key::FACET_SEARCH)
}
pub fn localized_attributes_rules(
&self,
rtxn: &RoTxn<'_>,
@ -1647,12 +1688,9 @@ impl Index {
Ok(res)
}
pub fn prefix_settings(&self, _rtxn: &RoTxn<'_>) -> Result<PrefixSettings> {
Ok(PrefixSettings {
compute_prefixes: true,
max_prefix_length: 4,
prefix_count_threshold: 100,
})
pub fn prefix_settings(&self, rtxn: &RoTxn<'_>) -> Result<PrefixSettings> {
let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
}
}
@ -1665,9 +1703,17 @@ pub struct IndexEmbeddingConfig {
#[derive(Debug, Deserialize, Serialize)]
pub struct PrefixSettings {
pub prefix_count_threshold: u64,
pub prefix_count_threshold: usize,
pub max_prefix_length: usize,
pub compute_prefixes: bool,
pub compute_prefixes: PrefixSearch,
}
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub enum PrefixSearch {
#[default]
IndexingTime,
Disabled,
}
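
Given the derives above, the enum defaults to IndexingTime and serializes in camelCase. A self-contained check of what that implies for the settings payload:

use serde::{Deserialize, Serialize};

#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub enum PrefixSearch {
    #[default]
    IndexingTime,
    Disabled,
}

fn main() {
    assert_eq!(PrefixSearch::default(), PrefixSearch::IndexingTime);
    assert_eq!(serde_json::to_string(&PrefixSearch::IndexingTime).unwrap(), r#""indexingTime""#);
    assert_eq!(serde_json::to_string(&PrefixSearch::Disabled).unwrap(), r#""disabled""#);
}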
#[derive(Serialize, Deserialize)]
@ -1688,6 +1734,7 @@ pub(crate) mod tests {
use crate::error::{Error, InternalError};
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::settings::InnerIndexSettings;
use crate::update::{
@ -1764,7 +1811,7 @@ pub(crate) mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@ -1775,6 +1822,7 @@ pub(crate) mod tests {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1782,7 +1830,7 @@ pub(crate) mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
})
.unwrap()?;
@ -1854,7 +1902,7 @@ pub(crate) mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)?;
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@ -1865,6 +1913,7 @@ pub(crate) mod tests {
indexer::index(
wtxn,
&self.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1872,7 +1921,7 @@ pub(crate) mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
})
.unwrap()?;
@ -1934,7 +1983,7 @@ pub(crate) mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -1945,6 +1994,7 @@ pub(crate) mod tests {
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -1952,7 +2002,7 @@ pub(crate) mod tests {
&document_changes,
embedders,
&|| should_abort.load(Relaxed),
&|_| (),
&Progress::default(),
)
})
.unwrap()


@ -1,6 +1,7 @@
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
#![allow(clippy::type_complexity)]
#[cfg(not(windows))]
#[cfg(test)]
#[global_allocator]
pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
@ -30,6 +31,7 @@ pub mod vector;
#[macro_use]
pub mod snapshot_tests;
mod fieldids_weights_map;
pub mod progress;
use std::collections::{BTreeMap, HashMap};
use std::convert::{TryFrom, TryInto};


@ -0,0 +1,152 @@
use std::any::TypeId;
use std::borrow::Cow;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};
use serde::Serialize;
pub trait Step: 'static + Send + Sync {
fn name(&self) -> Cow<'static, str>;
fn current(&self) -> u32;
fn total(&self) -> u32;
}
#[derive(Clone, Default)]
pub struct Progress {
steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>,
}
impl Progress {
pub fn update_progress<P: Step>(&self, sub_progress: P) {
let mut steps = self.steps.write().unwrap();
let step_type = TypeId::of::<P>();
if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) {
steps.truncate(idx);
}
steps.push((step_type, Box::new(sub_progress)));
}
// TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
pub fn as_progress_view(&self) -> ProgressView {
let steps = self.steps.read().unwrap();
let mut percentage = 0.0;
let mut prev_factors = 1.0;
let mut step_view = Vec::with_capacity(steps.len());
for (_, step) in steps.iter() {
prev_factors *= step.total() as f32;
percentage += step.current() as f32 / prev_factors;
step_view.push(ProgressStepView {
current_step: step.name(),
finished: step.current(),
total: step.total(),
});
}
ProgressView { steps: step_view, percentage: percentage * 100.0 }
}
}
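
as_progress_view folds nested steps into a single percentage: each level contributes current/total scaled by the product of its ancestors' totals. A standalone check of the arithmetic, with two made-up nesting levels:

fn main() {
    // (current, total) per nesting level: outer step at 2/4, inner step at 1/10.
    let steps = [(2u32, 4u32), (1, 10)];
    let mut percentage = 0.0f32;
    let mut prev_factors = 1.0f32;
    for (current, total) in steps {
        prev_factors *= total as f32;
        percentage += current as f32 / prev_factors;
    }
    // 2/4 = 0.5, plus 1/(4*10) = 0.025, i.e. 52.5%.
    assert!((percentage * 100.0 - 52.5).abs() < 1e-4);
}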
/// This trait lets you use the AtomicSubStep defined right below.
/// The name must be a const that never changes, but that can't be enforced by the type system because it would make the trait non object-safe.
/// By forcing the Default trait + the &'static str we make it harder to misuse the trait.
pub trait NamedStep: 'static + Send + Sync + Default {
fn name(&self) -> &'static str;
}
/// Structure to quickly define steps that need very quick, lockless updating of their current step.
/// You can use this struct if:
/// - The name of the step doesn't change
/// - The total number of steps doesn't change
pub struct AtomicSubStep<Name: NamedStep> {
unit_name: Name,
current: Arc<AtomicU32>,
total: u32,
}
impl<Name: NamedStep> AtomicSubStep<Name> {
pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
let current = Arc::new(AtomicU32::new(0));
(current.clone(), Self { current, total, unit_name: Name::default() })
}
}
impl<Name: NamedStep> Step for AtomicSubStep<Name> {
fn name(&self) -> Cow<'static, str> {
self.unit_name.name().into()
}
fn current(&self) -> u32 {
self.current.load(Ordering::Relaxed)
}
fn total(&self) -> u32 {
self.total
}
}
#[macro_export]
macro_rules! make_enum_progress {
($visibility:vis enum $name:ident { $($variant:ident,)+ }) => {
#[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[allow(clippy::enum_variant_names)]
$visibility enum $name {
$($variant),+
}
impl Step for $name {
fn name(&self) -> Cow<'static, str> {
use convert_case::Casing;
match self {
$(
$name::$variant => stringify!($variant).from_case(convert_case::Case::Camel).to_case(convert_case::Case::Lower).into()
),+
}
}
fn current(&self) -> u32 {
*self as u32
}
fn total(&self) -> u32 {
Self::CARDINALITY as u32
}
}
};
}
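
A hypothetical invocation of the macro (the enum and variant names are invented for illustration): with Sequence derived, current() is the variant's discriminant and total() the number of variants, so each variant acts as one coarse step.

make_enum_progress! {
    pub enum ExampleIndexingStep {
        ExtractingDocuments,
        WritingToDatabase,
        Finalizing,
    }
}
// ExampleIndexingStep::WritingToDatabase.current() == 1, .total() == 3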
#[macro_export]
macro_rules! make_atomic_progress {
($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => {
#[derive(Default, Debug, Clone, Copy)]
pub struct $struct_name {}
impl NamedStep for $struct_name {
fn name(&self) -> &'static str {
$step_name
}
}
pub type $atomic_struct_name = AtomicSubStep<$struct_name>;
};
}
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );
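
A sketch of how the generated AtomicDocumentStep might be driven from an indexing loop (the surrounding function and names are illustrative): the step is registered once on the shared Progress, then the cloned Arc<AtomicU32> is bumped locklessly per document.

fn index_documents(progress: &Progress, documents: &[&str]) {
    let (count, step) = AtomicDocumentStep::new(documents.len() as u32);
    progress.update_progress(step);
    for _document in documents {
        // ... extract and write the document ...
        count.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
    }
}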
#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProgressView {
pub steps: Vec<ProgressStepView>,
pub percentage: f32,
}
#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProgressStepView {
pub current_step: Cow<'static, str>,
pub finished: u32,
pub total: u32,
}


@ -3,12 +3,13 @@ use std::collections::BTreeMap;
use std::fmt::{self, Debug};
use bumpalo::Bump;
use bumparaw_collections::{RawMap, RawVec, Value};
use liquid::model::{
ArrayView, DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, ScalarCow, State,
Value as LiquidValue,
};
use liquid::{ObjectView, ValueView};
use raw_collections::{RawMap, RawVec};
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
@ -195,7 +196,7 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ObjectView for ParseableDocument<'doc
}
impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc, D> {
fn as_debug(&self) -> &dyn fmt::Debug {
fn as_debug(&self) -> &dyn Debug {
self
}
fn render(&self) -> liquid::model::DisplayCow<'_> {
@ -243,14 +244,13 @@ impl<'doc, D: DocumentTrait<'doc> + Debug> ValueView for ParseableDocument<'doc,
}
}
#[derive(Debug)]
struct ParseableValue<'doc> {
value: raw_collections::Value<'doc>,
value: Value<'doc, FxBuildHasher>,
}
impl<'doc> ParseableValue<'doc> {
pub fn new(value: &'doc RawValue, doc_alloc: &'doc Bump) -> Self {
let value = raw_collections::Value::from_raw_value(value, doc_alloc).unwrap();
let value = Value::from_raw_value_and_hasher(value, FxBuildHasher, doc_alloc).unwrap();
Self { value }
}
@ -260,19 +260,19 @@ impl<'doc> ParseableValue<'doc> {
}
// transparent newtype for implementing ValueView
#[repr(transparent)]
#[derive(Debug)]
struct ParseableMap<'doc>(RawMap<'doc>);
#[repr(transparent)]
struct ParseableMap<'doc>(RawMap<'doc, FxBuildHasher>);
// transparent newtype for implementing ValueView
#[repr(transparent)]
#[derive(Debug)]
#[repr(transparent)]
struct ParseableArray<'doc>(RawVec<'doc>);
impl<'doc> ParseableMap<'doc> {
pub fn as_parseable<'a>(map: &'a RawMap<'doc>) -> &'a ParseableMap<'doc> {
pub fn as_parseable<'a>(map: &'a RawMap<'doc, FxBuildHasher>) -> &'a ParseableMap<'doc> {
// SAFETY: repr(transparent)
unsafe { &*(map as *const RawMap as *const Self) }
unsafe { &*(map as *const RawMap<FxBuildHasher> as *const Self) }
}
}
@ -447,8 +447,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn render(&self) -> DisplayCow<'_> {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil.render(),
Value::Bool(v) => v.render(),
@ -464,8 +465,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn source(&self) -> DisplayCow<'_> {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil.source(),
Value::Bool(v) => ValueView::source(v),
@ -481,8 +483,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn type_name(&self) -> &'static str {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil.type_name(),
Value::Bool(v) => v.type_name(),
@ -498,7 +501,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn query_state(&self, state: State) -> bool {
use raw_collections::Value;
use bumparaw_collections::Value;
match &self.value {
Value::Null => ValueView::query_state(&LiquidValue::Nil, state),
Value::Bool(v) => ValueView::query_state(v, state),
@ -515,7 +519,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn to_kstr(&self) -> KStringCow<'_> {
use raw_collections::Value;
use bumparaw_collections::Value;
match &self.value {
Value::Null => ValueView::to_kstr(&LiquidValue::Nil),
Value::Bool(v) => ValueView::to_kstr(v),
@ -527,12 +532,14 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn to_value(&self) -> LiquidValue {
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Null => LiquidValue::Nil,
Value::Bool(v) => LiquidValue::Scalar(liquid::model::ScalarCow::new(*v)),
Value::Number(number) => match number {
raw_collections::value::Number::PosInt(number) => {
Number::PosInt(number) => {
let number: i64 = match (*number).try_into() {
Ok(number) => number,
Err(_) => {
@ -541,12 +548,8 @@ impl<'doc> ValueView for ParseableValue<'doc> {
};
LiquidValue::Scalar(ScalarCow::new(number))
}
raw_collections::value::Number::NegInt(number) => {
LiquidValue::Scalar(ScalarCow::new(*number))
}
raw_collections::value::Number::Finite(number) => {
LiquidValue::Scalar(ScalarCow::new(*number))
}
Number::NegInt(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
Number::Finite(number) => LiquidValue::Scalar(ScalarCow::new(*number)),
},
Value::String(s) => LiquidValue::Scalar(liquid::model::ScalarCow::new(s.to_string())),
Value::Array(raw_vec) => ParseableArray::as_parseable(raw_vec).to_value(),
@ -555,8 +558,9 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn as_scalar(&self) -> Option<liquid::model::ScalarCow<'_>> {
use raw_collections::value::Number;
use raw_collections::Value;
use bumparaw_collections::value::Number;
use bumparaw_collections::Value;
match &self.value {
Value::Bool(v) => Some(liquid::model::ScalarCow::new(*v)),
Value::Number(number) => match number {
@ -576,34 +580,41 @@ impl<'doc> ValueView for ParseableValue<'doc> {
}
fn is_scalar(&self) -> bool {
use raw_collections::Value;
use bumparaw_collections::Value;
matches!(&self.value, Value::Bool(_) | Value::Number(_) | Value::String(_))
}
fn as_array(&self) -> Option<&dyn liquid::model::ArrayView> {
if let raw_collections::Value::Array(array) = &self.value {
if let Value::Array(array) = &self.value {
return Some(ParseableArray::as_parseable(array) as _);
}
None
}
fn is_array(&self) -> bool {
matches!(&self.value, raw_collections::Value::Array(_))
matches!(&self.value, bumparaw_collections::Value::Array(_))
}
fn as_object(&self) -> Option<&dyn ObjectView> {
if let raw_collections::Value::Object(object) = &self.value {
if let Value::Object(object) = &self.value {
return Some(ParseableMap::as_parseable(object) as _);
}
None
}
fn is_object(&self) -> bool {
matches!(&self.value, raw_collections::Value::Object(_))
matches!(&self.value, bumparaw_collections::Value::Object(_))
}
fn is_nil(&self) -> bool {
matches!(&self.value, raw_collections::Value::Null)
matches!(&self.value, bumparaw_collections::Value::Null)
}
}
impl Debug for ParseableValue<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.debug_struct("ParseableValue").field("value", &self.value).finish()
}
}


@ -38,6 +38,16 @@ pub struct RenderPromptError {
pub fault: FaultSource,
}
impl RenderPromptError {
pub(crate) fn missing_context_with_external_docid(
external_docid: String,
inner: liquid::Error,
) -> RenderPromptError {
Self {
kind: RenderPromptErrorKind::MissingContextWithExternalDocid(external_docid, inner),
fault: FaultSource::User,
}
}
pub(crate) fn missing_context(inner: liquid::Error) -> RenderPromptError {
Self { kind: RenderPromptErrorKind::MissingContext(inner), fault: FaultSource::User }
}
@ -47,6 +57,8 @@ impl RenderPromptError {
pub enum RenderPromptErrorKind {
#[error("missing field in document: {0}")]
MissingContext(liquid::Error),
#[error("missing field in document `{0}`: {1}")]
MissingContextWithExternalDocid(String, liquid::Error),
}
impl From<RenderPromptError> for crate::Error {


@ -119,6 +119,7 @@ impl Prompt {
'doc: 'a, // lifetime of the allocator, will live for an entire chunk of documents
>(
&self,
external_docid: &str,
document: impl crate::update::new::document::Document<'a> + Debug,
field_id_map: &RefCell<GlobalFieldsIdsMap>,
doc_alloc: &'doc Bump,
@ -130,9 +131,12 @@ impl Prompt {
self.max_bytes.unwrap_or_else(default_max_bytes).get(),
doc_alloc,
);
self.template
.render_to(&mut rendered, &context)
.map_err(RenderPromptError::missing_context)?;
self.template.render_to(&mut rendered, &context).map_err(|liquid_error| {
RenderPromptError::missing_context_with_external_docid(
external_docid.to_owned(),
liquid_error,
)
})?;
Ok(std::str::from_utf8(rendered.into_bump_slice())
.expect("render can only write UTF-8 because all inputs and processing preserve utf-8"))
}


@ -207,7 +207,11 @@ impl<'a> Search<'a> {
Ok(embedding) => embedding,
Err(error) => {
tracing::error!(error=%error, "Embedding failed");
return Ok((keyword_results, Some(0)));
return Ok(return_keyword_results(
self.limit,
self.offset,
keyword_results,
));
}
}
}


@ -274,7 +274,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
last_match_last_token_position_plus_one
} else {
// we have matched the end of possible tokens, there's nothing to advance
tokens.len() - 1
tokens.len()
}
};


@ -49,6 +49,7 @@ pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use self::vector_sort::VectorSort;
use crate::index::PrefixSearch;
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
@ -68,6 +69,7 @@ pub struct SearchContext<'ctx> {
pub term_interner: Interner<QueryTerm>,
pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<RestrictedFids>,
pub prefix_search: PrefixSearch,
}
impl<'ctx> SearchContext<'ctx> {
@ -85,6 +87,8 @@ impl<'ctx> SearchContext<'ctx> {
}
}
let prefix_search = index.prefix_search(txn)?.unwrap_or_default();
Ok(Self {
index,
txn,
@ -94,9 +98,14 @@ impl<'ctx> SearchContext<'ctx> {
term_interner: <_>::default(),
phrase_docids: <_>::default(),
restricted_fids: None,
prefix_search,
})
}
pub fn is_prefix_search_allowed(&self) -> bool {
self.prefix_search != PrefixSearch::Disabled
}
pub fn attributes_to_search_on(
&mut self,
attributes_to_search_on: &'ctx [String],


@ -28,6 +28,7 @@ pub fn located_query_terms_from_tokens(
words_limit: Option<usize>,
) -> Result<ExtractedTokens> {
let nbr_typos = number_of_typos_allowed(ctx)?;
let allow_prefix_search = ctx.is_prefix_search_allowed();
let mut query_terms = Vec::new();
@ -94,7 +95,7 @@ pub fn located_query_terms_from_tokens(
ctx,
word,
nbr_typos(word),
true,
allow_prefix_search,
false,
)?;
let located_term = LocatedQueryTerm {


@ -193,15 +193,23 @@ pub fn compute_phrase_docids(
if words.is_empty() {
return Ok(RoaringBitmap::new());
}
let mut candidates = RoaringBitmap::new();
let mut candidates = None;
for word in words.iter().flatten().copied() {
if let Some(word_docids) = ctx.word_docids(None, Word::Original(word))? {
candidates |= word_docids;
if let Some(candidates) = candidates.as_mut() {
*candidates &= word_docids;
} else {
candidates = Some(word_docids);
}
} else {
return Ok(RoaringBitmap::new());
}
}
let Some(mut candidates) = candidates else {
return Ok(RoaringBitmap::new());
};
let winsize = words.len().min(3);
for win in words.windows(winsize) {
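
The rewritten loop intersects word docids instead of unioning them, so a document must contain every word of the phrase to remain a candidate, and a word absent from the index now short-circuits to an empty result. A standalone reduction of the same fold:

use roaring::RoaringBitmap;

fn intersect_all(mut word_docids: impl Iterator<Item = RoaringBitmap>) -> RoaringBitmap {
    let Some(mut candidates) = word_docids.next() else {
        return RoaringBitmap::new();
    };
    for docids in word_docids {
        candidates &= docids; // keep only documents containing every word so far
    }
    candidates
}

fn main() {
    let a: RoaringBitmap = [1u32, 2, 3].into_iter().collect();
    let b: RoaringBitmap = [2u32, 3, 4].into_iter().collect();
    let expected: RoaringBitmap = [2u32, 3].into_iter().collect();
    assert_eq!(intersect_all([a, b].into_iter()), expected);
}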


@ -5,6 +5,7 @@ use bumpalo::Bump;
use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use crate::progress::Progress;
use crate::update::new::indexer;
use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use crate::vector::EmbeddingConfigs;
@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -83,6 +84,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
indexer::index(
&mut wtxn,
&index,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -90,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();


@ -172,6 +172,14 @@ impl<'i> FacetsUpdate<'i> {
incremental_update.execute(wtxn)?;
}
if !self.index.facet_search(wtxn)? {
// If facet search is disabled, we don't need to compute facet search databases.
// We clear the facet search databases.
self.index.facet_id_string_fst.clear(wtxn)?;
self.index.facet_id_normalized_string_strings.clear(wtxn)?;
return Ok(());
}
match self.normalized_delta_data {
Some(data) => index_facet_search(wtxn, data, self.index),
None => Ok(()),


@ -58,9 +58,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.map(|s| s.iter().map(String::as_str).collect());
let old_dictionary: Option<Vec<_>> =
settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let del_builder =
let mut del_builder =
tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
let del_tokenizer = del_builder.into_tokenizer();
let del_tokenizer = del_builder.build();
let new_stop_words = settings_diff.new.stop_words.as_ref();
let new_separators: Option<Vec<_>> = settings_diff
@ -70,9 +70,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
.map(|s| s.iter().map(String::as_str).collect());
let new_dictionary: Option<Vec<_>> =
settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
let add_builder =
let mut add_builder =
tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
let add_tokenizer = add_builder.into_tokenizer();
let add_tokenizer = add_builder.build();
// iterate over documents.
let mut cursor = obkv_documents.into_cursor()?;


@ -34,10 +34,12 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
extract_facet_string_docids_settings(docid_fid_facet_string, indexer, settings_diff)
} else {
let localized_field_ids = &settings_diff.new.localized_faceted_fields_ids;
let facet_search = settings_diff.new.facet_search;
extract_facet_string_docids_document_update(
docid_fid_facet_string,
indexer,
localized_field_ids,
facet_search,
)
}
}
@ -51,6 +53,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
docid_fid_facet_string: grenad::Reader<R>,
indexer: GrenadParameters,
localized_field_ids: &LocalizedFieldIds,
facet_search: bool,
) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
let max_memory = indexer.max_memory_by_thread();
@ -96,7 +99,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
let normalized_value = str::from_utf8(normalized_value_bytes)?;
// Facet search normalization
{
if facet_search {
let locales = localized_field_ids.locales(field_id);
let hyper_normalized_value = normalize_facet_string(normalized_value, locales);
@ -179,8 +182,10 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let new_locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
let are_same_locales = old_locales == new_locales;
let reindex_facet_search =
settings_diff.new.facet_search && !settings_diff.old.facet_search;
if is_same_value && are_same_locales {
if is_same_value && are_same_locales && !reindex_facet_search {
continue;
}
@ -191,18 +196,26 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
let normalized_value = str::from_utf8(normalized_value_bytes)?;
// Facet search normalization
{
let old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales);
let new_hyper_normalized_value = if are_same_locales {
&old_hyper_normalized_value
if settings_diff.new.facet_search {
let new_hyper_normalized_value = normalize_facet_string(normalized_value, new_locales);
let old_hyper_normalized_value;
let old_hyper_normalized_value = if !settings_diff.old.facet_search
|| deladd_reader.get(DelAdd::Deletion).is_none()
{
// if the facet search is disabled in the old settings or if no facet string is deleted,
// we don't need to normalize the facet string.
None
} else if are_same_locales {
Some(&new_hyper_normalized_value)
} else {
&normalize_facet_string(normalized_value, new_locales)
old_hyper_normalized_value = normalize_facet_string(normalized_value, old_locales);
Some(&old_hyper_normalized_value)
};
let set = BTreeSet::from_iter(std::iter::once(normalized_value));
// if the facet string is the same, we can put the deletion and addition in the same obkv.
if old_hyper_normalized_value == new_hyper_normalized_value.as_str() {
if old_hyper_normalized_value == Some(&new_hyper_normalized_value) {
// nothing to do if we delete and re-add the value.
if is_same_value {
continue;
@ -222,7 +235,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
} else {
// if the facet string is different, we need to insert the deletion and addition in different obkv because the related key is different.
// deletion
if deladd_reader.get(DelAdd::Deletion).is_some() {
if let Some(old_hyper_normalized_value) = old_hyper_normalized_value {
// insert old value
let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
buffer.clear();


@ -80,7 +80,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let new_faceted_fids: BTreeSet<_> =
settings_diff.new.faceted_fields_ids.iter().copied().collect();
if !settings_diff.settings_update_only || old_faceted_fids != new_faceted_fids {
if !settings_diff.settings_update_only || settings_diff.reindex_facets() {
let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? {
let obkv = obkv::KvReader::from_slice(value);
@ -112,8 +112,10 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
(field_id, None, add_value)
}
EitherOrBoth::Both(&field_id, _) => {
// during settings update, recompute the changing settings only.
if settings_diff.settings_update_only {
// during settings update, recompute the changing settings only unless a global change is detected.
if settings_diff.settings_update_only
&& !settings_diff.global_facet_settings_changed()
{
continue;
}


@ -29,6 +29,7 @@ pub use self::transform::{Transform, TransformOutput};
use super::new::StdResult;
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError};
use crate::index::{PrefixSearch, PrefixSettings};
use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
@ -82,8 +83,6 @@ pub struct IndexDocuments<'t, 'i, 'a, FP, FA> {
#[derive(Default, Debug, Clone)]
pub struct IndexDocumentsConfig {
pub words_prefix_threshold: Option<u32>,
pub max_prefix_length: Option<usize>,
pub words_positions_level_group_size: Option<NonZeroU32>,
pub words_positions_min_level_size: Option<NonZeroU32>,
pub update_method: IndexDocumentsMethod,
@ -565,14 +564,32 @@ where
self.index.words_prefixes_fst(self.wtxn)?.map_data(|cow| cow.into_owned())?;
// Run the words prefixes update operation.
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
if let Some(value) = self.config.words_prefix_threshold {
builder.threshold(value);
let PrefixSettings { prefix_count_threshold, max_prefix_length, compute_prefixes } =
self.index.prefix_settings(self.wtxn)?;
// If the prefix search is enabled at indexing time, we compute the prefixes.
if compute_prefixes == PrefixSearch::IndexingTime {
let mut builder = WordsPrefixesFst::new(self.wtxn, self.index);
builder.threshold(prefix_count_threshold);
builder.max_prefix_length(max_prefix_length);
builder.execute()?;
} else {
// If the prefix search is disabled at indexing time, we delete the previous words prefixes fst.
// And all the associated docids databases.
self.index.delete_words_prefixes_fst(self.wtxn)?;
self.index.word_prefix_docids.clear(self.wtxn)?;
self.index.exact_word_prefix_docids.clear(self.wtxn)?;
self.index.word_prefix_position_docids.clear(self.wtxn)?;
self.index.word_prefix_fid_docids.clear(self.wtxn)?;
databases_seen += 3;
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,
total_databases: TOTAL_POSTING_DATABASE_COUNT,
});
return Ok(());
}
if let Some(value) = self.config.max_prefix_length {
builder.max_prefix_length(value);
}
builder.execute()?;
if (self.should_abort)() {
return Err(Error::InternalError(InternalError::AbortedIndexation));
@ -749,6 +766,7 @@ mod tests {
use crate::documents::mmap_from_objects;
use crate::index::tests::TempIndex;
use crate::index::IndexEmbeddingConfig;
use crate::progress::Progress;
use crate::search::TermsMatchingStrategy;
use crate::update::new::indexer;
use crate::update::Setting;
@ -1947,7 +1965,7 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
@ -2131,13 +2149,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2145,7 +2164,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2192,13 +2211,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2206,7 +2226,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2244,13 +2264,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2258,7 +2279,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2295,13 +2316,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2309,7 +2331,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2348,13 +2370,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2362,7 +2385,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2406,13 +2429,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2420,7 +2444,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2457,13 +2481,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2471,7 +2496,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2508,13 +2533,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2522,7 +2548,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2701,13 +2727,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2715,7 +2742,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2759,13 +2786,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2773,7 +2801,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();
@ -2814,13 +2842,14 @@ mod tests {
None,
&mut new_fields_ids_map,
&|| false,
&|_progress| (),
Progress::default(),
)
.unwrap();
indexer::index(
&mut wtxn,
&index.inner,
&crate::ThreadPoolNoAbortBuilder::new().build().unwrap(),
indexer_config.grenad_parameters(),
&db_fields_ids_map,
new_fields_ids_map,
@ -2828,7 +2857,7 @@ mod tests {
&document_changes,
embedders,
&|| false,
&|_| (),
&Progress::default(),
)
.unwrap();
wtxn.commit().unwrap();


@ -667,14 +667,23 @@ impl<'a, 'i> Transform<'a, 'i> {
let is_primary_key = |id: FieldId| -> bool { settings_diff.primary_key_id == Some(id) };
// If only a faceted field has been added, keep only this field.
let must_reindex_facets = settings_diff.reindex_facets();
let necessary_faceted_field = |id: FieldId| -> bool {
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
must_reindex_facets
&& modified_faceted_fields
.iter()
.any(|long| is_faceted_by(long, field_name) || is_faceted_by(field_name, long))
};
let global_facet_settings_changed = settings_diff.global_facet_settings_changed();
let facet_fids_changed = settings_diff.facet_fids_changed();
let necessary_faceted_field =
|id: FieldId| -> bool {
let field_name = settings_diff.new.fields_ids_map.name(id).unwrap();
if global_facet_settings_changed {
settings_diff.new.user_defined_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else if facet_fids_changed {
modified_faceted_fields.iter().any(|long| {
is_faceted_by(long, field_name) || is_faceted_by(field_name, long)
})
} else {
false
}
};
// Always provide all fields when vectors are involved because
// we need the fields for the prompt/templating.

File diff suppressed because it is too large.


@ -1,7 +1,8 @@
use std::collections::{BTreeMap, BTreeSet};
use bumparaw_collections::RawMap;
use heed::RoTxn;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;
use super::vector_document::VectorDocument;
@ -385,12 +386,12 @@ pub type Entry<'doc> = (&'doc str, &'doc RawValue);
#[derive(Debug)]
pub struct Versions<'doc> {
data: RawMap<'doc>,
data: RawMap<'doc, FxBuildHasher>,
}
impl<'doc> Versions<'doc> {
pub fn multiple(
mut versions: impl Iterator<Item = Result<RawMap<'doc>>>,
mut versions: impl Iterator<Item = Result<RawMap<'doc, FxBuildHasher>>>,
) -> Result<Option<Self>> {
let Some(data) = versions.next() else { return Ok(None) };
let mut data = data?;
@ -403,7 +404,7 @@ impl<'doc> Versions<'doc> {
Ok(Some(Self::single(data)))
}
pub fn single(version: RawMap<'doc>) -> Self {
pub fn single(version: RawMap<'doc, FxBuildHasher>) -> Self {
Self { data: version }
}


@ -1,7 +1,10 @@
use bumpalo::Bump;
use heed::RoTxn;
use super::document::{DocumentFromDb, DocumentFromVersions, MergedDocument, Versions};
use super::document::{
Document as _, DocumentFromDb, DocumentFromVersions, MergedDocument, Versions,
};
use super::extract::perm_json_p;
use super::vector_document::{
MergedVectorDocument, VectorDocumentFromDb, VectorDocumentFromVersions,
};
@ -164,6 +167,80 @@ impl<'doc> Update<'doc> {
}
}
/// Returns whether the updated version of the document is different from the current version for the passed subset of fields.
///
/// `true` if at least one top-level field that is exactly a member of `fields`, or a parent of a member of `fields`, changed.
/// Otherwise `false`.
pub fn has_changed_for_fields<'t, Mapper: FieldIdMapper>(
&self,
fields: Option<&[&str]>,
rtxn: &'t RoTxn,
index: &'t Index,
mapper: &'t Mapper,
) -> Result<bool> {
let mut changed = false;
let mut cached_current = None;
let mut updated_selected_field_count = 0;
for entry in self.updated().iter_top_level_fields() {
let (key, updated_value) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
continue;
}
updated_selected_field_count += 1;
let current = match cached_current {
Some(current) => current,
None => self.current(rtxn, index, mapper)?,
};
let current_value = current.top_level_field(key)?;
let Some(current_value) = current_value else {
changed = true;
break;
};
if current_value.get() != updated_value.get() {
changed = true;
break;
}
cached_current = Some(current);
}
if !self.has_deletion {
// no field deletion, so fields that don't appear in `updated` cannot have changed
return Ok(changed);
}
if changed {
return Ok(true);
}
// we saw all updated fields, and set `changed` if any field wasn't in `current`.
// so if there are as many fields in `current` as in `updated`, then nothing changed.
// If there are any more fields in `current`, then they are missing in `updated`.
let has_deleted_fields = {
let current = match cached_current {
Some(current) => current,
None => self.current(rtxn, index, mapper)?,
};
let mut current_selected_field_count = 0;
for entry in current.iter_top_level_fields() {
let (key, _) = entry?;
if perm_json_p::select_field(key, fields, &[]) == perm_json_p::Selection::Skip {
continue;
}
current_selected_field_count += 1;
}
current_selected_field_count != updated_selected_field_count
};
Ok(has_deleted_fields)
}
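
A simplified, self-contained model of the counting argument above, using serde_json maps (the helper and field names are illustrative; the real method also caches the current version and selects nested fields via perm_json_p):

use serde_json::{json, Map, Value};

fn changed_for_fields(current: &Map<String, Value>, updated: &Map<String, Value>, fields: &[&str]) -> bool {
    let selected = |key: &str| fields.contains(&key);
    let mut updated_count = 0;
    for (key, new_value) in updated.iter().filter(|(key, _)| selected(key)) {
        updated_count += 1;
        if current.get(key) != Some(new_value) {
            return true; // selected field added or its value changed
        }
    }
    // A selected field present in `current` but absent from `updated` was deleted.
    current.keys().filter(|key| selected(key)).count() != updated_count
}

fn main() {
    let current = json!({"id": 1, "color": "red", "size": 38});
    let updated = json!({"id": 1, "color": "red"});
    let (current, updated) = (current.as_object().unwrap(), updated.as_object().unwrap());
    assert!(!changed_for_fields(current, updated, &["color"])); // untouched field
    assert!(changed_for_fields(current, updated, &["size"])); // deleted field
}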
pub fn updated_vectors(
&self,
doc_alloc: &'doc Bump,


@ -69,12 +69,12 @@ use std::io::BufReader;
use std::{io, iter, mem};
use bumpalo::Bump;
use bumparaw_collections::bbbul::{BitPacker, BitPacker4x};
use bumparaw_collections::map::FrozenMap;
use bumparaw_collections::{Bbbul, FrozenBbbul};
use grenad::ReaderCursor;
use hashbrown::hash_map::RawEntryMut;
use hashbrown::HashMap;
use raw_collections::bbbul::{BitPacker, BitPacker4x};
use raw_collections::map::FrozenMap;
use raw_collections::{Bbbul, FrozenBbbul};
use roaring::RoaringBitmap;
use rustc_hash::FxBuildHasher;
@ -177,12 +177,12 @@ impl<'extractor> BalancedCaches<'extractor> {
Ok(())
}
pub fn freeze(&mut self) -> Result<Vec<FrozenCache<'_, 'extractor>>> {
pub fn freeze(&mut self, source_id: usize) -> Result<Vec<FrozenCache<'_, 'extractor>>> {
match &mut self.caches {
InnerCaches::Normal(NormalCaches { caches }) => caches
.iter_mut()
.enumerate()
.map(|(bucket, map)| {
.map(|(bucket_id, map)| {
// safety: we are transmuting the Bbbul into a FrozenBbbul
// that are the same size.
let map = unsafe {
@ -201,14 +201,19 @@ impl<'extractor> BalancedCaches<'extractor> {
>,
>(map)
};
Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled: Vec::new() })
Ok(FrozenCache {
source_id,
bucket_id,
cache: FrozenMap::new(map),
spilled: Vec::new(),
})
})
.collect(),
InnerCaches::Spilling(SpillingCaches { caches, spilled_entries, .. }) => caches
.iter_mut()
.zip(mem::take(spilled_entries))
.enumerate()
.map(|(bucket, (map, sorter))| {
.map(|(bucket_id, (map, sorter))| {
let spilled = sorter
.into_reader_cursors()?
.into_iter()
@ -234,7 +239,7 @@ impl<'extractor> BalancedCaches<'extractor> {
>,
>(map)
};
Ok(FrozenCache { bucket, cache: FrozenMap::new(map), spilled })
Ok(FrozenCache { source_id, bucket_id, cache: FrozenMap::new(map), spilled })
})
.collect(),
}
@ -415,21 +420,21 @@ fn spill_entry_to_sorter(
match deladd {
DelAddRoaringBitmap { del: Some(del), add: None } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: Some(del), add: Some(add) } => {
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&del, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&del, cbo_buffer);
value_writer.insert(DelAdd::Deletion, &cbo_buffer)?;
cbo_buffer.clear();
CboRoaringBitmapCodec::serialize_into(&add, cbo_buffer);
CboRoaringBitmapCodec::serialize_into_vec(&add, cbo_buffer);
value_writer.insert(DelAdd::Addition, &cbo_buffer)?;
}
DelAddRoaringBitmap { del: None, add: None } => return Ok(()),
@ -440,7 +445,8 @@ fn spill_entry_to_sorter(
}
pub struct FrozenCache<'a, 'extractor> {
bucket: usize,
bucket_id: usize,
source_id: usize,
cache: FrozenMap<
'a,
'extractor,
@ -457,40 +463,36 @@ pub fn transpose_and_freeze_caches<'a, 'extractor>(
let width = caches.first().map(BalancedCaches::buckets).unwrap_or(0);
let mut bucket_caches: Vec<_> = iter::repeat_with(Vec::new).take(width).collect();
for thread_cache in caches {
for frozen in thread_cache.freeze()? {
bucket_caches[frozen.bucket].push(frozen);
for (thread_index, thread_cache) in caches.iter_mut().enumerate() {
for frozen in thread_cache.freeze(thread_index)? {
bucket_caches[frozen.bucket_id].push(frozen);
}
}
Ok(bucket_caches)
}
/// Merges the caches that must be all associated to the same bucket.
/// Merges the caches that must all be associated to the same bucket
/// but makes sure to sort the different buckets before performing the merges.
///
/// # Panics
///
/// - If the bucket IDs in these frozen caches are not exactly the same.
pub fn merge_caches<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
pub fn merge_caches_sorted<F>(frozen: Vec<FrozenCache>, mut f: F) -> Result<()>
where
F: for<'a> FnMut(&'a [u8], DelAddRoaringBitmap) -> Result<()>,
{
let mut maps = Vec::new();
let mut readers = Vec::new();
let mut current_bucket = None;
for FrozenCache { bucket, cache, ref mut spilled } in frozen {
assert_eq!(*current_bucket.get_or_insert(bucket), bucket);
maps.push(cache);
readers.append(spilled);
}
// First manage the spilled entries by looking into the HashMaps,
// merge them and mark them as dummy.
let mut heap = BinaryHeap::new();
for (source_index, source) in readers.into_iter().enumerate() {
let mut cursor = source.into_cursor()?;
if cursor.move_on_next()?.is_some() {
heap.push(Entry { cursor, source_index });
let mut current_bucket = None;
for FrozenCache { source_id, bucket_id, cache, spilled } in frozen {
assert_eq!(*current_bucket.get_or_insert(bucket_id), bucket_id);
maps.push((source_id, cache));
for reader in spilled {
let mut cursor = reader.into_cursor()?;
if cursor.move_on_next()?.is_some() {
heap.push(Entry { cursor, source_id });
}
}
}
@ -507,25 +509,29 @@ where
let mut output = DelAddRoaringBitmap::from_bytes(first_value)?;
while let Some(mut entry) = heap.peek_mut() {
if let Some((key, _value)) = entry.cursor.current() {
if first_key == key {
let new = DelAddRoaringBitmap::from_bytes(first_value)?;
output = output.merge(new);
// When we are done with the current value of this entry we make
// it move forward and let the heap reorganize itself (on drop)
if entry.cursor.move_on_next()?.is_none() {
PeekMut::pop(entry);
}
} else {
if let Some((key, value)) = entry.cursor.current() {
if first_key != key {
break;
}
let new = DelAddRoaringBitmap::from_bytes(value)?;
output = output.merge(new);
// When we are done with the current value of this entry we make
// it move forward and let the heap reorganize itself (on drop)
if entry.cursor.move_on_next()?.is_none() {
PeekMut::pop(entry);
}
}
}
// Once we merged all of the spilled bitmaps we must also
// fetch the entries from the non-spilled entries (the HashMaps).
for (map_index, map) in maps.iter_mut().enumerate() {
if first_entry.source_index != map_index {
for (source_id, map) in maps.iter_mut() {
debug_assert!(
!(map.get(first_key).is_some() && first_entry.source_id == *source_id),
"A thread should not have spiled a key that has been inserted in the cache"
);
if first_entry.source_id != *source_id {
if let Some(new) = map.get_mut(first_key) {
output.union_and_clear_bbbul(new);
}
@ -537,22 +543,22 @@ where
// Don't forget to put the first entry back into the heap.
if first_entry.cursor.move_on_next()?.is_some() {
heap.push(first_entry)
heap.push(first_entry);
}
}
// Then manage the content on the HashMap entries that weren't taken (mem::take).
while let Some(mut map) = maps.pop() {
for (key, bbbul) in map.iter_mut() {
// Make sure we don't try to work with entries already managed by the spilled
if bbbul.is_empty() {
continue;
}
while let Some((_, mut map)) = maps.pop() {
// Make sure we don't try to work with entries already managed by the spilled
let mut ordered_entries: Vec<_> =
map.iter_mut().filter(|(_, bbbul)| !bbbul.is_empty()).collect();
ordered_entries.sort_unstable_by_key(|(key, _)| *key);
for (key, bbbul) in ordered_entries {
let mut output = DelAddRoaringBitmap::empty();
output.union_and_clear_bbbul(bbbul);
for rhs in maps.iter_mut() {
for (_, rhs) in maps.iter_mut() {
if let Some(new) = rhs.get_mut(key) {
output.union_and_clear_bbbul(new);
}
@ -568,14 +574,14 @@ where
struct Entry<R> {
cursor: ReaderCursor<R>,
source_index: usize,
source_id: usize,
}
impl<R> Ord for Entry<R> {
fn cmp(&self, other: &Entry<R>) -> Ordering {
let skey = self.cursor.current().map(|(k, _)| k);
let okey = other.cursor.current().map(|(k, _)| k);
skey.cmp(&okey).then(self.source_index.cmp(&other.source_index)).reverse()
skey.cmp(&okey).then(self.source_id.cmp(&other.source_id)).reverse()
}
}
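
The .reverse() in cmp above turns Rust's max-heap BinaryHeap into a min-heap ordered on (key, source_id), so the merge always pops the smallest key next. The standard-library Reverse wrapper demonstrates the same trick in isolation:

use std::cmp::Reverse;
use std::collections::BinaryHeap;

fn main() {
    let mut heap = BinaryHeap::new();
    for (key, source_id) in [("b", 0usize), ("a", 1), ("a", 0)] {
        heap.push(Reverse((key, source_id)));
    }
    // Popping yields entries smallest-first, ties broken by source_id.
    let order: Vec<_> = std::iter::from_fn(|| heap.pop().map(|Reverse(entry)| entry)).collect();
    assert_eq!(order, vec![("a", 0), ("a", 1), ("b", 0)]);
}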


@ -12,13 +12,14 @@ use crate::update::new::thread_local::FullySend;
use crate::update::new::DocumentChange;
use crate::vector::EmbeddingConfigs;
use crate::Result;
pub struct DocumentsExtractor<'a> {
document_sender: &'a DocumentsSender<'a>,
pub struct DocumentsExtractor<'a, 'b> {
document_sender: DocumentsSender<'a, 'b>,
embedders: &'a EmbeddingConfigs,
}
impl<'a> DocumentsExtractor<'a> {
pub fn new(document_sender: &'a DocumentsSender<'a>, embedders: &'a EmbeddingConfigs) -> Self {
impl<'a, 'b> DocumentsExtractor<'a, 'b> {
pub fn new(document_sender: DocumentsSender<'a, 'b>, embedders: &'a EmbeddingConfigs) -> Self {
Self { document_sender, embedders }
}
}
@ -29,7 +30,7 @@ pub struct DocumentExtractorData {
pub field_distribution_delta: HashMap<String, i64>,
}
impl<'a, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for DocumentsExtractor<'a, 'b> {
type Data = FullySend<RefCell<DocumentExtractorData>>;
fn init_data(&self, _extractor_alloc: &'extractor Bump) -> Result<Self::Data> {


@ -16,23 +16,23 @@ use crate::update::del_add::DelAdd;
use crate::update::new::channel::FieldIdDocidFacetSender;
use crate::update::new::extract::perm_json_p;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
use crate::{DocumentId, FieldId, Index, Result, MAX_FACET_VALUE_LENGTH};
pub struct FacetedExtractorData<'a> {
pub struct FacetedExtractorData<'a, 'b> {
attributes_to_extract: &'a [&'a str],
sender: &'a FieldIdDocidFacetSender<'a>,
sender: &'a FieldIdDocidFacetSender<'a, 'b>,
grenad_parameters: GrenadParameters,
buckets: usize,
}
impl<'a, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for FacetedExtractorData<'a, 'b> {
type Data = RefCell<BalancedCaches<'extractor>>;
fn init_data(&self, extractor_alloc: &'extractor Bump) -> Result<Self::Data> {
@ -97,6 +97,15 @@ impl FacetedDocidsExtractor {
},
),
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
Some(attributes_to_extract),
rtxn,
index,
context.db_fields_ids_map,
)? {
return Ok(());
}
extract_document_facets(
attributes_to_extract,
inner.current(rtxn, index, context.db_fields_ids_map)?,
@ -318,7 +327,7 @@ impl<'doc> DelAddFacetValue<'doc> {
docid: DocumentId,
sender: &FieldIdDocidFacetSender,
doc_alloc: &Bump,
) -> std::result::Result<(), crossbeam_channel::SendError<()>> {
) -> crate::Result<()> {
let mut buffer = bumpalo::collections::Vec::new_in(doc_alloc);
for ((fid, value), deladd) in self.strings {
if let Ok(s) = std::str::from_utf8(&value) {
@ -364,26 +373,16 @@ fn truncate_str(s: &str) -> &str {
impl FacetedDocidsExtractor {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
pub fn run_extraction<
'pl,
'fid,
'indexer,
'index,
'extractor,
DC: DocumentChanges<'pl>,
MSP,
SP,
>(
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
sender: &FieldIdDocidFacetSender,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;


@ -1,6 +1,6 @@
use std::cell::RefCell;
use std::fs::File;
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Write as _};
use std::io::{self, BufReader, BufWriter, ErrorKind, Read, Seek as _, Write as _};
use std::{iter, mem, result};
use bumpalo::Bump;
@ -97,30 +97,34 @@ pub struct FrozenGeoExtractorData<'extractor> {
impl<'extractor> FrozenGeoExtractorData<'extractor> {
pub fn iter_and_clear_removed(
&mut self,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
mem::take(&mut self.removed)
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
Ok(mem::take(&mut self.removed)
.iter()
.copied()
.map(Ok)
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed))
.chain(iterator_over_spilled_geopoints(&mut self.spilled_removed)?))
}
pub fn iter_and_clear_inserted(
&mut self,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
mem::take(&mut self.inserted)
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
Ok(mem::take(&mut self.inserted)
.iter()
.copied()
.map(Ok)
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted))
.chain(iterator_over_spilled_geopoints(&mut self.spilled_inserted)?))
}
}
fn iterator_over_spilled_geopoints(
spilled: &mut Option<BufReader<File>>,
) -> impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_ {
) -> io::Result<impl IntoIterator<Item = io::Result<ExtractedGeoPoint>> + '_> {
let mut spilled = spilled.take();
iter::from_fn(move || match &mut spilled {
if let Some(spilled) = &mut spilled {
spilled.rewind()?;
}
Ok(iter::from_fn(move || match &mut spilled {
Some(file) => {
let geopoint_bytes = &mut [0u8; mem::size_of::<ExtractedGeoPoint>()];
match file.read_exact(geopoint_bytes) {
@ -130,7 +134,7 @@ fn iterator_over_spilled_geopoints(
}
}
None => None,
})
}))
}
impl<'extractor> Extractor<'extractor> for GeoExtractor {
@ -157,7 +161,9 @@ impl<'extractor> Extractor<'extractor> for GeoExtractor {
let mut data_ref = context.data.borrow_mut_or_yield();
for change in changes {
if max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm) {
if data_ref.spilled_removed.is_none()
&& max_memory.map_or(false, |mm| context.extractor_alloc.allocated_bytes() >= mm)
{
// We must spill as we allocated too much memory; the is_none check above keeps
// us from recreating the temp files and losing what was already spilled.
data_ref.spilled_removed = tempfile::tempfile().map(BufWriter::new).map(Some)?;
data_ref.spilled_inserted = tempfile::tempfile().map(BufWriter::new).map(Some)?;

View file

@ -6,30 +6,31 @@ mod searchable;
mod vectors;
use bumpalo::Bump;
pub use cache::{merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap};
pub use cache::{
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
};
pub use documents::*;
pub use faceted::*;
pub use geo::*;
pub use searchable::*;
pub use vectors::EmbeddingExtractor;
use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress};
use super::steps::Step;
use super::indexer::document_changes::{DocumentChanges, IndexingContext};
use super::steps::IndexingStep;
use super::thread_local::{FullySend, ThreadLocal};
use crate::update::GrenadParameters;
use crate::Result;
pub trait DocidsExtractor {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync;
MSP: Fn() -> bool + Sync;
}
/// TODO move into the permissive json pointer module

View file

@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
use crate::update::new::extract::cache::BalancedCaches;
use crate::update::new::extract::perm_json_p::contained_in;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::ref_cell_ext::RefCellExt as _;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
@ -28,7 +28,7 @@ pub struct WordDocidsBalancedCaches<'extractor> {
exact_word_docids: BalancedCaches<'extractor>,
word_position_docids: BalancedCaches<'extractor>,
fid_word_count_docids: BalancedCaches<'extractor>,
fid_word_count: HashMap<FieldId, (usize, usize)>,
fid_word_count: HashMap<FieldId, (Option<usize>, Option<usize>)>,
current_docid: Option<DocumentId>,
}
@ -85,8 +85,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.fid_word_count
.entry(field_id)
.and_modify(|(_current_count, new_count)| *new_count += 1)
.or_insert((0, 1));
.and_modify(|(_current_count, new_count)| *new_count.get_or_insert(0) += 1)
.or_insert((None, Some(1)));
self.current_docid = Some(docid);
Ok(())
@ -130,8 +130,8 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
self.fid_word_count
.entry(field_id)
.and_modify(|(current_count, _new_count)| *current_count += 1)
.or_insert((1, 0));
.and_modify(|(current_count, _new_count)| *current_count.get_or_insert(0) += 1)
.or_insert((Some(1), None));
self.current_docid = Some(docid);
@ -141,14 +141,18 @@ impl<'extractor> WordDocidsBalancedCaches<'extractor> {
fn flush_fid_word_count(&mut self, buffer: &mut BumpVec<u8>) -> Result<()> {
for (fid, (current_count, new_count)) in self.fid_word_count.drain() {
if current_count != new_count {
if current_count <= MAX_COUNTED_WORDS {
if let Some(current_count) =
current_count.filter(|current_count| *current_count <= MAX_COUNTED_WORDS)
{
buffer.clear();
buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(current_count as u8);
self.fid_word_count_docids
.insert_del_u32(buffer, self.current_docid.unwrap())?;
}
if new_count <= MAX_COUNTED_WORDS {
if let Some(new_count) =
new_count.filter(|new_count| *new_count <= MAX_COUNTED_WORDS)
{
buffer.clear();
buffer.extend_from_slice(&fid.to_be_bytes());
buffer.push(new_count as u8);
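For intuition, a minimal, hypothetical sketch (not part of the diff) of what switching the counters from (usize, usize) to (Option<usize>, Option<usize>) buys: with plain integers, a field absent from one version and a field counted zero times both looked like 0, so the current_count != new_count comparison could not tell them apart.

use std::collections::HashMap;

fn main() {
    let mut fid_word_count: HashMap<u16, (Option<usize>, Option<usize>)> = HashMap::new();

    // Field 7 appears only in the new version of the document.
    fid_word_count
        .entry(7)
        .and_modify(|(_current, new)| *new.get_or_insert(0) += 1)
        .or_insert((None, Some(1)));

    // `None` on the current side records "field absent", which a plain 0 could not.
    assert_eq!(fid_word_count[&7], (None, Some(1)));
}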
@ -235,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
pub struct WordDocidsExtractors;
impl WordDocidsExtractors {
pub fn run_extraction<
'pl,
'fid,
'indexer,
'index,
'extractor,
DC: DocumentChanges<'pl>,
MSP,
SP,
>(
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<WordDocidsCaches<'extractor>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let index = indexing_context.index;
let rtxn = index.read_txn()?;
@ -351,6 +345,15 @@ impl WordDocidsExtractors {
)?;
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
&context.rtxn,
context.index,
context.db_fields_ids_map,
)? {
return Ok(());
}
let mut token_fn = |fname: &str, fid, pos, word: &str| {
cached_sorter.insert_del_u32(
fid,

View file

@ -70,6 +70,15 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
)?;
}
DocumentChange::Update(inner) => {
if !inner.has_changed_for_fields(
document_tokenizer.attribute_to_extract,
rtxn,
index,
context.db_fields_ids_map,
)? {
return Ok(());
}
let document = inner.current(rtxn, index, context.db_fields_ids_map)?;
process_document_tokens(
document,

View file

@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
use super::cache::BalancedCaches;
use super::DocidsExtractor;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
};
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::update::GrenadParameters;
@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
}
pub trait SearchableExtractor: Sized + Sync {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let rtxn = indexing_context.index.read_txn()?;
let stop_words = indexing_context.index.stop_words(&rtxn)?;
@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync {
}
impl<T: SearchableExtractor> DocidsExtractor for T {
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
grenad_parameters: GrenadParameters,
document_changes: &DC,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
step: Step,
step: IndexingStep,
) -> Result<Vec<BalancedCaches<'extractor>>>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
Self::run_extraction(
grenad_parameters,

View file

@ -176,9 +176,10 @@ pub fn tokenizer_builder<'a>(
#[cfg(test)]
mod test {
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use charabia::TokenizerBuilder;
use meili_snap::snapshot;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde_json::json;
use serde_json::value::RawValue;
@ -234,7 +235,7 @@ mod test {
let bump = Bump::new();
let document: &RawValue = serde_json::from_str(&document).unwrap();
let document = RawMap::from_raw_value(document, &bump).unwrap();
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, &bump).unwrap();
let document = Versions::single(document);
let document = DocumentFromVersions::new(&document);
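The raw_collections to bumparaw_collections rename recurs throughout this diff, and the constructor now takes the hasher explicitly. A minimal sketch of the new call shape, mirroring the test above (requires the bumpalo, bumparaw_collections, rustc_hash, and serde_json crates):

use bumpalo::Bump;
use bumparaw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;

fn main() {
    let bump = Bump::new();
    let raw: &RawValue = serde_json::from_str(r#"{"id": 1}"#).unwrap();
    // The hasher is now an explicit argument instead of a type-level default.
    let _map = RawMap::from_raw_value_and_hasher(raw, FxBuildHasher, &bump).unwrap();
}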

View file

@ -18,17 +18,17 @@ use crate::vector::error::{
use crate::vector::{Embedder, Embedding, EmbeddingConfigs};
use crate::{DocumentId, FieldDistribution, InternalError, Result, ThreadPoolNoAbort, UserError};
pub struct EmbeddingExtractor<'a> {
pub struct EmbeddingExtractor<'a, 'b> {
embedders: &'a EmbeddingConfigs,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
possible_embedding_mistakes: PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
}
impl<'a> EmbeddingExtractor<'a> {
impl<'a, 'b> EmbeddingExtractor<'a, 'b> {
pub fn new(
embedders: &'a EmbeddingConfigs,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
field_distribution: &'a FieldDistribution,
threads: &'a ThreadPoolNoAbort,
) -> Self {
@ -43,7 +43,7 @@ pub struct EmbeddingExtractorData<'extractor>(
unsafe impl MostlySend for EmbeddingExtractorData<'_> {}
impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
impl<'a, 'b, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a, 'b> {
type Data = RefCell<EmbeddingExtractorData<'extractor>>;
fn init_data<'doc>(&'doc self, extractor_alloc: &'extractor Bump) -> crate::Result<Self::Data> {
@ -130,6 +130,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
);
} else if new_vectors.regenerate {
let new_rendered = prompt.render_document(
update.external_document_id(),
update.current(
&context.rtxn,
context.index,
@ -139,6 +140,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
&context.doc_alloc,
)?;
let old_rendered = prompt.render_document(
update.external_document_id(),
update.merged(
&context.rtxn,
context.index,
@ -158,6 +160,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
}
} else if old_vectors.regenerate {
let old_rendered = prompt.render_document(
update.external_document_id(),
update.current(
&context.rtxn,
context.index,
@ -167,6 +170,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
&context.doc_alloc,
)?;
let new_rendered = prompt.render_document(
update.external_document_id(),
update.merged(
&context.rtxn,
context.index,
@ -216,6 +220,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
);
} else if new_vectors.regenerate {
let rendered = prompt.render_document(
insertion.external_document_id(),
insertion.inserted(),
context.new_fields_ids_map,
&context.doc_alloc,
@ -229,6 +234,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
}
} else {
let rendered = prompt.render_document(
insertion.external_document_id(),
insertion.inserted(),
context.new_fields_ids_map,
&context.doc_alloc,
@ -259,7 +265,7 @@ impl<'a, 'extractor> Extractor<'extractor> for EmbeddingExtractor<'a> {
// Currently this is the case as:
// 1. BVecs are allocated inside the bumpalo
// 2. All other fields are either trivial (u8) or references.
struct Chunks<'a, 'extractor> {
struct Chunks<'a, 'b, 'extractor> {
texts: BVec<'a, &'a str>,
ids: BVec<'a, DocumentId>,
@ -270,11 +276,11 @@ struct Chunks<'a, 'extractor> {
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
threads: &'a ThreadPoolNoAbort,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
has_manual_generation: Option<&'a str>,
}
impl<'a, 'extractor> Chunks<'a, 'extractor> {
impl<'a, 'b, 'extractor> Chunks<'a, 'b, 'extractor> {
#[allow(clippy::too_many_arguments)]
pub fn new(
embedder: &'a Embedder,
@ -284,7 +290,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
user_provided: &'a RefCell<EmbeddingExtractorData<'extractor>>,
possible_embedding_mistakes: &'a PossibleEmbeddingMistakes,
threads: &'a ThreadPoolNoAbort,
sender: &'a EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
doc_alloc: &'a Bump,
) -> Self {
let capacity = embedder.prompt_count_in_chunk_hint() * embedder.chunk_count_hint();
@ -368,7 +374,7 @@ impl<'a, 'extractor> Chunks<'a, 'extractor> {
possible_embedding_mistakes: &PossibleEmbeddingMistakes,
unused_vectors_distribution: &UnusedVectorsDistributionBump,
threads: &ThreadPoolNoAbort,
sender: &EmbeddingSender<'a>,
sender: EmbeddingSender<'a, 'b>,
has_manual_generation: Option<&'a str>,
) -> Result<()> {
if let Some(external_docid) = has_manual_generation {

View file

@ -103,6 +103,8 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_fst")]
pub fn merge_and_write(self, index: &Index, wtxn: &mut RwTxn, rtxn: &RoTxn) -> Result<()> {
tracing::trace!("merge facet strings for facet search: {:?}", self.registered_facets);
let reader = self.normalized_facet_string_docids_sorter.into_reader_cursors()?;
let mut builder = grenad::MergerBuilder::new(MergeDeladdBtreesetString);
builder.extend(reader);
@ -118,12 +120,15 @@ impl<'indexer> FacetSearchBuilder<'indexer> {
BEU16StrCodec::bytes_decode(key).map_err(heed::Error::Encoding)?;
if current_field_id != Some(field_id) {
if let Some(fst_merger_builder) = fst_merger_builder {
if let (Some(current_field_id), Some(fst_merger_builder)) =
(current_field_id, fst_merger_builder)
{
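// Flush the finished FST under the *previous* field id: the old code wrote
// it under the incoming `field_id`, attaching the FST to the wrong field.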
let mmap = fst_merger_builder.build(&mut callback)?;
index
.facet_id_string_fst
.remap_data_type::<Bytes>()
.put(wtxn, &field_id, &mmap)?;
index.facet_id_string_fst.remap_data_type::<Bytes>().put(
wtxn,
&current_field_id,
&mmap,
)?;
}
fst = index.facet_id_string_fst.get(rtxn, &field_id)?;

View file

@ -1,6 +1,8 @@
use std::ops::ControlFlow;
use bumpalo::Bump;
use bumparaw_collections::RawVec;
use rustc_hash::FxBuildHasher;
use serde::de::{DeserializeSeed, Deserializer as _, Visitor};
use serde_json::value::RawValue;
@ -360,7 +362,7 @@ impl<'a> DeserrRawValue<'a> {
}
pub struct DeserrRawVec<'a> {
vec: raw_collections::RawVec<'a>,
vec: RawVec<'a>,
alloc: &'a Bump,
}
@ -379,7 +381,7 @@ impl<'a> deserr::Sequence for DeserrRawVec<'a> {
}
pub struct DeserrRawVecIter<'a> {
it: raw_collections::vec::iter::IntoIter<'a>,
it: bumparaw_collections::vec::iter::IntoIter<'a>,
alloc: &'a Bump,
}
@ -393,7 +395,7 @@ impl<'a> Iterator for DeserrRawVecIter<'a> {
}
pub struct DeserrRawMap<'a> {
map: raw_collections::RawMap<'a>,
map: bumparaw_collections::RawMap<'a, FxBuildHasher>,
alloc: &'a Bump,
}
@ -416,7 +418,7 @@ impl<'a> deserr::Map for DeserrRawMap<'a> {
}
pub struct DeserrRawMapIter<'a> {
it: raw_collections::map::iter::IntoIter<'a>,
it: bumparaw_collections::map::iter::IntoIter<'a>,
alloc: &'a Bump,
}
@ -615,7 +617,7 @@ impl<'de> Visitor<'de> for DeserrRawValueVisitor<'de> {
where
A: serde::de::SeqAccess<'de>,
{
let mut raw_vec = raw_collections::RawVec::new_in(self.alloc);
let mut raw_vec = RawVec::new_in(self.alloc);
while let Some(next) = seq.next_element()? {
raw_vec.push(next);
}

View file

@ -1,4 +1,5 @@
use std::cell::{Cell, RefCell};
use std::sync::atomic::Ordering;
use std::sync::{Arc, RwLock};
use bumpalo::Bump;
@ -7,8 +8,9 @@ use rayon::iter::IndexedParallelIterator;
use super::super::document_change::DocumentChange;
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
use crate::progress::{AtomicDocumentStep, Progress};
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result};
@ -70,7 +72,7 @@ impl<
F: FnOnce(&'extractor Bump) -> Result<T>,
{
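// The per-thread document allocator now starts at 1 MiB (previously 1 GiB);
// bumpalo grows the arena on demand as documents are processed.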
let doc_alloc =
doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024 * 1024))));
doc_allocs.get_or(|| FullySend(Cell::new(Bump::with_capacity(1024 * 1024))));
let doc_alloc = doc_alloc.0.take();
let fields_ids_map = fields_ids_map_store
.get_or(|| RefCell::new(GlobalFieldsIdsMap::new(new_fields_ids_map)).into());
@ -133,10 +135,8 @@ pub struct IndexingContext<
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
> where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
pub index: &'index Index,
pub db_fields_ids_map: &'indexer FieldsIdsMap,
@ -144,7 +144,7 @@ pub struct IndexingContext<
pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
pub must_stop_processing: &'indexer MSP,
pub send_progress: &'indexer SP,
pub progress: &'indexer Progress,
}
impl<
@ -152,18 +152,15 @@ impl<
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
> Copy
for IndexingContext<
'fid, // invariant lifetime of fields ids map
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
}
@ -172,18 +169,15 @@ impl<
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
> Clone
for IndexingContext<
'fid, // invariant lifetime of fields ids map
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
'index, // covariant lifetime of the index
MSP,
SP,
>
where
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
fn clone(&self) -> Self {
*self
@ -202,7 +196,6 @@ pub fn extract<
EX,
DC: DocumentChanges<'pl>,
MSP,
SP,
>(
document_changes: &DC,
extractor: &EX,
@ -213,18 +206,18 @@ pub fn extract<
doc_allocs,
fields_ids_map_store,
must_stop_processing,
send_progress,
}: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
progress,
}: IndexingContext<'fid, 'indexer, 'index, MSP>,
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
datastore: &'data ThreadLocal<EX::Data>,
step: Step,
step: IndexingStep,
) -> Result<()>
where
EX: Extractor<'extractor>,
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
tracing::trace!("We are resetting the extractor allocators");
progress.update_progress(step);
// Clean up and reuse the extractor allocs
for extractor_alloc in extractor_allocs.iter_mut() {
tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());
@ -232,9 +225,11 @@ where
}
let total_documents = document_changes.len() as u32;
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
let pi = document_changes.iter(CHUNK_SIZE);
pi.enumerate().try_arc_for_each_try_init(
pi.try_arc_for_each_try_init(
|| {
DocumentChangeContext::new(
index,
@ -247,13 +242,10 @@ where
move |index_alloc| extractor.init_data(index_alloc),
)
},
|context, (finished_documents, items)| {
|context, items| {
if (must_stop_processing)() {
return Err(Arc::new(InternalError::AbortedIndexation.into()));
}
let finished_documents = (finished_documents * CHUNK_SIZE) as u32;
(send_progress)(Progress::from_step_substep(step, finished_documents, total_documents));
// Clean up and reuse the document-specific allocator
context.doc_alloc.reset();
@ -264,6 +256,7 @@ where
});
let res = extractor.process(changes, context).map_err(Arc::new);
step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
// Send the doc_alloc back to the pool
context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));
@ -271,32 +264,7 @@ where
res
},
)?;
(send_progress)(Progress::from_step_substep(step, total_documents, total_documents));
step.store(total_documents, Ordering::Relaxed);
Ok(())
}
pub struct Progress {
pub finished_steps: u16,
pub total_steps: u16,
pub step_name: &'static str,
pub finished_total_substep: Option<(u32, u32)>,
}
impl Progress {
pub fn from_step(step: Step) -> Self {
Self {
finished_steps: step.finished_steps(),
total_steps: Step::total_steps(),
step_name: step.name(),
finished_total_substep: None,
}
}
pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self {
Self {
finished_total_substep: Some((finished_substep, total_substep)),
..Progress::from_step(step)
}
}
}
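A condensed sketch of the replacement pattern, using the names that appear in this diff (the exact module paths are assumed): the coarse step enum is reported once, and fine-grained progress flows through a shared atomic counter instead of repeated callback invocations.

use std::sync::atomic::Ordering;

use crate::progress::{AtomicDocumentStep, Progress};
use crate::update::new::steps::IndexingStep;

fn report(progress: &Progress, total_documents: u32) {
    // Report the coarse step once.
    progress.update_progress(IndexingStep::ExtractingDocuments);

    // Register a shared counter for the fine-grained substep.
    let (step, progress_step) = AtomicDocumentStep::new(total_documents);
    progress.update_progress(progress_step);

    // Workers bump the counter as chunks complete...
    step.fetch_add(1, Ordering::Relaxed);
    // ...and the driver pins it to the total at the end.
    step.store(total_documents, Ordering::Relaxed);
}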

View file

@ -92,11 +92,12 @@ mod test {
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::index::tests::TempIndex;
use crate::progress::Progress;
use crate::update::new::indexer::document_changes::{
extract, DocumentChangeContext, Extractor, IndexingContext,
};
use crate::update::new::indexer::DocumentDeletion;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::{MostlySend, ThreadLocal};
use crate::update::new::DocumentChange;
use crate::DocumentId;
@ -164,7 +165,7 @@ mod test {
doc_allocs: &doc_allocs,
fields_ids_map_store: &fields_ids_map_store,
must_stop_processing: &(|| false),
send_progress: &(|_progress| {}),
progress: &Progress::default(),
};
for _ in 0..3 {
@ -176,7 +177,7 @@ mod test {
context,
&mut extractor_allocs,
&datastore,
Step::ExtractingDocuments,
IndexingStep::ExtractingDocuments,
)
.unwrap();

View file

@ -1,19 +1,23 @@
use std::sync::atomic::Ordering;
use bumpalo::collections::CollectIn;
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use hashbrown::hash_map::Entry;
use heed::RoTxn;
use memmap2::Mmap;
use raw_collections::RawMap;
use rayon::slice::ParallelSlice;
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;
use serde_json::Deserializer;
use super::super::document_change::DocumentChange;
use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress};
use super::document_changes::{DocumentChangeContext, DocumentChanges};
use super::retrieve_or_guess_primary_key;
use crate::documents::PrimaryKey;
use crate::progress::{AtomicPayloadStep, Progress};
use crate::update::new::document::Versions;
use crate::update::new::steps::Step;
use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::MostlySend;
use crate::update::new::{Deletion, Insertion, Update};
use crate::update::{AvailableIds, IndexDocumentsMethod};
@ -44,7 +48,7 @@ impl<'pl> DocumentOperation<'pl> {
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")]
pub fn into_changes<MSP, SP>(
pub fn into_changes<MSP>(
self,
indexer: &'pl Bump,
index: &Index,
@ -52,12 +56,12 @@ impl<'pl> DocumentOperation<'pl> {
primary_key_from_op: Option<&'pl str>,
new_fields_ids_map: &mut FieldsIdsMap,
must_stop_processing: &MSP,
send_progress: &SP,
progress: Progress,
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
where
MSP: Fn() -> bool,
SP: Fn(Progress),
{
progress.update_progress(IndexingStep::PreparingPayloads);
let Self { operations, method } = self;
let documents_ids = index.documents_ids(rtxn)?;
@ -67,16 +71,14 @@ impl<'pl> DocumentOperation<'pl> {
let mut primary_key = None;
let payload_count = operations.len();
let (step, progress_step) = AtomicPayloadStep::new(payload_count as u32);
progress.update_progress(progress_step);
for (payload_index, operation) in operations.into_iter().enumerate() {
if must_stop_processing() {
return Err(InternalError::AbortedIndexation.into());
}
send_progress(Progress::from_step_substep(
Step::PreparingPayloads,
payload_index as u32,
payload_count as u32,
));
step.store(payload_index as u32, Ordering::Relaxed);
let mut bytes = 0;
let result = match operation {
@ -117,12 +119,7 @@ impl<'pl> DocumentOperation<'pl> {
};
operations_stats.push(PayloadStats { document_count, bytes, error });
}
send_progress(Progress::from_step_substep(
Step::PreparingPayloads,
payload_count as u32,
payload_count as u32,
));
step.store(payload_count as u32, Ordering::Relaxed);
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> =
@ -166,8 +163,9 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>(
// Only guess the primary key if it is the first document
let retrieved_primary_key = if previous_offset == 0 {
let doc =
RawMap::from_raw_value(doc, indexer).map(Some).map_err(UserError::SerdeJson)?;
let doc = RawMap::from_raw_value_and_hasher(doc, FxBuildHasher, indexer)
.map(Some)
.map_err(UserError::SerdeJson)?;
let result = retrieve_or_guess_primary_key(
rtxn,
@ -545,8 +543,9 @@ impl MergeChanges for MergeDocumentForReplacement {
match operations.last() {
Some(InnerDocOp::Addition(DocumentOffset { content })) => {
let document = serde_json::from_slice(content).unwrap();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
.map_err(UserError::SerdeJson)?;
let document =
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
.map_err(UserError::SerdeJson)?;
if is_new {
Ok(Some(DocumentChange::Insertion(Insertion::create(
@ -632,8 +631,9 @@ impl MergeChanges for MergeDocumentForUpdates {
}
};
let document = serde_json::from_slice(content).unwrap();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
.map_err(UserError::SerdeJson)?;
let document =
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
.map_err(UserError::SerdeJson)?;
Some(Versions::single(document))
}
@ -647,8 +647,9 @@ impl MergeChanges for MergeDocumentForUpdates {
};
let document = serde_json::from_slice(content).unwrap();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
.map_err(UserError::SerdeJson)?;
let document =
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
.map_err(UserError::SerdeJson)?;
Ok(document)
});
Versions::multiple(versions)?

View file

@ -1,9 +1,11 @@
use std::cmp::Ordering;
use std::sync::atomic::AtomicBool;
use std::sync::{OnceLock, RwLock};
use std::thread::{self, Builder};
use big_s::S;
use document_changes::{extract, DocumentChanges, IndexingContext, Progress};
use bumparaw_collections::RawMap;
use document_changes::{extract, DocumentChanges, IndexingContext};
pub use document_deletion::DocumentDeletion;
pub use document_operation::{DocumentOperation, PayloadStats};
use hashbrown::HashMap;
@ -12,7 +14,7 @@ use heed::{RoTxn, RwTxn};
use itertools::{merge_join_by, EitherOrBoth};
pub use partial_dump::PartialDump;
use rand::SeedableRng as _;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use time::OffsetDateTime;
pub use update_by_function::UpdateByFunction;
@ -20,7 +22,7 @@ use super::channel::*;
use super::extract::*;
use super::facet_search_builder::FacetSearchBuilder;
use super::merger::FacetFieldIdsDelta;
use super::steps::Step;
use super::steps::IndexingStep;
use super::thread_local::ThreadLocal;
use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
use super::words_prefix_docids::{
@ -31,6 +33,7 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
use crate::facet::FacetType;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
use crate::progress::Progress;
use crate::proximity::ProximityPrecision;
use crate::update::del_add::DelAdd;
use crate::update::new::extract::EmbeddingExtractor;
@ -41,7 +44,7 @@ use crate::update::settings::InnerIndexSettings;
use crate::update::{FacetsUpdateBulk, GrenadParameters};
use crate::vector::{ArroyWrapper, EmbeddingConfigs, Embeddings};
use crate::{
FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort,
Error, FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result, ThreadPoolNoAbort,
ThreadPoolNoAbortBuilder, UserError,
};
@ -58,9 +61,10 @@ mod update_by_function;
///
/// TODO return stats
#[allow(clippy::too_many_arguments)] // clippy: 😝
pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
pub fn index<'pl, 'indexer, 'index, DC, MSP>(
wtxn: &mut RwTxn,
index: &'index Index,
pool: &ThreadPoolNoAbort,
grenad_parameters: GrenadParameters,
db_fields_ids_map: &'indexer FieldsIdsMap,
new_fields_ids_map: FieldsIdsMap,
@ -68,14 +72,44 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
document_changes: &DC,
embedders: EmbeddingConfigs,
must_stop_processing: &'indexer MSP,
send_progress: &'indexer SP,
progress: &'indexer Progress,
) -> Result<()>
where
DC: DocumentChanges<'pl>,
MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{
let (extractor_sender, writer_receiver) = extractor_writer_channel(10_000);
let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false);
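// Flipped to true once extraction completes, so the writer loop below can
// attribute the remaining channel drain to the post_merge tracing span.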
// We reduce the actual memory used to 5%. The reason we do this here and not in Meilisearch
// is that we still use the old indexer for the settings, and it is highly impacted by the
// max memory. So we keep the changes here and will remove them once we use the new
// indexer to also index settings. Related to #5125 and #5141.
let grenad_parameters = GrenadParameters {
max_memory: grenad_parameters.max_memory.map(|mm| mm * 5 / 100),
..grenad_parameters
};
// We compute and remove the allocated BBQueues buffers capacity from the indexing memory.
let minimum_capacity = 50 * 1024 * 1024 * pool.current_num_threads(); // 50 MiB per thread
let (grenad_parameters, total_bbbuffer_capacity) = grenad_parameters.max_memory.map_or(
(grenad_parameters, 2 * minimum_capacity), // 100 MiB per thread by default
|max_memory| {
// 0.5% of the indexing memory (max_memory / 100 / 2), floored at the minimum capacity
let total_bbbuffer_capacity = (max_memory / 100 / 2).max(minimum_capacity);
let new_grenad_parameters = GrenadParameters {
max_memory: Some(
max_memory.saturating_sub(total_bbbuffer_capacity).max(100 * 1024 * 1024),
),
..grenad_parameters
};
(new_grenad_parameters, total_bbbuffer_capacity)
},
);
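// Worked example with hypothetical numbers: 16 threads give a floor of
// 50 MiB * 16 = 800 MiB; with max_memory = 8 GiB, 8 GiB / 100 / 2 is roughly
// 41 MiB, so the buffers get the 800 MiB floor and the extractors keep
// max(8 GiB - 800 MiB, 100 MiB) = 7.2 GiB.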
let (extractor_sender, mut writer_receiver) = pool
.install(|| extractor_writer_bbqueue(&mut bbbuffers, total_bbbuffer_capacity, 1000))
.unwrap();
let metadata_builder = MetadataBuilder::from_index(index, wtxn)?;
let new_fields_ids_map = FieldIdMapWithMetadata::new(new_fields_ids_map, metadata_builder);
@ -91,244 +125,274 @@ where
doc_allocs: &doc_allocs,
fields_ids_map_store: &fields_ids_map_store,
must_stop_processing,
send_progress,
progress,
};
let mut index_embeddings = index.embedding_configs(wtxn)?;
let mut field_distribution = index.field_distribution(wtxn)?;
let mut document_ids = index.documents_ids(wtxn)?;
thread::scope(|s| -> Result<()> {
let indexer_span = tracing::Span::current();
let embedders = &embedders;
let finished_extraction = &finished_extraction;
// Prevent moving the field_distribution and document_ids into the inner closure...
let field_distribution = &mut field_distribution;
let document_ids = &mut document_ids;
let extractor_handle = Builder::new().name(S("indexer-extractors")).spawn_scoped(s, move || {
let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract");
let _entered = span.enter();
let rtxn = index.read_txn()?;
// document but we need to create a function that collects and compresses documents.
let document_sender = extractor_sender.documents();
let document_extractor = DocumentsExtractor::new(&document_sender, embedders);
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
extract(document_changes,
&document_extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::ExtractingDocuments,
)?;
for document_extractor_data in datastore {
let document_extractor_data = document_extractor_data.0.into_inner();
for (field, delta) in document_extractor_data.field_distribution_delta {
let current = field_distribution.entry(field).or_default();
// adding the delta should never cause a negative result, as we are removing fields that previously existed.
*current = current.saturating_add_signed(delta);
}
document_extractor_data.docids_delta.apply_to(document_ids);
}
field_distribution.retain(|_, v| *v != 0);
let facet_field_ids_delta;
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "faceted");
pool.install(move || {
let span = tracing::trace_span!(target: "indexing::documents", parent: &indexer_span, "extract");
let _entered = span.enter();
facet_field_ids_delta = merge_and_send_facet_docids(
FacetedDocidsExtractor::run_extraction(
grenad_parameters,
let rtxn = index.read_txn()?;
// document but we need to create a function that collects and compresses documents.
let document_sender = extractor_sender.documents();
let document_extractor = DocumentsExtractor::new(document_sender, embedders);
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "documents");
let _entered = span.enter();
extract(
document_changes,
&document_extractor,
indexing_context,
&mut extractor_allocs,
&extractor_sender.field_id_docid_facet_sender(),
Step::ExtractingFacets
)?,
FacetDatabases::new(index),
index,
extractor_sender.facet_docids(),
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
let WordDocidsCaches {
word_docids,
word_fid_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
} = WordDocidsExtractors::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWords
)?;
// TODO Word Docids Merger
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
index,
extractor_sender.docids::<WordDocids>(),
&indexing_context.must_stop_processing,
&datastore,
IndexingStep::ExtractingDocuments,
)?;
}
// Word Fid Docids Merging
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "documents");
let _entered = span.enter();
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
index,
extractor_sender.docids::<WordFidDocids>(),
&indexing_context.must_stop_processing,
)?;
for document_extractor_data in datastore {
let document_extractor_data = document_extractor_data.0.into_inner();
for (field, delta) in document_extractor_data.field_distribution_delta {
let current = field_distribution.entry(field).or_default();
// adding the delta should never cause a negative result, as we are removing fields that previously existed.
*current = current.saturating_add_signed(delta);
}
document_extractor_data.docids_delta.apply_to(document_ids);
}
field_distribution.retain(|_, v| *v != 0);
}
// Exact Word Docids Merging
let facet_field_ids_delta;
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
index,
extractor_sender.docids::<ExactWordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", parent: &indexer_span, "faceted");
let _entered = span.enter();
// Word Position Docids Merging
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
index,
extractor_sender.docids::<WordPositionDocids>(),
&indexing_context.must_stop_processing,
)?;
}
FacetedDocidsExtractor::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
&extractor_sender.field_id_docid_facet_sender(),
IndexingStep::ExtractingFacets
)?
};
// Fid Word Count Docids Merging
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
index,
extractor_sender.docids::<FidWordCountDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", parent: &indexer_span, "faceted");
let _entered = span.enter();
// run the proximity extraction only if the precision is by word
// this works only if the settings didn't change during this transaction.
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::ByWord {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();
let caches = <WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
Step::ExtractingWordProximity,
)?;
merge_and_send_docids(
caches,
index.word_pair_proximity_docids.remap_types(),
index,
extractor_sender.docids::<WordPairProximityDocids>(),
&indexing_context.must_stop_processing,
)?;
}
'vectors: {
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
let mut index_embeddings = index.embedding_configs(&rtxn)?;
if index_embeddings.is_empty() {
break 'vectors;
}
let embedding_sender = extractor_sender.embeddings();
let extractor = EmbeddingExtractor::new(embedders, &embedding_sender, field_distribution, request_threads());
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
extract(document_changes, &extractor, indexing_context, &mut extractor_allocs, &datastore, Step::ExtractingEmbeddings)?;
for config in &mut index_embeddings {
'data: for data in datastore.iter_mut() {
let data = &mut data.get_mut().0;
let Some(deladd) = data.remove(&config.name) else { continue 'data; };
deladd.apply_to(&mut config.user_provided);
facet_field_ids_delta = merge_and_send_facet_docids(
caches,
FacetDatabases::new(index),
index,
extractor_sender.facet_docids(),
)?;
}
}
embedding_sender.finish(index_embeddings).unwrap();
}
{
let WordDocidsCaches {
word_docids,
word_fid_docids,
exact_word_docids,
word_position_docids,
fid_word_count_docids,
} = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_docids");
let _entered = span.enter();
'geo: {
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
let _entered = span.enter();
WordDocidsExtractors::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
IndexingStep::ExtractingWords
)?
};
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
break 'geo;
};
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
extract(
document_changes,
&extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
Step::WritingGeoPoints
)?;
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_docids");
let _entered = span.enter();
merge_and_send_docids(
word_docids,
index.word_docids.remap_types(),
index,
extractor_sender.docids::<WordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
merge_and_send_rtree(
datastore,
&rtxn,
index,
extractor_sender.geo(),
&indexing_context.must_stop_processing,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_fid_docids");
let _entered = span.enter();
merge_and_send_docids(
word_fid_docids,
index.word_fid_docids.remap_types(),
index,
extractor_sender.docids::<WordFidDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "FINISH");
let _entered = span.enter();
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "exact_word_docids");
let _entered = span.enter();
merge_and_send_docids(
exact_word_docids,
index.exact_word_docids.remap_types(),
index,
extractor_sender.docids::<ExactWordDocids>(),
&indexing_context.must_stop_processing,
)?;
}
Result::Ok(facet_field_ids_delta)
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_position_docids");
let _entered = span.enter();
merge_and_send_docids(
word_position_docids,
index.word_position_docids.remap_types(),
index,
extractor_sender.docids::<WordPositionDocids>(),
&indexing_context.must_stop_processing,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "fid_word_count_docids");
let _entered = span.enter();
merge_and_send_docids(
fid_word_count_docids,
index.field_id_word_count_docids.remap_types(),
index,
extractor_sender.docids::<FidWordCountDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
// run the proximity extraction only if the precision is by word
// this works only if the settings didn't change during this transaction.
let proximity_precision = index.proximity_precision(&rtxn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::ByWord {
let caches = {
let span = tracing::trace_span!(target: "indexing::documents::extract", "word_pair_proximity_docids");
let _entered = span.enter();
<WordPairProximityDocidsExtractor as DocidsExtractor>::run_extraction(
grenad_parameters,
document_changes,
indexing_context,
&mut extractor_allocs,
IndexingStep::ExtractingWordProximity,
)?
};
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "word_pair_proximity_docids");
let _entered = span.enter();
merge_and_send_docids(
caches,
index.word_pair_proximity_docids.remap_types(),
index,
extractor_sender.docids::<WordPairProximityDocids>(),
&indexing_context.must_stop_processing,
)?;
}
}
'vectors: {
if index_embeddings.is_empty() {
break 'vectors;
}
let embedding_sender = extractor_sender.embeddings();
let extractor = EmbeddingExtractor::new(embedders, embedding_sender, field_distribution, request_threads());
let mut datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "vectors");
let _entered = span.enter();
extract(
document_changes,
&extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
IndexingStep::ExtractingEmbeddings,
)?;
}
{
let span = tracing::trace_span!(target: "indexing::documents::merge", "vectors");
let _entered = span.enter();
for config in &mut index_embeddings {
'data: for data in datastore.iter_mut() {
let data = &mut data.get_mut().0;
let Some(deladd) = data.remove(&config.name) else { continue 'data; };
deladd.apply_to(&mut config.user_provided);
}
}
}
}
'geo: {
let Some(extractor) = GeoExtractor::new(&rtxn, index, grenad_parameters)? else {
break 'geo;
};
let datastore = ThreadLocal::with_capacity(rayon::current_num_threads());
{
let span = tracing::trace_span!(target: "indexing::documents::extract", "geo");
let _entered = span.enter();
extract(
document_changes,
&extractor,
indexing_context,
&mut extractor_allocs,
&datastore,
IndexingStep::WritingGeoPoints
)?;
}
merge_and_send_rtree(
datastore,
&rtxn,
index,
extractor_sender.geo(),
&indexing_context.must_stop_processing,
)?;
}
indexing_context.progress.update_progress(IndexingStep::WritingToDatabase);
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
Result::Ok((facet_field_ids_delta, index_embeddings))
}).unwrap()
})?;
let global_fields_ids_map = GlobalFieldsIdsMap::new(&new_fields_ids_map);
let vector_arroy = index.vector_arroy;
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
let indexer_span = tracing::Span::current();
let arroy_writers: Result<HashMap<_, _>> = embedders
.inner_as_ref()
@ -351,94 +415,116 @@ where
})
.collect();
// Used by the ArroySetVector to copy the embedding into an
// aligned memory area, required by arroy to accept a new vector.
let mut aligned_embedding = Vec::new();
let mut arroy_writers = arroy_writers?;
for operation in writer_receiver {
match operation {
WriterOperation::DbOperation(db_operation) => {
let database = db_operation.database(index);
match db_operation.entry() {
EntryOperation::Delete(e) => {
if !database.delete(wtxn, e.entry())? {
unreachable!("We tried to delete an unknown key")
}
}
EntryOperation::Write(e) => database.put(wtxn, e.key(), e.value())?,
}
{
let span = tracing::trace_span!(target: "indexing::write_db", "all");
let _entered = span.enter();
let span = tracing::trace_span!(target: "indexing::write_db", "post_merge");
let mut _entered_post_merge = None;
while let Some(action) = writer_receiver.recv_action() {
if _entered_post_merge.is_none()
&& finished_extraction.load(std::sync::atomic::Ordering::Relaxed)
{
_entered_post_merge = Some(span.enter());
}
WriterOperation::ArroyOperation(arroy_operation) => match arroy_operation {
ArroyOperation::DeleteVectors { docid } => {
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in
&mut arroy_writers
{
let dimensions = *dimensions;
writer.del_items(wtxn, dimensions, docid)?;
match action {
ReceiverAction::WakeUp => (),
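// Entries too large to fit in a ring-buffer frame are passed through these
// dedicated variants instead.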
ReceiverAction::LargeEntry(LargeEntry { database, key, value }) => {
let database_name = database.database_name();
let database = database.database(index);
if let Err(error) = database.put(wtxn, &key, &value) {
return Err(Error::InternalError(InternalError::StorePut {
database_name,
key: bstr::BString::from(&key[..]),
value_length: value.len(),
error,
}));
}
}
ArroyOperation::SetVectors {
docid,
embedder_id,
embeddings: raw_embeddings,
} => {
ReceiverAction::LargeVectors(large_vectors) => {
let LargeVectors { docid, embedder_id, .. } = large_vectors;
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
// TODO: switch to Embeddings
let mut embeddings = Embeddings::new(*dimensions);
for embedding in raw_embeddings {
embeddings.append(embedding).unwrap();
for embedding in large_vectors.read_embeddings(*dimensions) {
embeddings.push(embedding.to_vec()).unwrap();
}
writer.del_items(wtxn, *dimensions, docid)?;
writer.add_items(wtxn, docid, &embeddings)?;
}
ArroyOperation::SetVector { docid, embedder_id, embedding } => {
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
writer.del_items(wtxn, *dimensions, docid)?;
writer.add_item(wtxn, docid, &embedding)?;
}
ArroyOperation::Finish { configs } => {
let span = tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
let _entered = span.enter();
}
(indexing_context.send_progress)(Progress::from_step(
Step::WritingEmbeddingsToDatabase,
));
for (_embedder_index, (_embedder_name, _embedder, writer, dimensions)) in
&mut arroy_writers
{
let dimensions = *dimensions;
writer.build_and_quantize(
wtxn,
&mut rng,
dimensions,
false,
&indexing_context.must_stop_processing,
)?;
}
index.put_embedding_configs(wtxn, configs)?;
}
},
// Every time there is a message in the channel, we search
// for new entries in the BBQueue buffers.
write_from_bbqueue(
&mut writer_receiver,
index,
wtxn,
&arroy_writers,
&mut aligned_embedding,
)?;
}
// Once the extractor/writer channel is closed
// we must process the remaining BBQueue messages.
write_from_bbqueue(
&mut writer_receiver,
index,
wtxn,
&arroy_writers,
&mut aligned_embedding,
)?;
}
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));
indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
let facet_field_ids_delta = extractor_handle.join().unwrap()?;
let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets));
'vectors: {
let span =
tracing::trace_span!(target: "indexing::vectors", parent: &indexer_span, "build");
let _entered = span.enter();
if index_embeddings.is_empty() {
break 'vectors;
}
indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
let dimensions = *dimensions;
writer.build_and_quantize(
wtxn,
&mut rng,
dimensions,
false,
&indexing_context.must_stop_processing,
)?;
}
index.put_embedding_configs(wtxn, index_embeddings)?;
}
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
if index.facet_search(wtxn)? {
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
}
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords));
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?;
}
(indexing_context.send_progress)(Progress::from_step(Step::Finalizing));
indexing_context.progress.update_progress(IndexingStep::Finalizing);
Ok(()) as Result<_>
})?;
@ -464,6 +550,72 @@ where
Ok(())
}
/// A function dedicated to managing all the available BBQueue frames.
///
/// It reads all the available frames, performs the corresponding database operations,
/// and stops when no frames are available.
fn write_from_bbqueue(
writer_receiver: &mut WriterBbqueueReceiver<'_>,
index: &Index,
wtxn: &mut RwTxn<'_>,
arroy_writers: &HashMap<u8, (&str, &crate::vector::Embedder, ArroyWrapper, usize)>,
aligned_embedding: &mut Vec<f32>,
) -> crate::Result<()> {
while let Some(frame_with_header) = writer_receiver.recv_frame() {
match frame_with_header.header() {
EntryHeader::DbOperation(operation) => {
let database_name = operation.database.database_name();
let database = operation.database.database(index);
let frame = frame_with_header.frame();
match operation.key_value(frame) {
(key, Some(value)) => {
if let Err(error) = database.put(wtxn, key, value) {
return Err(Error::InternalError(InternalError::StorePut {
database_name,
key: key.into(),
value_length: value.len(),
error,
}));
}
}
(key, None) => match database.delete(wtxn, key) {
Ok(false) => {
unreachable!("We tried to delete an unknown key: {key:?}")
}
Ok(_) => (),
Err(error) => {
return Err(Error::InternalError(InternalError::StoreDeletion {
database_name,
key: key.into(),
error,
}));
}
},
}
}
EntryHeader::ArroyDeleteVector(ArroyDeleteVector { docid }) => {
for (_index, (_name, _embedder, writer, dimensions)) in arroy_writers {
let dimensions = *dimensions;
writer.del_items(wtxn, dimensions, docid)?;
}
}
EntryHeader::ArroySetVectors(asvs) => {
let ArroySetVectors { docid, embedder_id, .. } = asvs;
let frame = frame_with_header.frame();
let (_, _, writer, dimensions) =
arroy_writers.get(&embedder_id).expect("requested a missing embedder");
let mut embeddings = Embeddings::new(*dimensions);
let all_embeddings = asvs.read_all_embeddings_into_vec(frame, aligned_embedding);
embeddings.append(all_embeddings.to_vec()).unwrap();
writer.del_items(wtxn, *dimensions, docid)?;
writer.add_items(wtxn, docid, &embeddings)?;
}
}
}
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::prefix")]
fn compute_prefix_database(
index: &Index,
@ -618,7 +770,7 @@ pub fn retrieve_or_guess_primary_key<'a>(
index: &Index,
new_fields_ids_map: &mut FieldsIdsMap,
primary_key_from_op: Option<&'a str>,
first_document: Option<RawMap<'a>>,
first_document: Option<RawMap<'a, FxBuildHasher>>,
) -> Result<StdResult<(PrimaryKey<'a>, bool), UserError>> {
// make sure that we have a declared primary key, either fetching it from the index or attempting to guess it.

View file

@ -1,6 +1,8 @@
use std::ops::DerefMut;
use bumparaw_collections::RawMap;
use rayon::iter::IndexedParallelIterator;
use rustc_hash::FxBuildHasher;
use serde_json::value::RawValue;
use super::document_changes::{DocumentChangeContext, DocumentChanges};
@ -75,7 +77,7 @@ where
self.primary_key.extract_fields_and_docid(document, fields_ids_map, doc_alloc)?;
let external_document_id = external_document_id.to_de();
let document = raw_collections::RawMap::from_raw_value(document, doc_alloc)
let document = RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc)
.map_err(InternalError::SerdeJson)?;
let insertion = Insertion::create(docid, external_document_id, Versions::single(document));

View file

@ -1,8 +1,9 @@
use raw_collections::RawMap;
use bumparaw_collections::RawMap;
use rayon::iter::IndexedParallelIterator;
use rayon::slice::ParallelSlice as _;
use rhai::{Dynamic, Engine, OptimizationLevel, Scope, AST};
use roaring::RoaringBitmap;
use rustc_hash::FxBuildHasher;
use super::document_changes::DocumentChangeContext;
use super::DocumentChanges;
@ -160,8 +161,12 @@ impl<'index> DocumentChanges<'index> for UpdateByFunctionChanges<'index> {
if document_id != new_document_id {
Err(Error::UserError(UserError::DocumentEditionCannotModifyPrimaryKey))
} else {
let raw_new_doc = RawMap::from_raw_value(raw_new_doc, doc_alloc)
.map_err(InternalError::SerdeJson)?;
let raw_new_doc = RawMap::from_raw_value_and_hasher(
raw_new_doc,
FxBuildHasher,
doc_alloc,
)
.map_err(InternalError::SerdeJson)?;
Ok(Some(DocumentChange::Update(Update::create(
docid,

View file

@ -9,8 +9,8 @@ use roaring::RoaringBitmap;
use super::channel::*;
use super::extract::{
merge_caches, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap, FacetKind,
GeoExtractorData,
merge_caches_sorted, transpose_and_freeze_caches, BalancedCaches, DelAddRoaringBitmap,
FacetKind, GeoExtractorData,
};
use crate::{CboRoaringBitmapCodec, FieldId, GeoPoint, Index, InternalError, Result};
@ -19,7 +19,7 @@ pub fn merge_and_send_rtree<'extractor, MSP>(
datastore: impl IntoIterator<Item = RefCell<GeoExtractorData<'extractor>>>,
rtxn: &RoTxn,
index: &Index,
geo_sender: GeoSender<'_>,
geo_sender: GeoSender<'_, '_>,
must_stop_processing: &MSP,
) -> Result<()>
where
@ -34,7 +34,7 @@ where
}
let mut frozen = data.into_inner().freeze()?;
for result in frozen.iter_and_clear_removed() {
for result in frozen.iter_and_clear_removed()? {
let extracted_geo_point = result?;
let removed = rtree.remove(&GeoPoint::from(extracted_geo_point));
debug_assert!(removed.is_some());
@ -42,7 +42,7 @@ where
debug_assert!(removed);
}
for result in frozen.iter_and_clear_inserted() {
for result in frozen.iter_and_clear_inserted()? {
let extracted_geo_point = result?;
rtree.insert(GeoPoint::from(extracted_geo_point));
let inserted = faceted.insert(extracted_geo_point.docid);
@ -56,38 +56,37 @@ where
let rtree_mmap = unsafe { Mmap::map(&file)? };
geo_sender.set_rtree(rtree_mmap).unwrap();
geo_sender.set_geo_faceted(&faceted).unwrap();
geo_sender.set_geo_faceted(&faceted)?;
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::merge")]
pub fn merge_and_send_docids<'extractor, MSP>(
pub fn merge_and_send_docids<'extractor, MSP, D>(
mut caches: Vec<BalancedCaches<'extractor>>,
database: Database<Bytes, Bytes>,
index: &Index,
docids_sender: impl DocidsSender + Sync,
docids_sender: WordDocidsSender<D>,
must_stop_processing: &MSP,
) -> Result<()>
where
MSP: Fn() -> bool + Sync,
D: DatabaseType + Sync,
{
transpose_and_freeze_caches(&mut caches)?.into_par_iter().try_for_each(|frozen| {
let rtxn = index.read_txn()?;
let mut buffer = Vec::new();
if must_stop_processing() {
return Err(InternalError::AbortedIndexation.into());
}
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
docids_sender.write(key, value).unwrap();
docids_sender.write(key, &bitmap)?;
Ok(())
}
Operation::Delete => {
docids_sender.delete(key).unwrap();
docids_sender.delete(key)?;
Ok(())
}
Operation::Ignore => Ok(()),
@ -101,26 +100,24 @@ pub fn merge_and_send_facet_docids<'extractor>(
mut caches: Vec<BalancedCaches<'extractor>>,
database: FacetDatabases,
index: &Index,
docids_sender: impl DocidsSender + Sync,
docids_sender: FacetDocidsSender,
) -> Result<FacetFieldIdsDelta> {
transpose_and_freeze_caches(&mut caches)?
.into_par_iter()
.map(|frozen| {
let mut facet_field_ids_delta = FacetFieldIdsDelta::default();
let rtxn = index.read_txn()?;
let mut buffer = Vec::new();
merge_caches(frozen, |key, DelAddRoaringBitmap { del, add }| {
merge_caches_sorted(frozen, |key, DelAddRoaringBitmap { del, add }| {
let current = database.get_cbo_roaring_bytes_value(&rtxn, key)?;
match merge_cbo_bitmaps(current, del, add)? {
Operation::Write(bitmap) => {
facet_field_ids_delta.register_from_key(key);
let value = cbo_bitmap_serialize_into_vec(&bitmap, &mut buffer);
docids_sender.write(key, value).unwrap();
docids_sender.write(key, &bitmap)?;
Ok(())
}
Operation::Delete => {
facet_field_ids_delta.register_from_key(key);
docids_sender.delete(key).unwrap();
docids_sender.delete(key)?;
Ok(())
}
Operation::Ignore => Ok(()),
@ -238,8 +235,12 @@ fn merge_cbo_bitmaps(
(Some(_current), None, None) => Ok(Operation::Ignore), // but it's strange
(Some(current), None, Some(add)) => Ok(Operation::Write(current | add)),
(Some(current), Some(del), add) => {
debug_assert!(
del.is_subset(&current),
"del is not a subset of current, which must be impossible."
);
let output = match add {
Some(add) => (&current - del) | add,
Some(add) => (&current - (&del - &add)) | (add - del),
None => &current - del,
};
if output.is_empty() {
@ -252,10 +253,3 @@ fn merge_cbo_bitmaps(
}
}
}
/// TODO Return the slice directly from the serialize_into method
fn cbo_bitmap_serialize_into_vec<'b>(bitmap: &RoaringBitmap, buffer: &'b mut Vec<u8>) -> &'b [u8] {
buffer.clear();
CboRoaringBitmapCodec::serialize_into(bitmap, buffer);
buffer.as_slice()
}
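
The rewritten union deserves a worked example. When `del` really is a subset of `current` (the invariant the new `debug_assert!` checks), `(&current - (&del - &add)) | (add - del)` agrees with the old `(&current - del) | add`; the two only diverge for a docid that is both deleted and re-added without being in `current`, where the new formula treats the two operations as cancelling out. A small sketch with the `roaring` crate (values illustrative):

use roaring::RoaringBitmap;

fn main() {
    let current: RoaringBitmap = [1u32, 2].into_iter().collect();
    let del: RoaringBitmap = [2u32, 3].into_iter().collect(); // 3 violates the invariant
    let add: RoaringBitmap = [3u32, 4].into_iter().collect();

    // Old formula: 3 always survives because it is in `add`.
    let old = (&current - &del) | &add;
    // New formula: 3 is both deleted and added, a net no-op, and since it
    // was never in `current` it stays absent.
    let new = (&current - (&del - &add)) | (&add - &del);

    assert_eq!(old.into_iter().collect::<Vec<_>>(), vec![1, 3, 4]);
    assert_eq!(new.into_iter().collect::<Vec<_>>(), vec![1, 4]);
}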

View file

@ -5,6 +5,7 @@ pub trait RefCellExt<T: ?Sized> {
&self,
) -> std::result::Result<RefMut<'_, T>, std::cell::BorrowMutError>;
#[track_caller]
fn borrow_mut_or_yield(&self) -> RefMut<'_, T> {
self.try_borrow_mut_or_yield().unwrap()
}

View file

@ -1,8 +1,12 @@
use std::borrow::Cow;
use enum_iterator::Sequence;
use crate::progress::Step;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[repr(u16)]
pub enum Step {
#[repr(u8)]
pub enum IndexingStep {
PreparingPayloads,
ExtractingDocuments,
ExtractingFacets,
@ -11,37 +15,38 @@ pub enum Step {
ExtractingEmbeddings,
WritingGeoPoints,
WritingToDatabase,
WritingEmbeddingsToDatabase,
WaitingForExtractors,
WritingEmbeddingsToDatabase,
PostProcessingFacets,
PostProcessingWords,
Finalizing,
}
impl Step {
pub fn name(&self) -> &'static str {
impl Step for IndexingStep {
fn name(&self) -> Cow<'static, str> {
match self {
Step::PreparingPayloads => "preparing update file",
Step::ExtractingDocuments => "extracting documents",
Step::ExtractingFacets => "extracting facets",
Step::ExtractingWords => "extracting words",
Step::ExtractingWordProximity => "extracting word proximity",
Step::ExtractingEmbeddings => "extracting embeddings",
Step::WritingGeoPoints => "writing geo points",
Step::WritingToDatabase => "writing to database",
Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
Step::WaitingForExtractors => "waiting for extractors",
Step::PostProcessingFacets => "post-processing facets",
Step::PostProcessingWords => "post-processing words",
Step::Finalizing => "finalizing",
IndexingStep::PreparingPayloads => "preparing update file",
IndexingStep::ExtractingDocuments => "extracting documents",
IndexingStep::ExtractingFacets => "extracting facets",
IndexingStep::ExtractingWords => "extracting words",
IndexingStep::ExtractingWordProximity => "extracting word proximity",
IndexingStep::ExtractingEmbeddings => "extracting embeddings",
IndexingStep::WritingGeoPoints => "writing geo points",
IndexingStep::WritingToDatabase => "writing to database",
IndexingStep::WaitingForExtractors => "waiting for extractors",
IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
IndexingStep::PostProcessingFacets => "post-processing facets",
IndexingStep::PostProcessingWords => "post-processing words",
IndexingStep::Finalizing => "finalizing",
}
.into()
}
pub fn finished_steps(self) -> u16 {
self as u16
fn current(&self) -> u32 {
*self as u32
}
pub const fn total_steps() -> u16 {
Self::CARDINALITY as u16
fn total(&self) -> u32 {
Self::CARDINALITY as u32
}
}
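
The `Step` trait from `crate::progress` is not part of this diff; judging from the impl above, it has roughly this shape (a reconstruction, including assumed supertraits):

use std::borrow::Cow;

pub trait Step: 'static + Send + Sync {
    fn name(&self) -> Cow<'static, str>;
    fn current(&self) -> u32;
    fn total(&self) -> u32;
}

// A progress reporter can then stay generic over indexing steps or any
// other kind of step:
fn report(step: &dyn Step) {
    println!("[{}/{}] {}", step.current() + 1, step.total(), step.name());
}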

View file

@ -1,9 +1,10 @@
use std::collections::BTreeSet;
use bumpalo::Bump;
use bumparaw_collections::RawMap;
use deserr::{Deserr, IntoValue};
use heed::RoTxn;
use raw_collections::RawMap;
use rustc_hash::FxBuildHasher;
use serde::Serialize;
use serde_json::value::RawValue;
@ -84,7 +85,7 @@ pub struct VectorDocumentFromDb<'t> {
docid: DocumentId,
embedding_config: Vec<IndexEmbeddingConfig>,
index: &'t Index,
vectors_field: Option<RawMap<'t>>,
vectors_field: Option<RawMap<'t, FxBuildHasher>>,
rtxn: &'t RoTxn<'t>,
doc_alloc: &'t Bump,
}
@ -102,9 +103,10 @@ impl<'t> VectorDocumentFromDb<'t> {
};
let vectors = document.vectors_field()?;
let vectors_field = match vectors {
Some(vectors) => {
Some(RawMap::from_raw_value(vectors, doc_alloc).map_err(InternalError::SerdeJson)?)
}
Some(vectors) => Some(
RawMap::from_raw_value_and_hasher(vectors, FxBuildHasher, doc_alloc)
.map_err(InternalError::SerdeJson)?,
),
None => None,
};
@ -220,7 +222,7 @@ fn entry_from_raw_value(
pub struct VectorDocumentFromVersions<'doc> {
external_document_id: &'doc str,
vectors: RawMap<'doc>,
vectors: RawMap<'doc, FxBuildHasher>,
embedders: &'doc EmbeddingConfigs,
}
@ -233,8 +235,8 @@ impl<'doc> VectorDocumentFromVersions<'doc> {
) -> Result<Option<Self>> {
let document = DocumentFromVersions::new(versions);
if let Some(vectors_field) = document.vectors_field()? {
let vectors =
RawMap::from_raw_value(vectors_field, bump).map_err(UserError::SerdeJson)?;
let vectors = RawMap::from_raw_value_and_hasher(vectors_field, FxBuildHasher, bump)
.map_err(UserError::SerdeJson)?;
Ok(Some(Self { external_document_id, vectors, embedders }))
} else {
Ok(None)
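
All of these `RawMap`s now take `FxBuildHasher` from `rustc-hash` explicitly. Fx hashing is much cheaper than the default SipHash and is a reasonable fit here because these maps are short-lived and internal, so HashDoS resistance is not a concern. The same idea, sketched on a plain std `HashMap` rather than the bumparaw-collections `RawMap`:

use std::collections::HashMap;

use rustc_hash::FxBuildHasher;

fn main() {
    // Swap the default RandomState (SipHash) for the faster FxBuildHasher.
    let mut vectors: HashMap<&str, u32, FxBuildHasher> = HashMap::with_hasher(FxBuildHasher);
    vectors.insert("default", 768);
    assert_eq!(vectors.get("default"), Some(&768));
}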

View file

@ -1,4 +1,4 @@
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::io::BufWriter;
use fst::{Set, SetBuilder, Streamer};
@ -75,18 +75,18 @@ pub struct PrefixData {
#[derive(Debug)]
pub struct PrefixDelta {
pub modified: HashSet<Prefix>,
pub deleted: HashSet<Prefix>,
pub modified: BTreeSet<Prefix>,
pub deleted: BTreeSet<Prefix>,
}
struct PrefixFstBuilder {
prefix_count_threshold: u64,
prefix_count_threshold: usize,
max_prefix_length: usize,
/// TODO: Replace the full memory allocation
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
current_prefix: Vec<Prefix>,
current_prefix_count: Vec<u64>,
modified_prefixes: HashSet<Prefix>,
current_prefix_count: Vec<usize>,
modified_prefixes: BTreeSet<Prefix>,
current_prefix_is_modified: Vec<bool>,
}
@ -95,7 +95,7 @@ impl PrefixFstBuilder {
let PrefixSettings { prefix_count_threshold, max_prefix_length, compute_prefixes } =
prefix_settings;
if !compute_prefixes {
if compute_prefixes != crate::index::PrefixSearch::IndexingTime {
return None;
}
@ -110,7 +110,7 @@ impl PrefixFstBuilder {
prefix_fst_builders,
current_prefix: vec![Prefix::new(); max_prefix_length],
current_prefix_count: vec![0; max_prefix_length],
modified_prefixes: HashSet::new(),
modified_prefixes: BTreeSet::new(),
current_prefix_is_modified: vec![false; max_prefix_length],
})
}
@ -180,7 +180,7 @@ impl PrefixFstBuilder {
let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
let mut deleted_prefixes = HashSet::new();
let mut deleted_prefixes = BTreeSet::new();
{
let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
while let Some(prefix) = deleted_prefixes_stream.next() {
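
The switch from `HashSet<Prefix>` to `BTreeSet<Prefix>` here (and throughout the prefix-docids code below) buys deterministic, sorted iteration over the modified and deleted prefixes, keeping the post-processing order reproducible across runs. A quick illustration:

use std::collections::BTreeSet;

fn main() {
    let mut prefixes = BTreeSet::new();
    for p in ["ca", "a", "b"] {
        prefixes.insert(p);
    }
    // A BTreeSet always iterates in sorted order, unlike a HashSet whose
    // order is unspecified and can differ between runs.
    assert_eq!(prefixes.into_iter().collect::<Vec<_>>(), vec!["a", "b", "ca"]);
}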

View file

@ -1,5 +1,5 @@
use std::cell::RefCell;
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::io::{BufReader, BufWriter, Read, Seek, Write};
use hashbrown::HashMap;
@ -37,8 +37,8 @@ impl WordPrefixDocids {
fn execute(
self,
wtxn: &mut heed::RwTxn,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@ -48,7 +48,7 @@ impl WordPrefixDocids {
fn recompute_modified_prefixes(
&self,
wtxn: &mut RwTxn,
prefixes: &HashSet<Prefix>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only.
// And collect the CboRoaringBitmap pointers in a HashMap.
@ -76,7 +76,7 @@ impl WordPrefixDocids {
.union()?;
buffer.clear();
CboRoaringBitmapCodec::serialize_into(&output, buffer);
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
index.push(PrefixEntry { prefix, serialized_length: buffer.len() });
file.write_all(buffer)
})?;
@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
pub fn from_prefixes(
database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn,
prefixes: &'a HashSet<Prefix>,
prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> {
let database = database.remap_data_type::<Bytes>();
@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids {
fn execute(
self,
wtxn: &mut heed::RwTxn,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids {
fn recompute_modified_prefixes(
&self,
wtxn: &mut RwTxn,
prefixes: &HashSet<Prefix>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only.
// And collect the CboRoaringBitmap pointers in a HashMap.
@ -211,7 +211,7 @@ impl WordPrefixIntegerDocids {
.union()?;
buffer.clear();
CboRoaringBitmapCodec::serialize_into(&output, buffer);
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
index.push(PrefixIntegerEntry { prefix, pos, serialized_length: buffer.len() });
file.write_all(buffer)?;
}
@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
pub fn from_prefixes(
database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn,
prefixes: &'a HashSet<Prefix>,
prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> {
let database = database.remap_data_type::<Bytes>();
@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {}
fn delete_prefixes(
wtxn: &mut RwTxn,
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
prefixes: &HashSet<Prefix>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We remove all the entries that are no more required in this word prefix docids database.
for prefix in prefixes {
@ -309,8 +309,8 @@ fn delete_prefixes(
pub fn compute_word_prefix_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixDocids::new(
@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids(
pub fn compute_exact_word_prefix_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixDocids::new(
@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids(
pub fn compute_word_prefix_fid_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixIntegerDocids::new(
@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids(
pub fn compute_word_prefix_position_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixIntegerDocids::new(

View file

@ -17,7 +17,8 @@ use super::IndexerConfig;
use crate::criterion::Criterion;
use crate::error::UserError;
use crate::index::{
IndexEmbeddingConfig, DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
IndexEmbeddingConfig, PrefixSearch, DEFAULT_MIN_WORD_LEN_ONE_TYPO,
DEFAULT_MIN_WORD_LEN_TWO_TYPOS,
};
use crate::order_by_map::OrderByMap;
use crate::prompt::default_max_bytes;
@ -177,6 +178,8 @@ pub struct Settings<'a, 't, 'i> {
embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
search_cutoff: Setting<u64>,
localized_attributes_rules: Setting<Vec<LocalizedAttributesRule>>,
prefix_search: Setting<PrefixSearch>,
facet_search: Setting<bool>,
}
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
@ -212,6 +215,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
embedder_settings: Setting::NotSet,
search_cutoff: Setting::NotSet,
localized_attributes_rules: Setting::NotSet,
prefix_search: Setting::NotSet,
facet_search: Setting::NotSet,
indexer_config,
}
}
@ -418,6 +423,22 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.localized_attributes_rules = Setting::Reset;
}
pub fn set_prefix_search(&mut self, value: PrefixSearch) {
self.prefix_search = Setting::Set(value);
}
pub fn reset_prefix_search(&mut self) {
self.prefix_search = Setting::Reset;
}
pub fn set_facet_search(&mut self, value: bool) {
self.facet_search = Setting::Set(value);
}
pub fn reset_facet_search(&mut self) {
self.facet_search = Setting::Reset;
}
#[tracing::instrument(
level = "trace",
skip(self, progress_callback, should_abort, settings_diff),
@ -944,7 +965,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
false
} else {
self.index.put_proximity_precision(self.wtxn, new)?;
true
old.is_some() || new != ProximityPrecision::default()
}
}
Setting::Reset => self.index.delete_proximity_precision(self.wtxn)?,
@ -954,6 +975,42 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(changed)
}
fn update_prefix_search(&mut self) -> Result<bool> {
let changed = match self.prefix_search {
Setting::Set(new) => {
let old = self.index.prefix_search(self.wtxn)?;
if old == Some(new) {
false
} else {
self.index.put_prefix_search(self.wtxn, new)?;
old.is_some() || new != PrefixSearch::default()
}
}
Setting::Reset => self.index.delete_prefix_search(self.wtxn)?,
Setting::NotSet => false,
};
Ok(changed)
}
fn update_facet_search(&mut self) -> Result<bool> {
let changed = match self.facet_search {
Setting::Set(new) => {
let old = self.index.facet_search(self.wtxn)?;
if old == new {
false
} else {
self.index.put_facet_search(self.wtxn, new)?;
true
}
}
Setting::Reset => self.index.delete_facet_search(self.wtxn)?,
Setting::NotSet => false,
};
Ok(changed)
}
fn update_embedding_configs(&mut self) -> Result<BTreeMap<String, EmbedderAction>> {
match std::mem::take(&mut self.embedder_settings) {
Setting::Set(configs) => self.update_embedding_configs_set(configs),
@ -1203,6 +1260,8 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.update_searchable()?;
self.update_exact_attributes()?;
self.update_proximity_precision()?;
self.update_prefix_search()?;
self.update_facet_search()?;
self.update_localized_attributes_rules()?;
let embedding_config_updates = self.update_embedding_configs()?;
@ -1282,6 +1341,7 @@ impl InnerIndexSettingsDiff {
|| old_settings.allowed_separators != new_settings.allowed_separators
|| old_settings.dictionary != new_settings.dictionary
|| old_settings.proximity_precision != new_settings.proximity_precision
|| old_settings.prefix_search != new_settings.prefix_search
|| old_settings.localized_searchable_fields_ids
!= new_settings.localized_searchable_fields_ids
};
@ -1372,7 +1432,7 @@ impl InnerIndexSettingsDiff {
}
}
pub fn reindex_facets(&self) -> bool {
pub fn facet_fids_changed(&self) -> bool {
let existing_fields = &self.new.existing_fields;
if existing_fields.iter().any(|field| field.contains('.')) {
return true;
@ -1392,7 +1452,15 @@ impl InnerIndexSettingsDiff {
}
(existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
|| self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
}
pub fn global_facet_settings_changed(&self) -> bool {
self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
|| self.old.facet_search != self.new.facet_search
}
pub fn reindex_facets(&self) -> bool {
self.facet_fids_changed() || self.global_facet_settings_changed()
}
pub fn reindex_vectors(&self) -> bool {
@ -1432,6 +1500,8 @@ pub(crate) struct InnerIndexSettings {
pub non_faceted_fields_ids: Vec<FieldId>,
pub localized_searchable_fields_ids: LocalizedFieldIds,
pub localized_faceted_fields_ids: LocalizedFieldIds,
pub prefix_search: PrefixSearch,
pub facet_search: bool,
}
impl InnerIndexSettings {
@ -1457,6 +1527,8 @@ impl InnerIndexSettings {
Some(embedding_configs) => embedding_configs,
None => embedders(index.embedding_configs(rtxn)?)?,
};
let prefix_search = index.prefix_search(rtxn)?.unwrap_or_default();
let facet_search = index.facet_search(rtxn)?;
let existing_fields: HashSet<_> = index
.field_distribution(rtxn)?
.into_iter()
@ -1514,6 +1586,8 @@ impl InnerIndexSettings {
non_faceted_fields_ids: vectors_fids.clone(),
localized_searchable_fields_ids,
localized_faceted_fields_ids,
prefix_search,
facet_search,
})
}
@ -2721,6 +2795,8 @@ mod tests {
embedder_settings,
search_cutoff,
localized_attributes_rules,
prefix_search,
facet_search,
} = settings;
assert!(matches!(searchable_fields, Setting::NotSet));
assert!(matches!(displayed_fields, Setting::NotSet));
@ -2746,6 +2822,8 @@ mod tests {
assert!(matches!(embedder_settings, Setting::NotSet));
assert!(matches!(search_cutoff, Setting::NotSet));
assert!(matches!(localized_attributes_rules, Setting::NotSet));
assert!(matches!(prefix_search, Setting::NotSet));
assert!(matches!(facet_search, Setting::NotSet));
})
.unwrap();
}
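
Note the guard `old.is_some() || new != PrefixSearch::default()` shared by `update_proximity_precision` and the new `update_prefix_search` above: explicitly writing the default value over an unset setting is not reported as a change, so it cannot trigger a pointless reindex. The idiom, distilled (the helper name is ours, not the PR's):

fn setting_changed<T: PartialEq + Default>(old: Option<&T>, new: &T) -> bool {
    match old {
        Some(old) if old == new => false, // same explicit value: nothing to do
        Some(_) => true,                  // replacing a different explicit value
        // First explicit write: only a real change when it differs from the
        // implicit default the engine was already using.
        None => *new != T::default(),
    }
}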

View file

@ -9,7 +9,7 @@ use crate::{Index, Result, SmallString32};
pub struct WordsPrefixesFst<'t, 'i> {
wtxn: &'t mut RwTxn<'i>,
index: &'i Index,
threshold: u32,
threshold: usize,
max_prefix_length: usize,
}
@ -24,8 +24,8 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
///
/// Default value is 100. This value must be higher than 50 and will be clamped
/// to this bound otherwise.
pub fn threshold(&mut self, value: u32) -> &mut Self {
self.threshold = value.max(50);
pub fn threshold(&mut self, value: usize) -> &mut Self {
self.threshold = value;
self
}
@ -34,7 +34,7 @@ impl<'t, 'i> WordsPrefixesFst<'t, 'i> {
/// Default value is `4` bytes. This value must be between 1 and 25 and will be clamped
/// to these bounds otherwise.
pub fn max_prefix_length(&mut self, value: usize) -> &mut Self {
self.max_prefix_length = value.clamp(1, 25);
self.max_prefix_length = value;
self
}

View file

@ -475,7 +475,7 @@ impl<F> Embeddings<F> {
Ok(())
}
/// Append a flat vector of embeddings a the end of the embeddings.
/// Append a flat vector of embeddings at the end of the embeddings.
///
/// If `embeddings.len() % self.dimension != 0`, then the append operation fails.
pub fn append(&mut self, mut embeddings: Vec<F>) -> Result<(), Vec<F>> {
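
The doc comment spells out the invariant; a stripped-down sketch of how such an `append` can enforce it (field and type names assumed, with `F` fixed to `f32` for brevity):

struct Embeddings {
    data: Vec<f32>,
    dimension: usize,
}

impl Embeddings {
    fn append(&mut self, embeddings: Vec<f32>) -> Result<(), Vec<f32>> {
        // Reject buffers that do not hold a whole number of vectors,
        // handing the buffer back to the caller on failure.
        if self.dimension == 0 || embeddings.len() % self.dimension != 0 {
            return Err(embeddings);
        }
        self.data.extend(embeddings);
        Ok(())
    }
}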