Merge branch 'main' into indexer-edition-2024

ManyTheFish 2024-11-06 15:19:18 +01:00
commit 10feeb88f2
1122 changed files with 6265 additions and 5265 deletions

View file

@@ -0,0 +1,533 @@
use std::fs::File;
use std::io::BufReader;
use grenad::{CompressionType, Merger};
use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::facet::FacetType;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::update::index_documents::{create_writer, valid_lmdb_key, writer_into_reader};
use crate::update::MergeDeladdCboRoaringBitmaps;
use crate::{CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldId, Index, Result};
/// Algorithm to insert elements into the `facet_id_(string/f64)_docids` databases
/// by rebuilding the database "from scratch".
///
/// First, the new elements are inserted into the level 0 of the database. Then, the
/// higher levels are cleared and recomputed from the content of level 0.
pub struct FacetsUpdateBulk<'i> {
index: &'i Index,
group_size: u8,
min_level_size: u8,
facet_type: FacetType,
field_ids: Vec<FieldId>,
// None if level 0 does not need to be updated
delta_data: Option<Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>>,
}
impl<'i> FacetsUpdateBulk<'i> {
pub fn new(
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
group_size: u8,
min_level_size: u8,
) -> FacetsUpdateBulk<'i> {
FacetsUpdateBulk {
index,
field_ids,
group_size,
min_level_size,
facet_type,
delta_data: Some(delta_data),
}
}
pub fn new_not_updating_level_0(
index: &'i Index,
field_ids: Vec<FieldId>,
facet_type: FacetType,
) -> FacetsUpdateBulk<'i> {
FacetsUpdateBulk {
index,
field_ids,
group_size: FACET_GROUP_SIZE,
min_level_size: FACET_MIN_LEVEL_SIZE,
facet_type,
delta_data: None,
}
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::bulk")]
pub fn execute(self, wtxn: &mut heed::RwTxn<'_>) -> Result<()> {
let Self { index, field_ids, group_size, min_level_size, facet_type, delta_data } = self;
let db = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
};
let inner = FacetsUpdateBulkInner { db, delta_data, group_size, min_level_size };
inner.update(wtxn, &field_ids)?;
Ok(())
}
}
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub delta_data: Option<Merger<R, MergeDeladdCboRoaringBitmaps>>,
pub group_size: u8,
pub min_level_size: u8,
}
impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
pub fn update(mut self, wtxn: &mut RwTxn<'_>, field_ids: &[u16]) -> Result<()> {
self.update_level0(wtxn)?;
for &field_id in field_ids.iter() {
self.clear_levels(wtxn, field_id)?;
}
for &field_id in field_ids.iter() {
let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?;
for level_reader in level_readers {
let mut cursor = level_reader.into_cursor()?;
while let Some((k, v)) = cursor.move_on_next()? {
self.db.remap_types::<Bytes, Bytes>().put(wtxn, k, v)?;
}
}
}
Ok(())
}
fn clear_levels(&self, wtxn: &mut heed::RwTxn<'_>, field_id: FieldId) -> Result<()> {
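// Build a key range covering every level from 1 to u8::MAX for this field id;
// deleting it keeps level 0 intact so the higher levels can be recomputed from it.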
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let range = left..=right;
self.db.delete_range(wtxn, &range).map(drop)?;
Ok(())
}
fn update_level0(&mut self, wtxn: &mut RwTxn<'_>) -> Result<()> {
let delta_data = match self.delta_data.take() {
Some(x) => x,
None => return Ok(()),
};
if self.db.is_empty(wtxn)? {
let mut buffer = Vec::new();
let mut database = self.db.iter_mut(wtxn)?.remap_types::<Bytes, Bytes>();
let mut iter = delta_data.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
if !valid_lmdb_key(key) {
continue;
}
let value = KvReaderDelAdd::from_slice(value);
// the DB is empty, so it is safe to ignore Del operations
let Some(value) = value.get(DelAdd::Addition) else {
continue;
};
buffer.clear();
// the group size for level 0
buffer.push(1);
// then we extend the buffer with the docids bitmap
buffer.extend_from_slice(value);
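// The merger yields the keys in ascending order and the database is empty,
// so appending the entries directly is both safe and fast.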
unsafe {
database.put_current_with_options::<Bytes>(PutFlags::APPEND, key, &buffer)?
};
}
} else {
let mut buffer = Vec::new();
let database = self.db.remap_types::<Bytes, Bytes>();
let mut iter = delta_data.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? {
if !valid_lmdb_key(key) {
continue;
}
let value = KvReaderDelAdd::from_slice(value);
// the value is a CboRoaringBitmap, but we still need to prepend the
// group size for level 0 (= 1) to it
buffer.clear();
buffer.push(1);
// then we extend the buffer with the docids bitmap
match database.get(wtxn, key)? {
Some(prev_value) => {
// prev_value is the group size for level 0, followed by the previous bitmap.
let old_bitmap = &prev_value[1..];
CboRoaringBitmapCodec::merge_deladd_into(value, old_bitmap, &mut buffer)?;
}
None => {
// there is no previous value, so it is safe to ignore the Del operations.
let Some(value) = value.get(DelAdd::Addition) else {
// won't put the key in DB as the value would be empty
continue;
};
buffer.extend_from_slice(value);
}
};
let new_bitmap = &buffer[1..];
// if the new bitmap is empty, let's remove it
if CboRoaringBitmapLenCodec::bytes_decode(new_bitmap).unwrap_or_default() == 0 {
database.delete(wtxn, key)?;
} else {
database.put(wtxn, key, &buffer)?;
}
}
}
Ok(())
}
fn compute_levels_for_field_id(
&self,
field_id: FieldId,
txn: &RoTxn<'_>,
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
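// Note: 32 is an upper bound on the number of levels; since each level is at least
// `group_size` (>= 2) times smaller than the one below it, 32 levels are more than
// enough in practice.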
let subwriters = self.compute_higher_levels(txn, field_id, 32, &mut |_, _| Ok(()))?;
Ok(subwriters)
}
#[allow(clippy::type_complexity)]
fn read_level_0<'t>(
&self,
rtxn: &'t RoTxn<'t>,
field_id: u16,
handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
) -> Result<()> {
// we read the elements one by one and
// 1. keep track of the left bound
// 2. fill the `bitmaps` vector to give it to level 1 once `group_size` elements have been read
let mut bitmaps = vec![];
let mut level_0_prefix = vec![];
level_0_prefix.extend_from_slice(&field_id.to_be_bytes());
level_0_prefix.push(0);
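// The prefix is the field id (big-endian u16) followed by the level byte (0),
// matching the layout of an encoded `FacetGroupKey`.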
let level_0_iter = self
.db
.remap_types::<Bytes, Bytes>()
.prefix_iter(rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[];
let mut first_iteration_for_new_group = true;
for el in level_0_iter {
let (key, value) = el?;
let bound = key.left_bound;
let docids = value.bitmap;
if first_iteration_for_new_group {
left_bound = bound;
first_iteration_for_new_group = false;
}
bitmaps.push(docids);
if bitmaps.len() == self.group_size as usize {
handle_group(&bitmaps, left_bound)?;
first_iteration_for_new_group = true;
bitmaps.clear();
}
}
// don't forget to give the leftover bitmaps as well
if !bitmaps.is_empty() {
handle_group(&bitmaps, left_bound)?;
bitmaps.clear();
}
Ok(())
}
/// Compute the content of the database levels from its level 0 for the given field id.
///
/// ## Returns:
/// A vector of grenad::Reader. The reader at index `i` corresponds to the elements of level `i + 1`
/// that must be inserted into the database.
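///
/// For example, with `group_size = 4`, level 1 is expected to hold roughly a quarter as many
/// entries as level 0, level 2 roughly a sixteenth, and so on, until a level would contain
/// fewer than `min_level_size` entries.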
#[allow(clippy::type_complexity)]
fn compute_higher_levels<'t>(
&self,
rtxn: &'t RoTxn<'t>,
field_id: u16,
level: u8,
handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,
) -> Result<Vec<grenad::Reader<BufReader<File>>>> {
if level == 0 {
self.read_level_0(rtxn, field_id, handle_group)?;
// Level 0 is already in the database
return Ok(vec![]);
}
// level >= 1
// we compute each element of this level based on the elements of the level below it
// once we have computed `group_size` elements, we give the left bound
// of those elements, and their bitmaps, to the level above
let mut cur_writer = create_writer(CompressionType::None, None, tempfile::tempfile()?);
let mut cur_writer_len: usize = 0;
let mut group_sizes = vec![];
let mut left_bounds = vec![];
let mut bitmaps = vec![];
// compute the levels below
// in the callback, we fill `cur_writer` with the correct elements for this level
let mut sub_writers = self.compute_higher_levels(
rtxn,
field_id,
level - 1,
&mut |sub_bitmaps, left_bound| {
let mut combined_bitmap = RoaringBitmap::default();
for bitmap in sub_bitmaps {
combined_bitmap |= bitmap;
}
// The conversion of sub_bitmaps.len() to a u8 will always be correct
// since its length is bounded by group_size, which is a u8.
group_sizes.push(sub_bitmaps.len() as u8);
left_bounds.push(left_bound);
bitmaps.push(combined_bitmap);
if bitmaps.len() != self.group_size as usize {
return Ok(());
}
let left_bound = left_bounds.first().unwrap();
handle_group(&bitmaps, left_bound)?;
for ((bitmap, left_bound), group_size) in
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key)
.map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value =
FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?;
cur_writer_len += 1;
}
Ok(())
},
)?;
// don't forget to insert the leftover elements into the writer as well,
// but only do so if the number of elements inserted into this
// level could grow to reach the minimum level size
if !bitmaps.is_empty() && (cur_writer_len >= self.min_level_size as usize - 1) {
// at this point, `bitmaps` contains between 1 and group_size - 1 leftover elements
assert!(bitmaps.len() < self.group_size as usize);
assert!(cur_writer_len > 0);
let left_bound = left_bounds.first().unwrap();
handle_group(&bitmaps, left_bound)?;
// drain the leftover groups and write them into the current level's writer
for ((bitmap, left_bound), group_size) in
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key)
.map_err(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value = FacetGroupValueCodec::bytes_encode(&value).map_err(Error::Encoding)?;
cur_writer.insert(key, value)?;
cur_writer_len += 1;
}
}
// if we inserted enough elements to reach the minimum level size, then we push the writer
if cur_writer_len >= self.min_level_size as usize {
sub_writers.push(writer_into_reader(cur_writer)?);
} else {
// otherwise, if there are still leftover elements, we give them to the level above
// this is necessary in order to get the union of all docids
if !bitmaps.is_empty() {
handle_group(&bitmaps, left_bounds.first().unwrap())?;
}
}
Ok(sub_writers)
}
}
#[cfg(test)]
mod tests {
use std::iter::once;
use big_s::S;
use maplit::hashset;
use roaring::RoaringBitmap;
use crate::documents::documents_batch_reader_from_objects;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::StrRefCodec;
use crate::index::tests::TempIndex;
use crate::update::facet::test_helpers::{ordered_string, FacetIndex};
use crate::{db_snap, milli_snap};
#[test]
fn insert() {
let test = |name: &str, group_size: u8, min_level_size: u8| {
let index =
FacetIndex::<OrderedF64Codec>::new(group_size, 0 /*NA*/, min_level_size);
let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
for i in 0..1_000u32 {
// field id = 0, left_bound = i, docids = [i]
elements.push(((0, i as f64), once(i).collect()));
}
for i in 0..100u32 {
// field id = 1, left_bound = i, docids = [i]
elements.push(((1, i as f64), once(i).collect()));
}
let mut wtxn = index.env.write_txn().unwrap();
index.bulk_insert(&mut wtxn, &[0, 1], elements.iter());
index.verify_structure_validity(&wtxn, 0);
index.verify_structure_validity(&wtxn, 1);
wtxn.commit().unwrap();
milli_snap!(format!("{index}"), name);
};
test("default", 4, 5);
test("small_group_small_min_level", 2, 2);
test("small_group_large_min_level", 2, 128);
test("large_group_small_min_level", 16, 2);
test("odd_group_odd_min_level", 7, 3);
}
#[test]
fn insert_delete_field_insert() {
let test = |name: &str, group_size: u8, min_level_size: u8| {
let index =
FacetIndex::<OrderedF64Codec>::new(group_size, 0 /*NA*/, min_level_size);
let mut wtxn = index.env.write_txn().unwrap();
let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
for i in 0..100u32 {
// field id = 0, left_bound = i, docids = [i]
elements.push(((0, i as f64), once(i).collect()));
}
for i in 0..100u32 {
// field id = 1, left_bound = i, docids = [i]
elements.push(((1, i as f64), once(i).collect()));
}
index.bulk_insert(&mut wtxn, &[0, 1], elements.iter());
index.verify_structure_validity(&wtxn, 0);
index.verify_structure_validity(&wtxn, 1);
// delete all the elements for the facet id 0
for i in 0..100u32 {
index.delete_single_docid(&mut wtxn, 0, &(i as f64), i);
}
index.verify_structure_validity(&wtxn, 0);
index.verify_structure_validity(&wtxn, 1);
let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
// then add some elements again for the facet id 1
for i in 0..110u32 {
// field id = 1, left_bound = i, docids = [i]
elements.push(((1, i as f64), once(i).collect()));
}
index.verify_structure_validity(&wtxn, 0);
index.verify_structure_validity(&wtxn, 1);
index.bulk_insert(&mut wtxn, &[0, 1], elements.iter());
wtxn.commit().unwrap();
milli_snap!(format!("{index}"), name);
};
test("default", 4, 5);
test("small_group_small_min_level", 2, 2);
test("small_group_large_min_level", 2, 128);
test("large_group_small_min_level", 16, 2);
test("odd_group_odd_min_level", 7, 3);
}
#[test]
fn bug_3165() {
// Indexing a number of facet values that falls within certain ranges (e.g. 22_540 qualifies)
// would lead to a facet DB which was missing some levels.
// That was because before writing a level into the database, we would
// check that its size was higher than the minimum level size using
// a lossy integer conversion: `level_size as u8 >= min_level_size`.
//
// This missing level in the facet DBs would make the incremental indexer
// (and other search algorithms) crash.
//
// https://github.com/meilisearch/meilisearch/issues/3165
let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
settings.set_filterable_fields(hashset! { S("id") });
})
.unwrap();
let mut documents = vec![];
for i in 0..=22_540 {
documents.push(
serde_json::json! {
{
"id": i as u64,
}
}
.as_object()
.unwrap()
.clone(),
);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "initial", @"c34f499261f3510d862fa0283bbe843a");
}
#[test]
fn insert_string() {
let test = |name: &str, group_size: u8, min_level_size: u8| {
let index = FacetIndex::<StrRefCodec>::new(group_size, 0 /*NA*/, min_level_size);
let strings = (0..1_000).map(|i| ordered_string(i as usize)).collect::<Vec<_>>();
let mut elements = Vec::<((u16, &str), RoaringBitmap)>::new();
for i in 0..1_000u32 {
// field id = 0, left_bound = i, docids = [i]
elements.push(((0, &strings[i as usize]), once(i).collect()));
}
for i in 0..100u32 {
// field id = 1, left_bound = i, docids = [i]
elements.push(((1, &strings[i as usize]), once(i).collect()));
}
let mut wtxn = index.env.write_txn().unwrap();
index.bulk_insert(&mut wtxn, &[0, 1], elements.iter());
index.verify_structure_validity(&wtxn, 0);
index.verify_structure_validity(&wtxn, 1);
wtxn.commit().unwrap();
milli_snap!(format!("{index}"), name);
};
test("default", 4, 5);
test("small_group_small_min_level", 2, 2);
test("small_group_large_min_level", 2, 128);
test("large_group_small_min_level", 16, 2);
test("odd_group_odd_min_level", 7, 3);
}
}

File diff suppressed because it is too large

View file

@@ -0,0 +1,640 @@
/*!
This module implements two different algorithms for updating the `facet_id_string_docids`
and `facet_id_f64_docids` databases. The first algorithm is a "bulk" algorithm, meaning that
it recreates the database from scratch when new elements are added to it. The second algorithm
is incremental: it modifies the database as little as possible.
The databases must be able to return results for queries such as:
1. Filter : find all the document ids that have a facet value greater than X and/or smaller than Y
2. Min/Max : find the minimum/maximum facet value among these document ids
3. Sort : sort these document ids by increasing/decreasing facet values
4. Distribution : given some document ids, make a list of each facet value
found in these documents along with the number of documents that contain it
The algorithms that implement these queries are found in the `src/search/facet` folder.
To make these queries fast to compute, the database adopts a tree structure:
```text
Level 2:   "ab" (2)            "gaf" (2)           "woz" (1)
           [a, b, d, f, z]     [c, d, e, f, g]     [u, y]

Level 1:   "ab" (2)        "ba" (2)      "gaf" (2)     "form" (2)    "woz" (2)
           [a, b, d, z]    [a, b, f]     [c, d, g]     [e, f]        [u, y]

Level 0:   "ab"      "ac"      "ba"      "bac"     "gaf"     "gal"     "form"    "wow"     "woz"    "zz"
           [a, b]    [d, z]    [b, f]    [a, f]    [c, d]    [g]       [e]       [e, f]    [y]      [u]
```
In the diagram above, each cell corresponds to a node in the tree. The first line of the cell
contains the left bound of the range of facet values as well as the number of children of the node.
The second line contains the document ids which have a facet value within the range of the node.
The nodes at level 0 are the leaf nodes. They have 0 children and a single facet value in their range.
In the diagram above, the first cell of level 2 is `ab (2)`. Its range is `ab .. gaf` (because
`gaf` is the left bound of the next node) and it has two children. Its document ids are `[a,b,d,f,z]`.
These documents all contain a facet value that is contained within `ab .. gaf`.
In the database, each node is represented by a key/value pair encoded as a [`FacetGroupKey`] and a
[`FacetGroupValue`], which have the following format:
```text
FacetGroupKey:
- field id : u16
- level : u8
- left bound: [u8] // the facet value encoded using either OrderedF64Codec or Str
FacetGroupValue:
- #children : u8
- docids : RoaringBitmap
```
When the database is first created using the "bulk" method, each node has a fixed number of children
(except possibly the last one) given by the `group_size` parameter (defaulting to `FACET_GROUP_SIZE`).
The tree is also built such that the highest level has more than `min_level_size`
(defaulting to `FACET_MIN_LEVEL_SIZE`) elements in it.
When the database is incrementally updated, the number of children of a node can vary between
1 and `max_group_size`. This is done so that most incremental operations do not need to change
the structure of the tree. When the number of children of a node reaches `max_group_size`,
we split the node in two and update the number of children of its parent.
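As a purely hypothetical illustration (the exact split sizes are an implementation detail),
a level-1 node whose number of children has reached `max_group_size = 4` could be split into
two nodes of two children each, with its parent then counting one more child:
```text
before:  "ab" (4)           after:  "ab" (2)       "ba" (2)
         [a, b, d, f, z]            [a, b, d, z]   [a, b, f]
```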
When adding documents to the databases, it is important to determine which method to use to
minimise indexing time. The incremental method is faster when adding few new facet values, but the
bulk method is faster when a large part of the database is modified. Empirically, it seems that
incrementally adding N facet values to an existing database takes about 50x more time than
building a new database of N facet values from scratch. This is the heuristic used to choose
between the two methods.
Related PR: https://github.com/meilisearch/milli/pull/619
*/
pub const FACET_MAX_GROUP_SIZE: u8 = 8;
pub const FACET_GROUP_SIZE: u8 = 4;
pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::collections::BTreeSet;
use std::fs::File;
use std::io::BufReader;
use grenad::Merger;
use heed::types::{Bytes, DecodeIgnore};
use time::OffsetDateTime;
use tracing::debug;
use self::incremental::FacetsUpdateIncremental;
use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::BytesRefCodec;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::{try_split_array_at, FieldId, Index, Result};
pub mod bulk;
pub mod incremental;
/// A builder used to add new elements to the `facet_id_string_docids` or `facet_id_f64_docids` databases.
///
/// Depending on the number of new elements and the existing size of the database, we use either
/// a bulk update method or an incremental update method.
pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
data_size: u64,
}
impl<'i> FacetsUpdate<'i> {
pub fn new(
index: &'i Index,
facet_type: FacetType,
delta_data: Merger<BufReader<File>, MergeDeladdCboRoaringBitmaps>,
normalized_delta_data: Option<Merger<BufReader<File>, MergeDeladdBtreesetString>>,
data_size: u64,
) -> Self {
let database = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
};
Self {
index,
database,
group_size: FACET_GROUP_SIZE,
max_group_size: FACET_MAX_GROUP_SIZE,
min_level_size: FACET_MIN_LEVEL_SIZE,
facet_type,
delta_data,
normalized_delta_data,
data_size,
}
}
pub fn execute(self, wtxn: &mut heed::RwTxn<'_>) -> Result<()> {
if self.data_size == 0 {
return Ok(());
}
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
// See self::comparison_bench::benchmark_facet_indexing
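// If the new data amounts to at least 1/500th of the current database size, a full
// bulk rebuild is expected to be cheaper than applying the changes incrementally
// (the incremental path is roughly 50x slower per element, see the module docs).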
if self.data_size >= (self.database.len(wtxn)? / 500) {
let field_ids =
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let bulk_update = FacetsUpdateBulk::new(
self.index,
field_ids,
self.facet_type,
self.delta_data,
self.group_size,
self.min_level_size,
);
bulk_update.execute(wtxn)?;
} else {
let incremental_update = FacetsUpdateIncremental::new(
self.index,
self.facet_type,
self.delta_data,
self.group_size,
self.min_level_size,
self.max_group_size,
);
incremental_update.execute(wtxn)?;
}
match self.normalized_delta_data {
Some(data) => index_facet_search(wtxn, data, self.index),
None => Ok(()),
}
}
}
fn index_facet_search(
wtxn: &mut heed::RwTxn<'_>,
normalized_delta_data: Merger<BufReader<File>, MergeDeladdBtreesetString>,
index: &Index,
) -> Result<()> {
let mut iter = normalized_delta_data.into_stream_merger_iter()?;
while let Some((key_bytes, delta_bytes)) = iter.next()? {
let deladd_reader = KvReaderDelAdd::from_slice(delta_bytes);
let database_set = index
.facet_id_normalized_string_strings
.remap_key_type::<Bytes>()
.get(wtxn, key_bytes)?
.unwrap_or_default();
let add_set = deladd_reader
.get(DelAdd::Addition)
.and_then(|bytes| serde_json::from_slice::<BTreeSet<String>>(bytes).ok())
.unwrap_or_default();
let del_set = match deladd_reader
.get(DelAdd::Deletion)
.and_then(|bytes| serde_json::from_slice::<BTreeSet<String>>(bytes).ok())
{
Some(del_set) => {
let (field_id_bytes, _) = try_split_array_at(key_bytes).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);
let mut set = BTreeSet::new();
for facet in del_set {
let key = FacetGroupKey { field_id, level: 0, left_bound: facet.as_str() };
// Check if the referenced value doesn't exist anymore before deleting it.
if index
.facet_id_string_docids
.remap_data_type::<DecodeIgnore>()
.get(wtxn, &key)?
.is_none()
{
set.insert(facet);
}
}
set
}
None => BTreeSet::new(),
};
let set: BTreeSet<_> =
database_set.difference(&del_set).chain(add_set.iter()).cloned().collect();
if set.is_empty() {
index
.facet_id_normalized_string_strings
.remap_key_type::<Bytes>()
.delete(wtxn, key_bytes)?;
} else {
index
.facet_id_normalized_string_strings
.remap_key_type::<Bytes>()
.put(wtxn, key_bytes, &set)?;
}
}
// We clear the normalized-for-search FSTs to recompute everything from scratch.
index.facet_id_string_fst.clear(wtxn)?;
// We compute one FST per string facet field
let mut text_fsts = vec![];
let mut current_fst: Option<(u16, fst::SetBuilder<Vec<u8>>)> = None;
let database = index.facet_id_normalized_string_strings.remap_data_type::<DecodeIgnore>();
for result in database.iter(wtxn)? {
let ((field_id, normalized_facet), _) = result?;
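// When the field id changes, finalize the FST of the previous field and start a
// new builder for the current one.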
current_fst = match current_fst.take() {
Some((fid, fst_builder)) if fid != field_id => {
let fst = fst_builder.into_set();
text_fsts.push((fid, fst));
Some((field_id, fst::SetBuilder::memory()))
}
Some((field_id, fst_builder)) => Some((field_id, fst_builder)),
None => Some((field_id, fst::SetBuilder::memory())),
};
if let Some((_, fst_builder)) = current_fst.as_mut() {
fst_builder.insert(normalized_facet)?;
}
}
if let Some((field_id, fst_builder)) = current_fst {
let fst = fst_builder.into_set();
text_fsts.push((field_id, fst));
}
// We write those FSTs into LMDB now
for (field_id, fst) in text_fsts {
index.facet_id_string_fst.put(wtxn, &field_id, &fst)?;
}
Ok(())
}
#[cfg(test)]
pub(crate) mod test_helpers {
use std::cell::Cell;
use std::fmt::Display;
use std::iter::FromIterator;
use std::marker::PhantomData;
use std::rc::Rc;
use grenad::MergerBuilder;
use heed::types::Bytes;
use heed::{BytesDecode, BytesEncode, Env, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::bulk::FacetsUpdateBulkInner;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::index_documents::MergeDeladdCboRoaringBitmaps;
use crate::update::FacetsUpdateIncrementalInner;
use crate::CboRoaringBitmapCodec;
/// Utility function to generate a string whose position in a lexicographically
/// ordered list is `i`.
pub fn ordered_string(mut i: usize) -> String {
// The first string is empty
if i == 0 {
return String::new();
}
// The others are 5 chars long, each character between 'a' and 'z'
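// e.g. ordered_string(1) == "aaaab", ordered_string(2) == "aaaac", ordered_string(26) == "aaaba"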
let mut s = String::new();
for _ in 0..5 {
let (digit, next) = (i % 26, i / 26);
s.insert(0, char::from_u32('a' as u32 + digit as u32).unwrap());
i = next;
}
s
}
/// A dummy index that only contains the facet database, used for testing
pub struct FacetIndex<BoundCodec>
where
for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
pub env: Env,
pub content: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
pub group_size: Cell<u8>,
pub min_level_size: Cell<u8>,
pub max_group_size: Cell<u8>,
_tempdir: Rc<tempfile::TempDir>,
_phantom: PhantomData<BoundCodec>,
}
impl<BoundCodec> FacetIndex<BoundCodec>
where
for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
#[cfg(all(test, fuzzing))]
pub fn open_from_tempdir(
tempdir: Rc<tempfile::TempDir>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
) -> FacetIndex<BoundCodec> {
let group_size = std::cmp::min(16, std::cmp::max(group_size, 2)); // 2 <= x <= 16
let max_group_size = std::cmp::min(16, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 16
let min_level_size = std::cmp::min(17, std::cmp::max(1, min_level_size)); // 1 <= x <= 17
let mut options = heed::EnvOpenOptions::new();
let options = options.map_size(4096 * 4 * 10 * 1000);
unsafe {
options.flag(heed::flags::Flags::MdbAlwaysFreePages);
}
let env = options.open(tempdir.path()).unwrap();
let content = env.open_database(None).unwrap().unwrap();
FacetIndex {
content,
group_size: Cell::new(group_size),
max_group_size: Cell::new(max_group_size),
min_level_size: Cell::new(min_level_size),
_tempdir: tempdir,
env,
_phantom: PhantomData,
}
}
pub fn new(
group_size: u8,
max_group_size: u8,
min_level_size: u8,
) -> FacetIndex<BoundCodec> {
let group_size = group_size.clamp(2, 127);
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
let mut options = heed::EnvOpenOptions::new();
let options = options.map_size(4096 * 4 * 1000 * 100);
let tempdir = tempfile::TempDir::new().unwrap();
let env = unsafe { options.open(tempdir.path()) }.unwrap();
let mut wtxn = env.write_txn().unwrap();
let content = env.create_database(&mut wtxn, None).unwrap();
wtxn.commit().unwrap();
FacetIndex {
content,
group_size: Cell::new(group_size),
max_group_size: Cell::new(max_group_size),
min_level_size: Cell::new(min_level_size),
_tempdir: Rc::new(tempdir),
env,
_phantom: PhantomData,
}
}
#[cfg(all(test, fuzzing))]
pub fn set_group_size(&self, group_size: u8) {
// 2 <= x <= 64
self.group_size.set(std::cmp::min(64, std::cmp::max(group_size, 2)));
}
#[cfg(all(test, fuzzing))]
pub fn set_max_group_size(&self, max_group_size: u8) {
// 2*group_size <= x <= 128
let max_group_size = std::cmp::max(4, std::cmp::min(128, max_group_size));
self.max_group_size.set(max_group_size);
if self.group_size.get() < max_group_size / 2 {
self.group_size.set(max_group_size / 2);
}
}
#[cfg(all(test, fuzzing))]
pub fn set_min_level_size(&self, min_level_size: u8) {
// 1 <= x <= inf
self.min_level_size.set(std::cmp::max(1, min_level_size));
}
pub fn insert<'a>(
&self,
wtxn: &'a mut RwTxn<'_>,
field_id: u16,
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
docids: &RoaringBitmap,
) {
let update = FacetsUpdateIncrementalInner {
db: self.content,
group_size: self.group_size.get(),
min_level_size: self.min_level_size.get(),
max_group_size: self.max_group_size.get(),
};
let key_bytes = BoundCodec::bytes_encode(key).unwrap();
update.modify(wtxn, field_id, &key_bytes, Some(docids), None).unwrap();
update.add_or_delete_level(wtxn, field_id).unwrap();
}
pub fn delete_single_docid<'a>(
&self,
wtxn: &'a mut RwTxn<'_>,
field_id: u16,
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
docid: u32,
) {
self.delete(wtxn, field_id, key, &RoaringBitmap::from_iter(std::iter::once(docid)))
}
pub fn delete<'a>(
&self,
wtxn: &'a mut RwTxn<'_>,
field_id: u16,
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
docids: &RoaringBitmap,
) {
let update = FacetsUpdateIncrementalInner {
db: self.content,
group_size: self.group_size.get(),
min_level_size: self.min_level_size.get(),
max_group_size: self.max_group_size.get(),
};
let key_bytes = BoundCodec::bytes_encode(key).unwrap();
update.modify(wtxn, field_id, &key_bytes, None, Some(docids)).unwrap();
update.add_or_delete_level(wtxn, field_id).unwrap();
}
pub fn bulk_insert<'a, 'b>(
&self,
wtxn: &'a mut RwTxn<'_>,
field_ids: &[u16],
els: impl IntoIterator<
Item = &'a ((u16, <BoundCodec as BytesEncode<'a>>::EItem), RoaringBitmap),
>,
) where
for<'c> <BoundCodec as BytesEncode<'c>>::EItem: Sized,
{
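// Encode each (facet key, docids) pair as a DelAdd::Addition entry in an in-memory
// grenad file and wrap it in a merger, mimicking the input produced by the real
// indexing pipeline.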
let mut new_data = vec![];
let mut writer = grenad::Writer::new(&mut new_data);
for ((field_id, left_bound), docids) in els {
let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned();
let key: FacetGroupKey<&[u8]> =
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_encode(&key).unwrap();
let mut inner_writer = KvWriterDelAdd::memory();
let value = CboRoaringBitmapCodec::bytes_encode(docids).unwrap();
inner_writer.insert(DelAdd::Addition, value).unwrap();
writer.insert(&key, inner_writer.into_inner().unwrap()).unwrap();
}
writer.finish().unwrap();
let reader = grenad::Reader::new(std::io::Cursor::new(new_data)).unwrap();
let mut builder = MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
builder.push(reader.into_cursor().unwrap());
let merger = builder.build();
let update = FacetsUpdateBulkInner {
db: self.content,
delta_data: Some(merger),
group_size: self.group_size.get(),
min_level_size: self.min_level_size.get(),
};
update.update(wtxn, field_ids).unwrap();
}
pub fn verify_structure_validity(&self, txn: &RoTxn<'_>, field_id: u16) {
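// For every node above level 0, check that its recorded size matches the actual
// number of children below it and that its bitmap is exactly the union of the
// children's bitmaps.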
let mut field_id_prefix = vec![];
field_id_prefix.extend_from_slice(&field_id.to_be_bytes());
let highest_level = get_highest_level(txn, self.content, field_id).unwrap();
for level_no in (1..=highest_level).rev() {
let mut level_no_prefix = vec![];
level_no_prefix.extend_from_slice(&field_id.to_be_bytes());
level_no_prefix.push(level_no);
let iter = self
.content
.remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter(txn, &level_no_prefix)
.unwrap();
for el in iter {
let (key, value) = el.unwrap();
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
let mut prefix_start_below = vec![];
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
prefix_start_below.push(level_no - 1);
prefix_start_below.extend_from_slice(key.left_bound);
let start_below = {
let mut start_below_iter = self
.content
.remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter(txn, &prefix_start_below)
.unwrap();
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes).unwrap()
};
assert!(value.size > 0);
let mut actual_size = 0;
let mut values_below = RoaringBitmap::new();
let iter_below = self
.content
.range(txn, &(start_below..))
.unwrap()
.take(value.size as usize);
for el in iter_below {
let (_, value) = el.unwrap();
actual_size += 1;
values_below |= value.bitmap;
}
assert_eq!(actual_size, value.size, "{key:?} start_below: {start_below:?}");
assert_eq!(value.bitmap, values_below);
}
}
}
}
impl<BoundCodec> Display for FacetIndex<BoundCodec>
where
for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized + Display,
for<'a> BoundCodec:
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let txn = self.env.read_txn().unwrap();
let iter = self.content.iter(&txn).unwrap();
for el in iter {
let (key, value) = el.unwrap();
let FacetGroupKey { field_id, level, left_bound: bound } = key;
let bound = BoundCodec::bytes_decode(bound).unwrap();
let FacetGroupValue { size, bitmap } = value;
writeln!(
f,
"{field_id:<2} {level:<2} k{bound:<8} {size:<4} {values:?}",
values = display_bitmap(&bitmap)
)?;
}
Ok(())
}
}
}
#[allow(unused)]
#[cfg(test)]
mod comparison_bench {
use std::iter::once;
use rand::Rng;
use roaring::RoaringBitmap;
use super::test_helpers::FacetIndex;
use crate::heed_codec::facet::OrderedF64Codec;
// This is a simple test to get an intuition on the relative speed
// of the incremental vs. bulk indexer.
//
// The benchmark shows the worst-case scenario for the incremental indexer, since
// each facet value contains only one document ID.
//
// In that scenario, it appears that the incremental indexer is about 50 times slower than the
// bulk indexer.
// #[test]
fn benchmark_facet_indexing() {
let mut facet_value = 0;
let mut r = rand::thread_rng();
for i in 1..=20 {
let size = 50_000 * i;
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
for i in 0..size {
// field id = 0, left_bound = i, docids = [i]
elements.push(((0, facet_value as f64), once(i).collect()));
facet_value += 1;
}
let timer = std::time::Instant::now();
index.bulk_insert(&mut txn, &[0], elements.iter());
let time_spent = timer.elapsed().as_millis();
println!("bulk {size} : {time_spent}ms");
txn.commit().unwrap();
for nbr_doc in [1, 100, 1000, 10_000] {
let mut txn = index.env.write_txn().unwrap();
let timer = std::time::Instant::now();
//
// incrementally insert `nbr_doc` documents
//
for _ in 0..nbr_doc {
index.insert(&mut txn, 0, &r.gen(), &once(1).collect());
}
let time_spent = timer.elapsed().as_millis();
println!(" add {nbr_doc} : {time_spent}ms");
txn.abort();
}
}
}
}

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
b40dd31a65e033ffc6b35c027ce19506

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
7ee22d8e9387e72758f00918eb67e4c6

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
60f567359382507afdaf45fb075740c3

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
b986d6e6cbf425685f409a8b417010e1

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
ee10dd2ae2b5c6621a89a5d0a9aa8ccc

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
fa877559eef78b383b496c15a364a2dc

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
16a96353bc42f2ff3e91611ca4d5b184

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
be1b08073b9d9788d18080c1320151d7

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
16a96353bc42f2ff3e91611ca4d5b184

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
32a45d555df2e001420fea149818d376

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
353d70f52eea66e5031dca989ea8a037

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
52a093c909133d84023a4a7b83864808

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
9d86c72ddb241d0aeca2995d61a3648a

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
c0943177594534bfe5527cbf40fe388e

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/bulk.rs
---
6ed86f234028ae3df5881bee5512f11e

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
5dbfa134cc44abeb3ab6242fc182e48e

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
6ed7bf5d440599b3b10b37549a271fdf

View file

@@ -0,0 +1,19 @@
---
source: milli/src/update/facet/incremental.rs
---
0 0 k0 1 "[0, ]"
0 0 k1 1 "[1, ]"
0 0 k2 1 "[2, ]"
0 0 k3 1 "[3, ]"
0 0 k4 1 "[4, ]"
0 0 k5 1 "[5, ]"
0 0 k6 1 "[6, ]"
0 0 k7 1 "[7, ]"
0 0 k8 1 "[8, ]"
0 0 k9 1 "[9, ]"
0 0 k10 1 "[10, ]"
0 0 k11 1 "[11, ]"
0 0 k12 1 "[12, ]"
0 0 k13 1 "[13, ]"
0 0 k14 1 "[14, ]"

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
b5203f0df0036ebaa133dd77d63a00eb

View file

@@ -0,0 +1,26 @@
---
source: milli/src/update/facet/incremental.rs
---
0 0 k0 1 "[0, ]"
0 0 k1 1 "[1, ]"
0 0 k2 1 "[2, ]"
0 0 k3 1 "[3, ]"
0 0 k4 1 "[4, ]"
0 0 k5 1 "[5, ]"
0 0 k6 1 "[6, ]"
0 0 k7 1 "[7, ]"
0 0 k8 1 "[8, ]"
0 0 k9 1 "[9, ]"
0 0 k10 1 "[10, ]"
0 0 k11 1 "[11, ]"
0 0 k12 1 "[12, ]"
0 0 k13 1 "[13, ]"
0 0 k14 1 "[14, ]"
0 0 k15 1 "[15, ]"
0 0 k16 1 "[16, ]"
0 1 k0 4 "[0, 1, 2, 3, ]"
0 1 k4 4 "[4, 5, 6, 7, ]"
0 1 k8 4 "[8, 9, 10, 11, ]"
0 1 k12 4 "[12, 13, 14, 15, ]"
0 1 k16 1 "[16, ]"

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
95497d8579740868ee0bfc655b0bf782

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
d565c2f7bbd9e13e12de40cfbbfba6bb

View file

@@ -0,0 +1,54 @@
---
source: milli/src/update/facet/incremental.rs
---
0 0 k216 1 "[216, ]"
0 0 k217 1 "[217, ]"
0 0 k218 1 "[218, ]"
0 0 k219 1 "[219, ]"
0 0 k220 1 "[220, ]"
0 0 k221 1 "[221, ]"
0 0 k222 1 "[222, ]"
0 0 k223 1 "[223, ]"
0 0 k224 1 "[224, ]"
0 0 k225 1 "[225, ]"
0 0 k226 1 "[226, ]"
0 0 k227 1 "[227, ]"
0 0 k228 1 "[228, ]"
0 0 k229 1 "[229, ]"
0 0 k230 1 "[230, ]"
0 0 k231 1 "[231, ]"
0 0 k232 1 "[232, ]"
0 0 k233 1 "[233, ]"
0 0 k234 1 "[234, ]"
0 0 k235 1 "[235, ]"
0 0 k236 1 "[236, ]"
0 0 k237 1 "[237, ]"
0 0 k238 1 "[238, ]"
0 0 k239 1 "[239, ]"
0 0 k240 1 "[240, ]"
0 0 k241 1 "[241, ]"
0 0 k242 1 "[242, ]"
0 0 k243 1 "[243, ]"
0 0 k244 1 "[244, ]"
0 0 k245 1 "[245, ]"
0 0 k246 1 "[246, ]"
0 0 k247 1 "[247, ]"
0 0 k248 1 "[248, ]"
0 0 k249 1 "[249, ]"
0 0 k250 1 "[250, ]"
0 0 k251 1 "[251, ]"
0 0 k252 1 "[252, ]"
0 0 k253 1 "[253, ]"
0 0 k254 1 "[254, ]"
0 0 k255 1 "[255, ]"
0 1 k216 4 "[216, 217, 218, 219, ]"
0 1 k220 4 "[220, 221, 222, 223, ]"
0 1 k224 4 "[224, 225, 226, 227, ]"
0 1 k228 4 "[228, 229, 230, 231, ]"
0 1 k232 4 "[232, 233, 234, 235, ]"
0 1 k236 4 "[236, 237, 238, 239, ]"
0 1 k240 4 "[240, 241, 242, 243, ]"
0 1 k244 4 "[244, 245, 246, 247, ]"
0 1 k248 4 "[248, 249, 250, 251, ]"
0 1 k252 4 "[252, 253, 254, 255, ]"

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
7cb503827ba17e9670296cc9531a1380

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
b061f43e379e16f0617c05d3313d0078

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
81fc9489d6b163935b97433477dea63b

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
b17b2c4ec87a778aae07854c96c08b48

View file

@@ -0,0 +1,20 @@
---
source: milli/src/update/facet/incremental.rs
---
0 0 k0 1 "[3, 435, 583, 849, ]"
0 0 k1 1 "[35, 494, 693, 796, ]"
0 0 k2 1 "[76, 420, 526, 909, ]"
0 0 k3 1 "[133, 451, 653, 806, ]"
0 0 k4 1 "[131, 464, 656, 853, ]"
0 0 k5 1 "[61, 308, 701, 903, ]"
0 0 k6 1 "[144, 449, 674, 794, ]"
0 0 k7 1 "[182, 451, 735, 941, ]"
0 0 k8 1 "[6, 359, 679, 1003, ]"
0 0 k9 1 "[197, 418, 659, 904, ]"
0 0 k10 1 "[88, 297, 567, 800, ]"
0 0 k11 1 "[150, 309, 530, 946, ]"
0 0 k12 1 "[156, 466, 567, 892, ]"
0 0 k13 1 "[46, 425, 610, 807, ]"
0 0 k14 1 "[236, 433, 549, 891, ]"
0 0 k15 1 "[207, 472, 603, 974, ]"

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
7f8aa18d2b3a6422d55c03bede0563db

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
7f8aa18d2b3a6422d55c03bede0563db

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
b3e2de9020d9e0f3941bc3a179c795ba

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
5dbfa134cc44abeb3ab6242fc182e48e

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
9343355bf535ed4a0c956df2b229d5e6

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
4fc800f49201a336295af0542fdf01ab

View file

@@ -0,0 +1,4 @@
---
source: milli/src/update/facet/incremental.rs
---
fd65ce7d96a07aafb0ef6cfb5bf016b8