Add FacetsUpdate type that wraps incremental and bulk indexing methods

2025-07-04 20:37:15 +02:00 · 2022-09-05 12:52:05 +02:00 · 2022-09-05 12:52:05 +02:00 · 9b55e582cd
commit 9b55e582cd
parent 3d145d7f48
11 changed files with 216 additions and 129 deletions
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@ -1,5 +1,4 @@
 use std::borrow::Cow;
-use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::File;
 use std::io;
@ -14,12 +13,12 @@ use super::helpers::{
    valid_lmdb_key, CursorClonableMmap,
 };
 use super::{ClonableMmap, MergeFn};
-use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
+use crate::facet::FacetType;
+use crate::update::facet::FacetsUpdate;
 use crate::update::index_documents::helpers::as_cloneable_grenad;
-use crate::update::FacetsUpdateIncremental;
 use crate::{
-    lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint,
-    Index, Result,
+    lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
+    Result,
 };

 pub(crate) enum TypedChunk {
@ -138,78 +137,14 @@ pub(crate) fn write_typed_chunk_into_index(
            )?;
            is_merged_database = true;
        }
-        TypedChunk::FieldIdFacetNumberDocids(facet_id_f64_docids_iter) => {
-            // merge cbo roaring bitmaps is not the correct merger because the data in the DB
-            // is FacetGroupValue and not RoaringBitmap
-            // so I need to create my own merging function
-
-            // facet_id_string_docids is encoded as:
-            // key: FacetKeyCodec<StrRefCodec>
-            // value: CboRoaringBitmapCodec
-            // basically
-
-            // TODO: a condition saying "if I have more than 1/50th of the DB to add,
-            // then I do it in bulk, otherwise I do it incrementally". But instead of 1/50,
-            // it is a ratio I determine empirically
-
-            // for now I only do it incrementally, to see if things work
-            let indexer = FacetsUpdateIncremental::new(
-                index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
-            );
-
-            let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
-
-            let mut cursor = facet_id_f64_docids_iter.into_cursor()?;
-            while let Some((key, value)) = cursor.move_on_next()? {
-                let key =
-                    FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
-                let docids =
-                    CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
-                indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
-                *new_faceted_docids.entry(key.field_id).or_default() |= docids;
-            }
-            for (field_id, new_docids) in new_faceted_docids {
-                let mut docids = index.number_faceted_documents_ids(wtxn, field_id)?;
-                docids |= new_docids;
-                index.put_number_faceted_documents_ids(wtxn, field_id, &docids)?;
-            }
-
+        TypedChunk::FieldIdFacetNumberDocids(facet_id_number_docids_iter) => {
+            let indexer = FacetsUpdate::new(index, FacetType::Number, facet_id_number_docids_iter);
+            indexer.execute(wtxn)?;
            is_merged_database = true;
        }
-        TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids) => {
-            // merge cbo roaring bitmaps is not the correct merger because the data in the DB
-            // is FacetGroupValue and not RoaringBitmap
-            // so I need to create my own merging function
-
-            // facet_id_string_docids is encoded as:
-            // key: FacetKeyCodec<StrRefCodec>
-            // value: CboRoaringBitmapCodec
-            // basically
-
-            // TODO: a condition saying "if I have more than 1/50th of the DB to add,
-            // then I do it in bulk, otherwise I do it incrementally". But instead of 1/50,
-            // it is a ratio I determine empirically
-
-            // for now I only do it incrementally, to see if things work
-            let indexer = FacetsUpdateIncremental::new(
-                index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
-            );
-            let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
-
-            let mut cursor = facet_id_string_docids.into_cursor()?;
-            while let Some((key, value)) = cursor.move_on_next()? {
-                let key =
-                    FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
-                let docids =
-                    CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
-                indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;
-                *new_faceted_docids.entry(key.field_id).or_default() |= docids;
-            }
-            for (field_id, new_docids) in new_faceted_docids {
-                let mut docids = index.string_faceted_documents_ids(wtxn, field_id)?;
-                docids |= new_docids;
-                index.put_string_faceted_documents_ids(wtxn, field_id, &docids)?;
-            }
+        TypedChunk::FieldIdFacetStringDocids(facet_id_string_docids_iter) => {
+            let indexer = FacetsUpdate::new(index, FacetType::String, facet_id_string_docids_iter);
+            indexer.execute(wtxn)?;
            is_merged_database = true;
        }
        TypedChunk::FieldIdFacetExistsDocids(facet_id_exists_docids) => {