From 04ec293024191deabe4725427230f78f01af72af Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 19 Oct 2023 12:01:12 +0200 Subject: [PATCH] Facet Incremental update --- milli/src/update/facet/incremental.rs | 77 +++++++++++++++++++++++---- 1 file changed, 66 insertions(+), 11 deletions(-) diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index 743c0b038..802c02b85 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -4,6 +4,7 @@ use std::io::BufReader; use heed::types::{ByteSlice, DecodeIgnore}; use heed::{BytesDecode, Error, RoTxn, RwTxn}; +use obkv::KvReader; use roaring::RoaringBitmap; use crate::facet::FacetType; @@ -12,6 +13,7 @@ use crate::heed_codec::facet::{ }; use crate::heed_codec::ByteSliceRefCodec; use crate::search::facet::get_highest_level; +use crate::update::del_add::DelAdd; use crate::update::index_documents::valid_lmdb_key; use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; @@ -35,14 +37,14 @@ pub struct FacetsUpdateIncremental<'i> { index: &'i Index, inner: FacetsUpdateIncrementalInner, facet_type: FacetType, - new_data: grenad::Reader>, + delta_data: grenad::Reader>, } impl<'i> FacetsUpdateIncremental<'i> { pub fn new( index: &'i Index, facet_type: FacetType, - new_data: grenad::Reader>, + delta_data: grenad::Reader>, group_size: u8, min_level_size: u8, max_group_size: u8, @@ -63,29 +65,82 @@ impl<'i> FacetsUpdateIncremental<'i> { min_level_size, }, facet_type, - new_data, + delta_data, } } pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> { - let mut new_faceted_docids = HashMap::::default(); + #[derive(Default)] + struct DeltaDocids { + deleted: RoaringBitmap, + added: RoaringBitmap, + } + impl DeltaDocids { + fn add(&mut self, added: &RoaringBitmap) { + self.deleted -= added; + self.added |= added; + } + fn delete(&mut self, deleted: &RoaringBitmap) { + self.deleted |= deleted; + self.added -= deleted; + } + fn applied(self, mut docids: RoaringBitmap) -> RoaringBitmap { + docids -= self.deleted; + docids |= self.added; + docids + } + } - let mut cursor = self.new_data.into_cursor()?; + let mut new_faceted_docids = HashMap::::default(); + + let mut cursor = self.delta_data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { if !valid_lmdb_key(key) { continue; } let key = FacetGroupKeyCodec::::bytes_decode(key) .ok_or(heed::Error::Encoding)?; - let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?; - self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?; - *new_faceted_docids.entry(key.field_id).or_default() |= docids; + let value = KvReader::new(value); + + let entry = new_faceted_docids.entry(key.field_id).or_default(); + + let docids_to_delete = value + .get(DelAdd::Deletion) + .map(CboRoaringBitmapCodec::bytes_decode) + .map(|o| o.ok_or(heed::Error::Encoding)); + + let docids_to_add = value + .get(DelAdd::Addition) + .map(CboRoaringBitmapCodec::bytes_decode) + .map(|o| o.ok_or(heed::Error::Encoding)); + + if let Some(docids_to_delete) = docids_to_delete { + let docids_to_delete = docids_to_delete?; + self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?; + entry.delete(&docids_to_delete); + } + + if let Some(docids_to_add) = docids_to_add { + let docids_to_add = docids_to_add?; + self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?; + entry.add(&docids_to_add); + } } + // FIXME: broken for multi-value facets? + // + // Consider an incremental update: `facet="tags", facet_value="Action", {Del: Some([0, 1]), Add: None }` + // The current code will inconditionally remove docs 0 and 1 from faceted docs for "tags". + // Now for doc 0: `"tags": "Action"`, it's correct behavior + // for doc 1: `"tags": "Action, Adventure"`, it's incorrect behavior for (field_id, new_docids) in new_faceted_docids { - let mut docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?; - docids |= new_docids; - self.index.put_faceted_documents_ids(wtxn, field_id, self.facet_type, &docids)?; + let old_docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?; + self.index.put_faceted_documents_ids( + wtxn, + field_id, + self.facet_type, + &new_docids.applied(old_docids), + )?; } Ok(()) }