mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Facet Incremental update
This commit is contained in:
parent
f67ff3a738
commit
04ec293024
@ -4,6 +4,7 @@ use std::io::BufReader;
|
|||||||
|
|
||||||
use heed::types::{ByteSlice, DecodeIgnore};
|
use heed::types::{ByteSlice, DecodeIgnore};
|
||||||
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
||||||
|
use obkv::KvReader;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
@ -12,6 +13,7 @@ use crate::heed_codec::facet::{
|
|||||||
};
|
};
|
||||||
use crate::heed_codec::ByteSliceRefCodec;
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
|
use crate::update::del_add::DelAdd;
|
||||||
use crate::update::index_documents::valid_lmdb_key;
|
use crate::update::index_documents::valid_lmdb_key;
|
||||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||||
|
|
||||||
@ -35,14 +37,14 @@ pub struct FacetsUpdateIncremental<'i> {
|
|||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
inner: FacetsUpdateIncrementalInner,
|
inner: FacetsUpdateIncrementalInner,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
new_data: grenad::Reader<BufReader<File>>,
|
delta_data: grenad::Reader<BufReader<File>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'i> FacetsUpdateIncremental<'i> {
|
impl<'i> FacetsUpdateIncremental<'i> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
new_data: grenad::Reader<BufReader<File>>,
|
delta_data: grenad::Reader<BufReader<File>>,
|
||||||
group_size: u8,
|
group_size: u8,
|
||||||
min_level_size: u8,
|
min_level_size: u8,
|
||||||
max_group_size: u8,
|
max_group_size: u8,
|
||||||
@ -63,29 +65,82 @@ impl<'i> FacetsUpdateIncremental<'i> {
|
|||||||
min_level_size,
|
min_level_size,
|
||||||
},
|
},
|
||||||
facet_type,
|
facet_type,
|
||||||
new_data,
|
delta_data,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> {
|
pub fn execute(self, wtxn: &'i mut RwTxn) -> crate::Result<()> {
|
||||||
let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
|
#[derive(Default)]
|
||||||
|
struct DeltaDocids {
|
||||||
|
deleted: RoaringBitmap,
|
||||||
|
added: RoaringBitmap,
|
||||||
|
}
|
||||||
|
impl DeltaDocids {
|
||||||
|
fn add(&mut self, added: &RoaringBitmap) {
|
||||||
|
self.deleted -= added;
|
||||||
|
self.added |= added;
|
||||||
|
}
|
||||||
|
fn delete(&mut self, deleted: &RoaringBitmap) {
|
||||||
|
self.deleted |= deleted;
|
||||||
|
self.added -= deleted;
|
||||||
|
}
|
||||||
|
fn applied(self, mut docids: RoaringBitmap) -> RoaringBitmap {
|
||||||
|
docids -= self.deleted;
|
||||||
|
docids |= self.added;
|
||||||
|
docids
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
let mut cursor = self.new_data.into_cursor()?;
|
let mut new_faceted_docids = HashMap::<FieldId, DeltaDocids>::default();
|
||||||
|
|
||||||
|
let mut cursor = self.delta_data.into_cursor()?;
|
||||||
while let Some((key, value)) = cursor.move_on_next()? {
|
while let Some((key, value)) = cursor.move_on_next()? {
|
||||||
if !valid_lmdb_key(key) {
|
if !valid_lmdb_key(key) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
|
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
|
||||||
.ok_or(heed::Error::Encoding)?;
|
.ok_or(heed::Error::Encoding)?;
|
||||||
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
|
let value = KvReader::new(value);
|
||||||
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
|
|
||||||
*new_faceted_docids.entry(key.field_id).or_default() |= docids;
|
let entry = new_faceted_docids.entry(key.field_id).or_default();
|
||||||
|
|
||||||
|
let docids_to_delete = value
|
||||||
|
.get(DelAdd::Deletion)
|
||||||
|
.map(CboRoaringBitmapCodec::bytes_decode)
|
||||||
|
.map(|o| o.ok_or(heed::Error::Encoding));
|
||||||
|
|
||||||
|
let docids_to_add = value
|
||||||
|
.get(DelAdd::Addition)
|
||||||
|
.map(CboRoaringBitmapCodec::bytes_decode)
|
||||||
|
.map(|o| o.ok_or(heed::Error::Encoding));
|
||||||
|
|
||||||
|
if let Some(docids_to_delete) = docids_to_delete {
|
||||||
|
let docids_to_delete = docids_to_delete?;
|
||||||
|
self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?;
|
||||||
|
entry.delete(&docids_to_delete);
|
||||||
|
}
|
||||||
|
|
||||||
|
if let Some(docids_to_add) = docids_to_add {
|
||||||
|
let docids_to_add = docids_to_add?;
|
||||||
|
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?;
|
||||||
|
entry.add(&docids_to_add);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// FIXME: broken for multi-value facets?
|
||||||
|
//
|
||||||
|
// Consider an incremental update: `facet="tags", facet_value="Action", {Del: Some([0, 1]), Add: None }`
|
||||||
|
// The current code will unconditionally remove docs 0 and 1 from faceted docs for "tags".
|
||||||
|
// Now for doc 0: `"tags": "Action"`, it's correct behavior
|
||||||
|
// for doc 1: `"tags": "Action, Adventure"`, it's incorrect behavior: doc 1 still has the "Adventure" value, so it should remain in the faceted docs for "tags"
|
||||||
for (field_id, new_docids) in new_faceted_docids {
|
for (field_id, new_docids) in new_faceted_docids {
|
||||||
let mut docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?;
|
let old_docids = self.index.faceted_documents_ids(wtxn, field_id, self.facet_type)?;
|
||||||
docids |= new_docids;
|
self.index.put_faceted_documents_ids(
|
||||||
self.index.put_faceted_documents_ids(wtxn, field_id, self.facet_type, &docids)?;
|
wtxn,
|
||||||
|
field_id,
|
||||||
|
self.facet_type,
|
||||||
|
&new_docids.applied(old_docids),
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user