4433: Enhance facet incremental r=Kerollmops a=ManyTheFish

# Pull Request

## Related issue
Fixes #4367
Fixes #4409

## What does this PR do?

- Add a test reproducing #4409
- Fix #4409 by removing a document from a level only if it is no more present in all the linked sub-level nodes
- Optimize facet Incremental indexing by creating or deleting a complete level once per field id instead of for each facet value
- Optimize facet Incremental indexing by doing the additions and the deletions in the same process instead of doing them separately


Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-02-28 15:28:46 +00:00 committed by GitHub
commit b87485e80d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 458 additions and 318 deletions

View File

@ -1,4 +1,4 @@
use meili_snap::snapshot; use meili_snap::{json_string, snapshot};
use crate::common::encoder::Encoder; use crate::common::encoder::Encoder;
use crate::common::{GetAllDocumentsOptions, Server}; use crate::common::{GetAllDocumentsOptions, Server};
@ -209,3 +209,93 @@ async fn error_update_documents_missing_document_id() {
"https://docs.meilisearch.com/errors#missing_document_id" "https://docs.meilisearch.com/errors#missing_document_id"
); );
} }
#[actix_rt::test]
async fn update_faceted_document() {
let server = Server::new().await;
let index = server.index("test");
let (response, code) = index
.update_settings(json!({
"rankingRules": ["facet:asc"],
}))
.await;
assert_eq!("202", code.as_str(), "{:?}", response);
index.wait_task(0).await;
let documents: Vec<_> = (0..1000)
.map(|id| {
json!({
"doc_id": id,
"facet": (id/3),
})
})
.collect();
let (_response, code) = index.add_documents(documents.into(), None).await;
assert_eq!(code, 202);
index.wait_task(1).await;
let documents = json!([
{
"doc_id": 9,
"facet": 1.5,
}
]);
let (response, code) = index.update_documents(documents, None).await;
assert_eq!(code, 202, "response: {}", response);
index.wait_task(2).await;
index
.search(json!({"limit": 10}), |response, code| {
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"doc_id": 0,
"facet": 0
},
{
"doc_id": 1,
"facet": 0
},
{
"doc_id": 2,
"facet": 0
},
{
"doc_id": 3,
"facet": 1
},
{
"doc_id": 4,
"facet": 1
},
{
"doc_id": 5,
"facet": 1
},
{
"doc_id": 9,
"facet": 1.5
},
{
"doc_id": 6,
"facet": 2
},
{
"doc_id": 7,
"facet": 2
},
{
"doc_id": 8,
"facet": 2
}
]
"###);
})
.await;
}

View File

@ -18,15 +18,39 @@ use crate::update::index_documents::valid_lmdb_key;
use crate::update::MergeFn; use crate::update::MergeFn;
use crate::{CboRoaringBitmapCodec, Index, Result}; use crate::{CboRoaringBitmapCodec, Index, Result};
enum InsertionResult { /// Enum used as a return value for the facet incremental indexing.
///
/// - `ModificationResult::InPlace` means that modifying the `facet_value` into the `level` did not have
/// an effect on the number of keys in that level. Therefore, it did not increase the number of children
/// of the parent node.
///
/// - `ModificationResult::Insert` means that modifying the `facet_value` into the `level` resulted
/// in the addition of a new key in that level, and that therefore the number of children
/// of the parent node should be incremented.
///
/// - `ModificationResult::Remove` means that modifying the `facet_value` into the `level` resulted in a change in the
/// number of keys in the level. For example, removing a document id from the facet value `3` could
/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
/// entirely. In that case, `ModificationResult::Remove` is returned. The parent of the deleted key must
/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
///
/// - `ModificationResult::Reduce/Expand` means that modifying the `facet_value` into the `level` resulted in a change in the
/// bounds of the keys of the level. For example, removing a document id from the facet value
/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
/// In that case `ModificationResult::Reduce` is returned. The parent of the reduced key may need to adjust
/// its left bound as well.
///
/// - `ModificationResult::Nothing` means that modifying the `facet_value` didn't have any impact into the `level`.
/// This case is reachable when a document id is removed from a sub-level node but is still present in another one.
/// For example, removing `2` from a document containing `2` and `3`, the document id will removed form the `level 0` but should remain in the group node [1..4] in `level 1`.
enum ModificationResult {
InPlace, InPlace,
Expand, Expand,
Insert, Insert,
}
enum DeletionResult {
InPlace,
Reduce { next: Option<Vec<u8>> }, Reduce { next: Option<Vec<u8>> },
Remove { next: Option<Vec<u8>> }, Remove { next: Option<Vec<u8>> },
Nothing,
} }
/// Algorithm to incrementally insert and delete elememts into the /// Algorithm to incrementally insert and delete elememts into the
@ -65,8 +89,9 @@ impl FacetsUpdateIncremental {
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")] #[tracing::instrument(level = "trace", skip_all, target = "indexing::facets::incremental")]
pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> { pub fn execute(self, wtxn: &mut RwTxn) -> crate::Result<()> {
let mut current_field_id = None;
let mut facet_level_may_be_updated = false;
let mut iter = self.delta_data.into_stream_merger_iter()?; let mut iter = self.delta_data.into_stream_merger_iter()?;
while let Some((key, value)) = iter.next()? { while let Some((key, value)) = iter.next()? {
if !valid_lmdb_key(key) { if !valid_lmdb_key(key) {
continue; continue;
@ -74,25 +99,47 @@ impl FacetsUpdateIncremental {
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key) let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key)
.map_err(heed::Error::Encoding)?; .map_err(heed::Error::Encoding)?;
if facet_level_may_be_updated
&& current_field_id.map_or(false, |fid| fid != key.field_id)
{
// Only add or remove a level after making all the field modifications.
self.inner.add_or_delete_level(wtxn, current_field_id.unwrap())?;
facet_level_may_be_updated = false;
}
current_field_id = Some(key.field_id);
let value = KvReader::new(value); let value = KvReader::new(value);
let docids_to_delete = value let docids_to_delete = value
.get(DelAdd::Deletion) .get(DelAdd::Deletion)
.map(CboRoaringBitmapCodec::bytes_decode) .map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding)); .map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;
let docids_to_add = value let docids_to_add = value
.get(DelAdd::Addition) .get(DelAdd::Addition)
.map(CboRoaringBitmapCodec::bytes_decode) .map(CboRoaringBitmapCodec::bytes_decode)
.map(|o| o.map_err(heed::Error::Encoding)); .map(|o| o.map_err(heed::Error::Encoding))
.transpose()?;
if let Some(docids_to_delete) = docids_to_delete { let level_size_changed = self.inner.modify(
let docids_to_delete = docids_to_delete?; wtxn,
self.inner.delete(wtxn, key.field_id, key.left_bound, &docids_to_delete)?; key.field_id,
key.left_bound,
docids_to_add.as_ref(),
docids_to_delete.as_ref(),
)?;
if level_size_changed {
// if a node has been added or removed from the highest level,
// we may have to update the facet level.
facet_level_may_be_updated = true;
} }
}
if let Some(docids_to_add) = docids_to_add { if let Some(field_id) = current_field_id {
let docids_to_add = docids_to_add?; if facet_level_may_be_updated {
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids_to_add)?; self.inner.add_or_delete_level(wtxn, field_id)?;
} }
} }
@ -166,138 +213,78 @@ impl FacetsUpdateIncrementalInner {
/// ///
/// ## Return /// ## Return
/// See documentation of `insert_in_level` /// See documentation of `insert_in_level`
fn insert_in_level_0( fn modify_in_level_0(
&self, &self,
txn: &mut RwTxn, txn: &mut RwTxn,
field_id: u16, field_id: u16,
facet_value: &[u8], facet_value: &[u8],
docids: &RoaringBitmap, add_docids: Option<&RoaringBitmap>,
) -> Result<InsertionResult> { del_docids: Option<&RoaringBitmap>,
) -> Result<ModificationResult> {
let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value }; let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
let value = FacetGroupValue { bitmap: docids.clone(), size: 1 };
let mut level0_prefix = vec![]; let old_value = self.db.get(txn, &key)?;
level0_prefix.extend_from_slice(&field_id.to_be_bytes()); match (old_value, add_docids, del_docids) {
level0_prefix.push(0); // Addition + deletion on an existing value
(Some(FacetGroupValue { bitmap, .. }), Some(add_docids), Some(del_docids)) => {
let mut iter = let value = FacetGroupValue { bitmap: (bitmap - del_docids) | add_docids, size: 1 };
self.db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, &level0_prefix)?; self.db.put(txn, &key, &value)?;
Ok(ModificationResult::InPlace)
if iter.next().is_none() { }
drop(iter); // Addition on an existing value
self.db.put(txn, &key, &value)?; (Some(FacetGroupValue { bitmap, .. }), Some(add_docids), None) => {
Ok(InsertionResult::Insert) let value = FacetGroupValue { bitmap: bitmap | add_docids, size: 1 };
} else { self.db.put(txn, &key, &value)?;
drop(iter); Ok(ModificationResult::InPlace)
let old_value = self.db.get(txn, &key)?; }
match old_value { // Addition of a new value (ignore deletion)
Some(mut updated_value) => { (None, Some(add_docids), _) => {
// now merge the two let value = FacetGroupValue { bitmap: add_docids.clone(), size: 1 };
updated_value.bitmap |= value.bitmap; self.db.put(txn, &key, &value)?;
self.db.put(txn, &key, &updated_value)?; Ok(ModificationResult::Insert)
Ok(InsertionResult::InPlace) }
} // Deletion on an existing value, fully delete the key if the resulted value is empty.
None => { (Some(FacetGroupValue { mut bitmap, .. }), None, Some(del_docids)) => {
bitmap -= del_docids;
if bitmap.is_empty() {
// Full deletion
let mut next_key = None;
if let Some((next, _)) =
self.db.remap_data_type::<DecodeIgnore>().get_greater_than(txn, &key)?
{
if next.field_id == field_id && next.level == 0 {
next_key = Some(next.left_bound.to_vec());
}
}
self.db.delete(txn, &key)?;
Ok(ModificationResult::Remove { next: next_key })
} else {
// Partial deletion
let value = FacetGroupValue { bitmap, size: 1 };
self.db.put(txn, &key, &value)?; self.db.put(txn, &key, &value)?;
Ok(InsertionResult::Insert) Ok(ModificationResult::InPlace)
} }
} }
// Otherwise do nothing (None + no addition + deletion == Some + no addition + no deletion == Nothing),
// may be unreachable at some point.
(None, None, _) | (Some(_), None, None) => Ok(ModificationResult::Nothing),
} }
} }
/// Insert the given facet value and corresponding document ids in all the levels of the database up to the given `level`. /// Split a level node into two balanced nodes.
/// This function works recursively.
/// ///
/// ## Return /// # Return
/// Returns the effect of adding the facet value to the database on the given `level`. /// Returns `ModificationResult::Insert` if the split is successful.
/// fn split_group(
/// - `InsertionResult::InPlace` means that inserting the `facet_value` into the `level` did not have
/// an effect on the number of keys in that level. Therefore, it did not increase the number of children
/// of the parent node.
///
/// - `InsertionResult::Insert` means that inserting the `facet_value` into the `level` resulted
/// in the addition of a new key in that level, and that therefore the number of children
/// of the parent node should be incremented.
fn insert_in_level(
&self, &self,
txn: &mut RwTxn, txn: &mut RwTxn,
field_id: u16, field_id: u16,
level: u8, level: u8,
facet_value: &[u8], insertion_key: FacetGroupKey<Vec<u8>>,
docids: &RoaringBitmap, insertion_value: FacetGroupValue,
) -> Result<InsertionResult> { ) -> Result<ModificationResult> {
if level == 0 { let size_left = insertion_value.size / 2;
return self.insert_in_level_0(txn, field_id, facet_value, docids); let size_right = insertion_value.size - size_left;
}
let max_group_size = self.max_group_size;
let result = self.insert_in_level(txn, field_id, level - 1, facet_value, docids)?;
// level below inserted an element
let (insertion_key, insertion_value) =
self.find_insertion_key_value(field_id, level, facet_value, txn)?;
match result {
// because we know that we inserted in place, the facet_value is not a new one
// thus it doesn't extend a group, and thus the insertion key computed above is
// still correct
InsertionResult::InPlace => {
let mut updated_value = insertion_value;
updated_value.bitmap |= docids;
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
return Ok(InsertionResult::InPlace);
}
InsertionResult::Expand => {}
InsertionResult::Insert => {}
}
// Here we know that inserting the facet value in the level below resulted in the creation
// of a new key. Therefore, it may be the case that we need to modify the left bound of the
// insertion key (see documentation of `find_insertion_key_value` for an example of when that
// could happen).
let (insertion_key, insertion_key_was_modified) = {
let mut new_insertion_key = insertion_key.clone();
let mut key_should_be_modified = false;
if facet_value < insertion_key.left_bound.as_slice() {
new_insertion_key.left_bound = facet_value.to_vec();
key_should_be_modified = true;
}
if key_should_be_modified {
let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
assert!(is_deleted);
self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?;
}
(new_insertion_key, key_should_be_modified)
};
// Now we know that the insertion key contains the `facet_value`.
// We still need to update the insertion value by:
// 1. Incrementing the number of children (since the recursive call returned `InsertionResult::Insert`)
// 2. Merge the previous docids with the new one
let mut updated_value = insertion_value;
if matches!(result, InsertionResult::Insert) {
updated_value.size += 1;
}
if updated_value.size < max_group_size {
updated_value.bitmap |= docids;
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
if insertion_key_was_modified {
return Ok(InsertionResult::Expand);
} else {
return Ok(InsertionResult::InPlace);
}
}
// We've increased the group size of the value and realised it has become greater than or equal to `max_group_size`
// Therefore it must be split into two nodes.
let size_left = updated_value.size / 2;
let size_right = updated_value.size - size_left;
let level_below = level - 1; let level_below = level - 1;
@ -351,34 +338,228 @@ impl FacetsUpdateIncrementalInner {
self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?; self.db.put(txn, &group_left.0.as_ref(), &group_left.1)?;
self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?; self.db.put(txn, &group_right.0.as_ref(), &group_right.1)?;
Ok(InsertionResult::Insert) Ok(ModificationResult::Insert)
} }
/// Insert the given facet value and corresponding document ids in the database. /// Remove the docids still present in the related sub-level nodes from the del_docids.
pub fn insert( ///
/// This process is needed to avoid removing docids from a group node where the docid is present in several sub-nodes.
fn trim_del_docids<'a>(
&self,
txn: &mut RwTxn,
field_id: u16,
level: u8,
insertion_key: &FacetGroupKey<Vec<u8>>,
insertion_value_size: usize,
del_docids: &'a RoaringBitmap,
) -> Result<std::borrow::Cow<'a, RoaringBitmap>> {
let level_below = level - 1;
let start_key = FacetGroupKey {
field_id,
level: level_below,
left_bound: insertion_key.left_bound.as_slice(),
};
let mut del_docids = std::borrow::Cow::Borrowed(del_docids);
let iter = self.db.range(txn, &(start_key..))?.take(insertion_value_size);
for next in iter {
let (_, value) = next?;
// if a sublevel bitmap as common docids with del_docids,
// then these docids shouldn't be removed and so, remove them from the deletion list.
if !value.bitmap.is_disjoint(&del_docids) {
*del_docids.to_mut() -= value.bitmap;
}
}
Ok(del_docids)
}
/// Modify the given facet value and corresponding document ids in all the levels of the database up to the given `level`.
/// This function works recursively.
///
/// ## Return
/// Returns the effect of modifying the facet value to the database on the given `level`.
///
fn modify_in_level(
&self,
txn: &mut RwTxn,
field_id: u16,
level: u8,
facet_value: &[u8],
add_docids: Option<&RoaringBitmap>,
del_docids: Option<&RoaringBitmap>,
) -> Result<ModificationResult> {
if level == 0 {
return self.modify_in_level_0(txn, field_id, facet_value, add_docids, del_docids);
}
let result =
self.modify_in_level(txn, field_id, level - 1, facet_value, add_docids, del_docids)?;
// level below inserted an element
if let ModificationResult::Nothing = result {
// if the previous level has not been modified,
// early return ModificationResult::Nothing.
return Ok(ModificationResult::Nothing);
}
let (insertion_key, insertion_value) =
self.find_insertion_key_value(field_id, level, facet_value, txn)?;
let insertion_value_size = insertion_value.size as usize;
let mut insertion_value_was_modified = false;
let mut updated_value = insertion_value;
if let ModificationResult::Insert = result {
// if a key has been inserted in the sub-level raise the value size.
updated_value.size += 1;
insertion_value_was_modified = true;
} else if let ModificationResult::Remove { .. } = result {
if updated_value.size <= 1 {
// if the only remaining node is the one to delete,
// delete the key instead and early return.
let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
assert!(is_deleted);
return Ok(result);
} else {
// Reduce the value size
updated_value.size -= 1;
insertion_value_was_modified = true;
}
}
let (insertion_key, insertion_key_modification) =
if let ModificationResult::InPlace = result {
(insertion_key, ModificationResult::InPlace)
} else {
// Inserting or deleting the facet value in the level below resulted in the creation
// of a new key. Therefore, it may be the case that we need to modify the left bound of the
// insertion key (see documentation of `find_insertion_key_value` for an example of when that
// could happen).
let mut new_insertion_key = insertion_key.clone();
let mut key_modification = ModificationResult::InPlace;
if let ModificationResult::Remove { next } | ModificationResult::Reduce { next } =
result
{
// if the deleted facet_value is the left_bound of the current node,
// the left_bound should be updated reducing the current node.
let reduced_range = facet_value == insertion_key.left_bound;
if reduced_range {
new_insertion_key.left_bound = next.clone().unwrap();
key_modification = ModificationResult::Reduce { next };
}
} else if facet_value < insertion_key.left_bound.as_slice() {
// if the added facet_value is the under the left_bound of the current node,
// the left_bound should be updated expanding the current node.
new_insertion_key.left_bound = facet_value.to_vec();
key_modification = ModificationResult::Expand;
}
if matches!(
key_modification,
ModificationResult::Expand | ModificationResult::Reduce { .. }
) {
// if the node should be updated, delete it, it will be recreated using a new key later.
let is_deleted = self.db.delete(txn, &insertion_key.as_ref())?;
assert!(is_deleted);
}
(new_insertion_key, key_modification)
};
if updated_value.size < self.max_group_size {
// If there are docids to delete, trim them avoiding unexpected removal.
if let Some(del_docids) = del_docids
.map(|ids| {
self.trim_del_docids(
txn,
field_id,
level,
&insertion_key,
insertion_value_size,
ids,
)
})
.transpose()?
.filter(|ids| !ids.is_empty())
{
updated_value.bitmap -= &*del_docids;
insertion_value_was_modified = true;
}
if let Some(add_docids) = add_docids {
updated_value.bitmap |= add_docids;
insertion_value_was_modified = true;
}
if insertion_value_was_modified
|| matches!(
insertion_key_modification,
ModificationResult::Expand | ModificationResult::Reduce { .. }
)
{
// if any modification occured, insert it in the database.
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
Ok(insertion_key_modification)
} else {
// this case is reachable when a docid is removed from a sub-level node but is still present in another one.
// For instance, a document containing 2 and 3, if 2 is removed, the docid should remain in the group node [1..4].
Ok(ModificationResult::Nothing)
}
} else {
// We've increased the group size of the value and realised it has become greater than or equal to `max_group_size`
// Therefore it must be split into two nodes.
self.split_group(txn, field_id, level, insertion_key, updated_value)
}
}
/// Modify the given facet value and corresponding document ids in the database.
/// If no more document ids correspond to the facet value, delete it completely.
///
/// ## Return
/// Returns `true` if some tree-nodes of the highest level have been removed or added implying a potential
/// addition or deletion of a facet level.
/// Otherwise returns `false` if the tree-nodes have been modified in place.
pub fn modify(
&self, &self,
txn: &mut RwTxn, txn: &mut RwTxn,
field_id: u16, field_id: u16,
facet_value: &[u8], facet_value: &[u8],
docids: &RoaringBitmap, add_docids: Option<&RoaringBitmap>,
) -> Result<()> { del_docids: Option<&RoaringBitmap>,
if docids.is_empty() { ) -> Result<bool> {
return Ok(()); if add_docids.map_or(true, RoaringBitmap::is_empty)
&& del_docids.map_or(true, RoaringBitmap::is_empty)
{
return Ok(false);
} }
let group_size = self.group_size;
let highest_level = get_highest_level(txn, self.db, field_id)?; let highest_level = get_highest_level(txn, self.db, field_id)?;
let result = self.insert_in_level(txn, field_id, highest_level, facet_value, docids)?; let result = self.modify_in_level(
txn,
field_id,
highest_level,
facet_value,
add_docids,
del_docids,
)?;
match result { match result {
InsertionResult::InPlace => return Ok(()), ModificationResult::InPlace
InsertionResult::Expand => return Ok(()), | ModificationResult::Expand
InsertionResult::Insert => {} | ModificationResult::Nothing
| ModificationResult::Reduce { .. } => Ok(false),
ModificationResult::Insert | ModificationResult::Remove { .. } => Ok(true),
} }
}
// Here we check whether the highest level has exceeded `min_level_size` * `self.group_size`. /// Check whether the highest level has exceeded `min_level_size` * `self.group_size`.
// If it has, we must build an addition level above it. /// If it has, we must build an addition level above it.
/// Then check whether the highest level is under `min_level_size`.
/// If it has, we must remove the complete level.
pub(crate) fn add_or_delete_level(&self, txn: &mut RwTxn, field_id: u16) -> Result<()> {
let highest_level = get_highest_level(txn, self.db, field_id)?;
let mut highest_level_prefix = vec![]; let mut highest_level_prefix = vec![];
highest_level_prefix.extend_from_slice(&field_id.to_be_bytes()); highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
highest_level_prefix.push(highest_level); highest_level_prefix.push(highest_level);
@ -386,14 +567,48 @@ impl FacetsUpdateIncrementalInner {
let size_highest_level = let size_highest_level =
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count(); self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?.count();
if size_highest_level < self.group_size as usize * self.min_level_size as usize { if size_highest_level >= self.group_size as usize * self.min_level_size as usize {
return Ok(()); self.add_level(txn, field_id, highest_level, &highest_level_prefix, size_highest_level)
} else if size_highest_level < self.min_level_size as usize && highest_level != 0 {
self.delete_level(txn, &highest_level_prefix)
} else {
Ok(())
} }
}
/// Delete a level.
fn delete_level(&self, txn: &mut RwTxn, highest_level_prefix: &[u8]) -> Result<()> {
let mut to_delete = vec![];
let mut iter =
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, highest_level_prefix)?;
for el in iter.by_ref() {
let (k, _) = el?;
to_delete.push(
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
.map_err(Error::Encoding)?
.into_owned(),
);
}
drop(iter);
for k in to_delete {
self.db.delete(txn, &k.as_ref())?;
}
Ok(())
}
/// Build an additional level for the field id.
fn add_level(
&self,
txn: &mut RwTxn,
field_id: u16,
highest_level: u8,
highest_level_prefix: &[u8],
size_highest_level: usize,
) -> Result<()> {
let mut groups_iter = self let mut groups_iter = self
.db .db
.remap_types::<Bytes, FacetGroupValueCodec>() .remap_types::<Bytes, FacetGroupValueCodec>()
.prefix_iter(txn, &highest_level_prefix)?; .prefix_iter(txn, highest_level_prefix)?;
let nbr_new_groups = size_highest_level / self.group_size as usize; let nbr_new_groups = size_highest_level / self.group_size as usize;
let nbr_leftover_elements = size_highest_level % self.group_size as usize; let nbr_leftover_elements = size_highest_level % self.group_size as usize;
@ -402,7 +617,7 @@ impl FacetsUpdateIncrementalInner {
for _ in 0..nbr_new_groups { for _ in 0..nbr_new_groups {
let mut first_key = None; let mut first_key = None;
let mut values = RoaringBitmap::new(); let mut values = RoaringBitmap::new();
for _ in 0..group_size { for _ in 0..self.group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?; let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes) let key_i = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key_bytes)
.map_err(Error::Encoding)?; .map_err(Error::Encoding)?;
@ -417,7 +632,7 @@ impl FacetsUpdateIncrementalInner {
level: highest_level + 1, level: highest_level + 1,
left_bound: first_key.unwrap().left_bound, left_bound: first_key.unwrap().left_bound,
}; };
let value = FacetGroupValue { size: group_size, bitmap: values }; let value = FacetGroupValue { size: self.group_size, bitmap: values };
to_add.push((key.into_owned(), value)); to_add.push((key.into_owned(), value));
} }
// now we add the rest of the level, in case its size is > group_size * min_level_size // now we add the rest of the level, in case its size is > group_size * min_level_size
@ -452,173 +667,6 @@ impl FacetsUpdateIncrementalInner {
} }
Ok(()) Ok(())
} }
/// Delete the given document id from the given facet value in the database, from level 0 to the
/// the given level.
///
/// ## Return
/// Returns the effect of removing the document id from the database on the given `level`.
///
/// - `DeletionResult::InPlace` means that deleting the document id did not have
/// an effect on the keys in that level.
///
/// - `DeletionResult::Reduce` means that deleting the document id resulted in a change in the
/// number of keys in the level. For example, removing a document id from the facet value `3` could
/// cause it to have no corresponding document in level 0 anymore, and therefore the key was deleted
/// entirely. In that case, `DeletionResult::Remove` is returned. The parent of the deleted key must
/// then adjust its group size. If its group size falls to 0, then it will need to be deleted as well.
///
/// - `DeletionResult::Reduce` means that deleting the document id resulted in a change in the
/// bounds of the keys of the level. For example, removing a document id from the facet value
/// `3` might have caused the facet value `3` to have no corresponding document in level 0. Therefore,
/// in level 1, the key with the left bound `3` had to be changed to the next facet value (e.g. 4).
/// In that case `DeletionResult::Reduce` is returned. The parent of the reduced key may need to adjust
/// its left bound as well.
fn delete_in_level(
&self,
txn: &mut RwTxn,
field_id: u16,
level: u8,
facet_value: &[u8],
docids: &RoaringBitmap,
) -> Result<DeletionResult> {
if level == 0 {
return self.delete_in_level_0(txn, field_id, facet_value, docids);
}
let (deletion_key, mut bitmap) =
self.find_insertion_key_value(field_id, level, facet_value, txn)?;
let result = self.delete_in_level(txn, field_id, level - 1, facet_value, docids)?;
let mut decrease_size = false;
let next_key = match result {
DeletionResult::InPlace => {
bitmap.bitmap -= docids;
self.db.put(txn, &deletion_key.as_ref(), &bitmap)?;
return Ok(DeletionResult::InPlace);
}
DeletionResult::Reduce { next } => next,
DeletionResult::Remove { next } => {
decrease_size = true;
next
}
};
// If either DeletionResult::Reduce or DeletionResult::Remove was returned,
// then we may need to adjust the left_bound of the deletion key.
// If DeletionResult::Remove was returned, then we need to decrease the group
// size of the deletion key.
let mut updated_value = bitmap;
if decrease_size {
updated_value.size -= 1;
}
if updated_value.size == 0 {
self.db.delete(txn, &deletion_key.as_ref())?;
Ok(DeletionResult::Remove { next: next_key })
} else {
let mut updated_deletion_key = deletion_key.clone();
let reduced_range = facet_value == deletion_key.left_bound;
if reduced_range {
updated_deletion_key.left_bound = next_key.clone().unwrap();
}
updated_value.bitmap -= docids;
let _ = self.db.delete(txn, &deletion_key.as_ref())?;
self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?;
if reduced_range {
Ok(DeletionResult::Reduce { next: next_key })
} else {
Ok(DeletionResult::InPlace)
}
}
}
fn delete_in_level_0(
&self,
txn: &mut RwTxn,
field_id: u16,
facet_value: &[u8],
docids: &RoaringBitmap,
) -> Result<DeletionResult> {
let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
let mut bitmap = self.db.get(txn, &key)?.unwrap().bitmap;
bitmap -= docids;
if bitmap.is_empty() {
let mut next_key = None;
if let Some((next, _)) =
self.db.remap_data_type::<DecodeIgnore>().get_greater_than(txn, &key)?
{
if next.field_id == field_id && next.level == 0 {
next_key = Some(next.left_bound.to_vec());
}
}
self.db.delete(txn, &key)?;
Ok(DeletionResult::Remove { next: next_key })
} else {
self.db.put(txn, &key, &FacetGroupValue { size: 1, bitmap })?;
Ok(DeletionResult::InPlace)
}
}
pub fn delete(
&self,
txn: &mut RwTxn,
field_id: u16,
facet_value: &[u8],
docids: &RoaringBitmap,
) -> Result<()> {
if self
.db
.remap_data_type::<DecodeIgnore>()
.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })?
.is_none()
{
return Ok(());
}
let highest_level = get_highest_level(txn, self.db, field_id)?;
let result = self.delete_in_level(txn, field_id, highest_level, facet_value, docids)?;
match result {
DeletionResult::InPlace => return Ok(()),
DeletionResult::Reduce { .. } => return Ok(()),
DeletionResult::Remove { .. } => {}
}
// if we either removed a key from the highest level, its size may have fallen
// below `min_level_size`, in which case we need to remove the entire level
let mut highest_level_prefix = vec![];
highest_level_prefix.extend_from_slice(&field_id.to_be_bytes());
highest_level_prefix.push(highest_level);
if highest_level == 0
|| self
.db
.remap_types::<Bytes, Bytes>()
.prefix_iter(txn, &highest_level_prefix)?
.count()
>= self.min_level_size as usize
{
return Ok(());
}
let mut to_delete = vec![];
let mut iter =
self.db.remap_types::<Bytes, Bytes>().prefix_iter(txn, &highest_level_prefix)?;
for el in iter.by_ref() {
let (k, _) = el?;
to_delete.push(
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(k)
.map_err(Error::Encoding)?
.into_owned(),
);
}
drop(iter);
for k in to_delete {
self.db.delete(txn, &k.as_ref())?;
}
Ok(())
}
} }
impl<'a> FacetGroupKey<&'a [u8]> { impl<'a> FacetGroupKey<&'a [u8]> {

View File

@ -429,7 +429,8 @@ pub(crate) mod test_helpers {
max_group_size: self.max_group_size.get(), max_group_size: self.max_group_size.get(),
}; };
let key_bytes = BoundCodec::bytes_encode(key).unwrap(); let key_bytes = BoundCodec::bytes_encode(key).unwrap();
update.insert(wtxn, field_id, &key_bytes, docids).unwrap(); update.modify(wtxn, field_id, &key_bytes, Some(docids), None).unwrap();
update.add_or_delete_level(wtxn, field_id).unwrap();
} }
pub fn delete_single_docid<'a>( pub fn delete_single_docid<'a>(
&self, &self,
@ -455,7 +456,8 @@ pub(crate) mod test_helpers {
max_group_size: self.max_group_size.get(), max_group_size: self.max_group_size.get(),
}; };
let key_bytes = BoundCodec::bytes_encode(key).unwrap(); let key_bytes = BoundCodec::bytes_encode(key).unwrap();
update.delete(wtxn, field_id, &key_bytes, docids).unwrap(); update.modify(wtxn, field_id, &key_bytes, None, Some(docids)).unwrap();
update.add_or_delete_level(wtxn, field_id).unwrap();
} }
pub fn bulk_insert<'a, 'b>( pub fn bulk_insert<'a, 'b>(