mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Make facet deletion incremental
This commit is contained in:
parent
0ade699873
commit
1165ba2171
@ -1,4 +1,5 @@
|
|||||||
use std::collections::btree_map::Entry;
|
use std::collections::btree_map::Entry;
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
use heed::types::{ByteSlice, DecodeIgnore, Str};
|
use heed::types::{ByteSlice, DecodeIgnore, Str};
|
||||||
@ -8,17 +9,16 @@ use serde::{Deserialize, Serialize};
|
|||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use super::{ClearDocuments, FacetsUpdateBulk};
|
use super::facet::delete::FacetsDelete;
|
||||||
|
use super::ClearDocuments;
|
||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::FieldDocIdFacetCodec;
|
||||||
ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetIgnoreCodec,
|
|
||||||
};
|
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::index::{db_name, main_key};
|
use crate::index::{db_name, main_key};
|
||||||
use crate::{
|
use crate::{
|
||||||
ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index, Result,
|
ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result, RoaringBitmapCodec,
|
||||||
RoaringBitmapCodec, SmallString32, BEU32,
|
SmallString32, BEU32,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct DeleteDocuments<'t, 'u, 'i> {
|
pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||||
@ -444,13 +444,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for facet_type in [FacetType::Number, FacetType::String] {
|
for facet_type in [FacetType::Number, FacetType::String] {
|
||||||
remove_docids_from_facet_id_docids(
|
let mut affected_facet_values = HashMap::new();
|
||||||
self.wtxn,
|
|
||||||
self.index,
|
|
||||||
&self.to_delete_docids,
|
|
||||||
fields_ids_map.clone(),
|
|
||||||
facet_type,
|
|
||||||
)?;
|
|
||||||
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
||||||
// Remove docids from the number faceted documents ids
|
// Remove docids from the number faceted documents ids
|
||||||
let mut docids =
|
let mut docids =
|
||||||
@ -458,15 +452,25 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
docids -= &self.to_delete_docids;
|
docids -= &self.to_delete_docids;
|
||||||
self.index.put_faceted_documents_ids(self.wtxn, field_id, facet_type, &docids)?;
|
self.index.put_faceted_documents_ids(self.wtxn, field_id, facet_type, &docids)?;
|
||||||
|
|
||||||
remove_docids_from_field_id_docid_facet_value(
|
let facet_values = remove_docids_from_field_id_docid_facet_value(
|
||||||
&self.index,
|
&self.index,
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
facet_type,
|
facet_type,
|
||||||
field_id,
|
field_id,
|
||||||
&self.to_delete_docids,
|
&self.to_delete_docids,
|
||||||
)?;
|
)?;
|
||||||
|
if !facet_values.is_empty() {
|
||||||
|
affected_facet_values.insert(field_id, facet_values);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
FacetsDelete::new(
|
||||||
|
self.index,
|
||||||
|
facet_type,
|
||||||
|
affected_facet_values,
|
||||||
|
&self.to_delete_docids,
|
||||||
|
)
|
||||||
|
.execute(self.wtxn)?;
|
||||||
|
}
|
||||||
|
|
||||||
// We delete the documents ids that are under the facet field id values.
|
// We delete the documents ids that are under the facet field id values.
|
||||||
remove_docids_from_facet_id_exists_docids(
|
remove_docids_from_facet_id_exists_docids(
|
||||||
@ -546,7 +550,7 @@ fn remove_docids_from_field_id_docid_facet_value<'i, 'a>(
|
|||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
to_remove: &RoaringBitmap,
|
to_remove: &RoaringBitmap,
|
||||||
) -> heed::Result<()> {
|
) -> heed::Result<HashSet<Vec<u8>>> {
|
||||||
let db = match facet_type {
|
let db = match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => {
|
||||||
index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
|
index.field_id_docid_facet_strings.remap_types::<ByteSlice, DecodeIgnore>()
|
||||||
@ -555,19 +559,23 @@ fn remove_docids_from_field_id_docid_facet_value<'i, 'a>(
|
|||||||
index.field_id_docid_facet_f64s.remap_types::<ByteSlice, DecodeIgnore>()
|
index.field_id_docid_facet_f64s.remap_types::<ByteSlice, DecodeIgnore>()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
let mut all_affected_facet_values = HashSet::default();
|
||||||
let mut iter = db
|
let mut iter = db
|
||||||
.prefix_iter_mut(wtxn, &field_id.to_be_bytes())?
|
.prefix_iter_mut(wtxn, &field_id.to_be_bytes())?
|
||||||
.remap_key_type::<FieldDocIdFacetIgnoreCodec>();
|
.remap_key_type::<FieldDocIdFacetCodec<ByteSlice>>();
|
||||||
|
|
||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let ((_, docid, _), _) = result?;
|
let ((_, docid, facet_value), _) = result?;
|
||||||
if to_remove.contains(docid) {
|
if to_remove.contains(docid) {
|
||||||
|
if !all_affected_facet_values.contains(facet_value) {
|
||||||
|
all_affected_facet_values.insert(facet_value.to_owned());
|
||||||
|
}
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
unsafe { iter.del_current()? };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(all_affected_facet_values)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn remove_docids_from_facet_id_exists_docids<'a, C>(
|
fn remove_docids_from_facet_id_exists_docids<'a, C>(
|
||||||
@ -595,54 +603,6 @@ where
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
fn remove_docids_from_facet_id_docids<'a>(
|
|
||||||
wtxn: &'a mut heed::RwTxn,
|
|
||||||
index: &Index,
|
|
||||||
to_remove: &RoaringBitmap,
|
|
||||||
fields_ids_map: FieldsIdsMap,
|
|
||||||
facet_type: FacetType,
|
|
||||||
) -> Result<()> {
|
|
||||||
let db = match facet_type {
|
|
||||||
FacetType::String => {
|
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
|
||||||
}
|
|
||||||
FacetType::Number => {
|
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let mut modified = false;
|
|
||||||
for field_id in fields_ids_map.ids() {
|
|
||||||
let mut level0_prefix = vec![];
|
|
||||||
level0_prefix.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
level0_prefix.push(0);
|
|
||||||
let mut iter = db
|
|
||||||
.as_polymorph()
|
|
||||||
.prefix_iter_mut::<_, ByteSlice, FacetGroupValueCodec>(wtxn, &level0_prefix)?;
|
|
||||||
|
|
||||||
while let Some(result) = iter.next() {
|
|
||||||
let (bytes, mut value) = result?;
|
|
||||||
let previous_len = value.bitmap.len();
|
|
||||||
value.bitmap -= to_remove;
|
|
||||||
if value.bitmap.is_empty() {
|
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
|
||||||
unsafe { iter.del_current()? };
|
|
||||||
modified = true;
|
|
||||||
} else if value.bitmap.len() != previous_len {
|
|
||||||
let bytes = bytes.to_owned();
|
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
|
||||||
unsafe { iter.put_current(&bytes, &value)? };
|
|
||||||
modified = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !modified {
|
|
||||||
return Ok(());
|
|
||||||
}
|
|
||||||
let builder = FacetsUpdateBulk::new_not_updating_level_0(index, facet_type);
|
|
||||||
builder.execute(wtxn)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
@ -29,6 +29,7 @@ pub struct FacetsUpdateBulk<'i> {
|
|||||||
group_size: u8,
|
group_size: u8,
|
||||||
min_level_size: u8,
|
min_level_size: u8,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
|
field_ids: Vec<FieldId>,
|
||||||
// None if level 0 does not need to be updated
|
// None if level 0 does not need to be updated
|
||||||
new_data: Option<grenad::Reader<File>>,
|
new_data: Option<grenad::Reader<File>>,
|
||||||
}
|
}
|
||||||
@ -36,20 +37,30 @@ pub struct FacetsUpdateBulk<'i> {
|
|||||||
impl<'i> FacetsUpdateBulk<'i> {
|
impl<'i> FacetsUpdateBulk<'i> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
|
field_ids: Vec<FieldId>,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
new_data: grenad::Reader<File>,
|
new_data: grenad::Reader<File>,
|
||||||
group_size: u8,
|
group_size: u8,
|
||||||
min_level_size: u8,
|
min_level_size: u8,
|
||||||
) -> FacetsUpdateBulk<'i> {
|
) -> FacetsUpdateBulk<'i> {
|
||||||
FacetsUpdateBulk { index, group_size, min_level_size, facet_type, new_data: Some(new_data) }
|
FacetsUpdateBulk {
|
||||||
|
index,
|
||||||
|
field_ids,
|
||||||
|
group_size,
|
||||||
|
min_level_size,
|
||||||
|
facet_type,
|
||||||
|
new_data: Some(new_data),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn new_not_updating_level_0(
|
pub fn new_not_updating_level_0(
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
|
field_ids: Vec<FieldId>,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
) -> FacetsUpdateBulk<'i> {
|
) -> FacetsUpdateBulk<'i> {
|
||||||
FacetsUpdateBulk {
|
FacetsUpdateBulk {
|
||||||
index,
|
index,
|
||||||
|
field_ids,
|
||||||
group_size: FACET_GROUP_SIZE,
|
group_size: FACET_GROUP_SIZE,
|
||||||
min_level_size: FACET_MIN_LEVEL_SIZE,
|
min_level_size: FACET_MIN_LEVEL_SIZE,
|
||||||
facet_type,
|
facet_type,
|
||||||
@ -61,7 +72,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
|
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
|
||||||
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
||||||
|
|
||||||
let Self { index, group_size, min_level_size, facet_type, new_data } = self;
|
let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self;
|
||||||
|
|
||||||
let db = match facet_type {
|
let db = match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => {
|
||||||
@ -76,8 +87,6 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
|
|
||||||
let inner = FacetsUpdateBulkInner { db, new_data, group_size, min_level_size };
|
let inner = FacetsUpdateBulkInner { db, new_data, group_size, min_level_size };
|
||||||
|
|
||||||
let field_ids = index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Box<[_]>>();
|
|
||||||
|
|
||||||
inner.update(wtxn, &field_ids, |wtxn, field_id, all_docids| {
|
inner.update(wtxn, &field_ids, |wtxn, field_id, all_docids| {
|
||||||
index.put_faceted_documents_ids(wtxn, field_id, facet_type, &all_docids)?;
|
index.put_faceted_documents_ids(wtxn, field_id, facet_type, &all_docids)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -405,7 +414,7 @@ mod tests {
|
|||||||
index.verify_structure_validity(&wtxn, 1);
|
index.verify_structure_validity(&wtxn, 1);
|
||||||
// delete all the elements for the facet id 0
|
// delete all the elements for the facet id 0
|
||||||
for i in 0..100u32 {
|
for i in 0..100u32 {
|
||||||
index.delete(&mut wtxn, 0, &(i as f64), i);
|
index.delete_single_docid(&mut wtxn, 0, &(i as f64), i);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&wtxn, 0);
|
index.verify_structure_validity(&wtxn, 0);
|
||||||
index.verify_structure_validity(&wtxn, 1);
|
index.verify_structure_validity(&wtxn, 1);
|
||||||
|
92
milli/src/update/facet/delete.rs
Normal file
92
milli/src/update/facet/delete.rs
Normal file
@ -0,0 +1,92 @@
|
|||||||
|
use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||||
|
use crate::{
|
||||||
|
facet::FacetType,
|
||||||
|
heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
|
||||||
|
update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner},
|
||||||
|
FieldId, Index, Result,
|
||||||
|
};
|
||||||
|
use heed::RwTxn;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use std::collections::{HashMap, HashSet};
|
||||||
|
|
||||||
|
pub struct FacetsDelete<'i, 'b> {
|
||||||
|
index: &'i Index,
|
||||||
|
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
|
facet_type: FacetType,
|
||||||
|
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
|
||||||
|
docids_to_delete: &'b RoaringBitmap,
|
||||||
|
group_size: u8,
|
||||||
|
max_group_size: u8,
|
||||||
|
min_level_size: u8,
|
||||||
|
}
|
||||||
|
impl<'i, 'b> FacetsDelete<'i, 'b> {
|
||||||
|
pub fn new(
|
||||||
|
index: &'i Index,
|
||||||
|
facet_type: FacetType,
|
||||||
|
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
|
||||||
|
docids_to_delete: &'b RoaringBitmap,
|
||||||
|
) -> Self {
|
||||||
|
let database = match facet_type {
|
||||||
|
FacetType::String => {
|
||||||
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
|
}
|
||||||
|
FacetType::Number => {
|
||||||
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
|
}
|
||||||
|
};
|
||||||
|
Self {
|
||||||
|
index,
|
||||||
|
database,
|
||||||
|
facet_type,
|
||||||
|
affected_facet_values,
|
||||||
|
docids_to_delete,
|
||||||
|
group_size: FACET_GROUP_SIZE,
|
||||||
|
max_group_size: FACET_MAX_GROUP_SIZE,
|
||||||
|
min_level_size: FACET_MIN_LEVEL_SIZE,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn execute(self, wtxn: &mut RwTxn) -> Result<()> {
|
||||||
|
for (field_id, affected_facet_values) in self.affected_facet_values {
|
||||||
|
if affected_facet_values.len() >= (self.database.len(wtxn)? / 50) {
|
||||||
|
// Bulk delete
|
||||||
|
let mut modified = false;
|
||||||
|
|
||||||
|
for facet_value in affected_facet_values {
|
||||||
|
let key =
|
||||||
|
FacetGroupKey { field_id, level: 0, left_bound: facet_value.as_slice() };
|
||||||
|
let mut old = self.database.get(wtxn, &key)?.unwrap();
|
||||||
|
let previous_len = old.bitmap.len();
|
||||||
|
old.bitmap -= self.docids_to_delete;
|
||||||
|
if old.bitmap.is_empty() {
|
||||||
|
modified = true;
|
||||||
|
self.database.delete(wtxn, &key)?;
|
||||||
|
} else if old.bitmap.len() != previous_len {
|
||||||
|
modified = true;
|
||||||
|
self.database.put(wtxn, &key, &old)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if modified {
|
||||||
|
let builder = FacetsUpdateBulk::new_not_updating_level_0(
|
||||||
|
self.index,
|
||||||
|
vec![field_id],
|
||||||
|
self.facet_type,
|
||||||
|
);
|
||||||
|
builder.execute(wtxn)?;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Incremental
|
||||||
|
let inc = FacetsUpdateIncrementalInner {
|
||||||
|
db: self.database,
|
||||||
|
group_size: self.group_size,
|
||||||
|
min_level_size: self.min_level_size,
|
||||||
|
max_group_size: self.max_group_size,
|
||||||
|
};
|
||||||
|
for facet_value in affected_facet_values {
|
||||||
|
inc.delete(wtxn, field_id, facet_value.as_slice(), &self.docids_to_delete)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
@ -485,20 +485,20 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
field_id: u16,
|
field_id: u16,
|
||||||
level: u8,
|
level: u8,
|
||||||
facet_value: &[u8],
|
facet_value: &[u8],
|
||||||
docid: u32,
|
docids: &RoaringBitmap,
|
||||||
) -> Result<DeletionResult> {
|
) -> Result<DeletionResult> {
|
||||||
if level == 0 {
|
if level == 0 {
|
||||||
return self.delete_in_level_0(txn, field_id, facet_value, docid);
|
return self.delete_in_level_0(txn, field_id, facet_value, docids);
|
||||||
}
|
}
|
||||||
let (deletion_key, mut bitmap) =
|
let (deletion_key, mut bitmap) =
|
||||||
self.find_insertion_key_value(field_id, level, facet_value, txn)?;
|
self.find_insertion_key_value(field_id, level, facet_value, txn)?;
|
||||||
|
|
||||||
let result = self.delete_in_level(txn, field_id, level - 1, facet_value.clone(), docid)?;
|
let result = self.delete_in_level(txn, field_id, level - 1, facet_value.clone(), docids)?;
|
||||||
|
|
||||||
let mut decrease_size = false;
|
let mut decrease_size = false;
|
||||||
let next_key = match result {
|
let next_key = match result {
|
||||||
DeletionResult::InPlace => {
|
DeletionResult::InPlace => {
|
||||||
bitmap.bitmap.remove(docid);
|
bitmap.bitmap -= docids;
|
||||||
self.db.put(txn, &deletion_key.as_ref(), &bitmap)?;
|
self.db.put(txn, &deletion_key.as_ref(), &bitmap)?;
|
||||||
return Ok(DeletionResult::InPlace);
|
return Ok(DeletionResult::InPlace);
|
||||||
}
|
}
|
||||||
@ -527,7 +527,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
if reduced_range {
|
if reduced_range {
|
||||||
updated_deletion_key.left_bound = next_key.clone().unwrap();
|
updated_deletion_key.left_bound = next_key.clone().unwrap();
|
||||||
}
|
}
|
||||||
updated_value.bitmap.remove(docid);
|
updated_value.bitmap -= docids;
|
||||||
let _ = self.db.delete(txn, &deletion_key.as_ref())?;
|
let _ = self.db.delete(txn, &deletion_key.as_ref())?;
|
||||||
self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?;
|
self.db.put(txn, &updated_deletion_key.as_ref(), &updated_value)?;
|
||||||
if reduced_range {
|
if reduced_range {
|
||||||
@ -543,11 +543,11 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
txn: &'t mut RwTxn,
|
txn: &'t mut RwTxn,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
facet_value: &[u8],
|
facet_value: &[u8],
|
||||||
docid: u32,
|
docids: &RoaringBitmap,
|
||||||
) -> Result<DeletionResult> {
|
) -> Result<DeletionResult> {
|
||||||
let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
|
let key = FacetGroupKey { field_id, level: 0, left_bound: facet_value };
|
||||||
let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap;
|
let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap;
|
||||||
bitmap.remove(docid);
|
bitmap -= docids;
|
||||||
|
|
||||||
if bitmap.is_empty() {
|
if bitmap.is_empty() {
|
||||||
let mut next_key = None;
|
let mut next_key = None;
|
||||||
@ -571,7 +571,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
txn: &'t mut RwTxn,
|
txn: &'t mut RwTxn,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
facet_value: &[u8],
|
facet_value: &[u8],
|
||||||
docid: u32,
|
docids: &RoaringBitmap,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
if self
|
if self
|
||||||
.db
|
.db
|
||||||
@ -584,7 +584,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
let highest_level = get_highest_level(&txn, self.db, field_id)?;
|
let highest_level = get_highest_level(&txn, self.db, field_id)?;
|
||||||
|
|
||||||
let result =
|
let result =
|
||||||
self.delete_in_level(txn, field_id, highest_level as u8, facet_value, docid)?;
|
self.delete_in_level(txn, field_id, highest_level as u8, facet_value, docids)?;
|
||||||
match result {
|
match result {
|
||||||
DeletionResult::InPlace => return Ok(()),
|
DeletionResult::InPlace => return Ok(()),
|
||||||
DeletionResult::Reduce { .. } => return Ok(()),
|
DeletionResult::Reduce { .. } => return Ok(()),
|
||||||
@ -807,7 +807,7 @@ mod tests {
|
|||||||
|
|
||||||
for i in (200..256).into_iter().rev() {
|
for i in (200..256).into_iter().rev() {
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(i as f64), i as u32);
|
index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -816,7 +816,7 @@ mod tests {
|
|||||||
|
|
||||||
for i in (150..200).into_iter().rev() {
|
for i in (150..200).into_iter().rev() {
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(i as f64), i as u32);
|
index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -824,7 +824,7 @@ mod tests {
|
|||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in (100..150).into_iter().rev() {
|
for i in (100..150).into_iter().rev() {
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(i as f64), i as u32);
|
index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -832,14 +832,14 @@ mod tests {
|
|||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in (17..100).into_iter().rev() {
|
for i in (17..100).into_iter().rev() {
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(i as f64), i as u32);
|
index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
milli_snap!(format!("{index}"), 17);
|
milli_snap!(format!("{index}"), 17);
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in (15..17).into_iter().rev() {
|
for i in (15..17).into_iter().rev() {
|
||||||
index.delete(&mut txn, 0, &(i as f64), i as u32);
|
index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -847,7 +847,7 @@ mod tests {
|
|||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in (0..15).into_iter().rev() {
|
for i in (0..15).into_iter().rev() {
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(i as f64), i as u32);
|
index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -867,7 +867,7 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for i in 0..128 {
|
for i in 0..128 {
|
||||||
index.delete(&mut txn, 0, &(i as f64), i as u32);
|
index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -875,7 +875,7 @@ mod tests {
|
|||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in 128..216 {
|
for i in 128..216 {
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(i as f64), i as u32);
|
index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -883,7 +883,7 @@ mod tests {
|
|||||||
let mut txn = index.env.write_txn().unwrap();
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
for i in 216..256 {
|
for i in 216..256 {
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(i as f64), i as u32);
|
index.delete_single_docid(&mut txn, 0, &(i as f64), i as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -908,7 +908,7 @@ mod tests {
|
|||||||
for i in 0..128 {
|
for i in 0..128 {
|
||||||
let key = keys[i];
|
let key = keys[i];
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(key as f64), key as u32);
|
index.delete_single_docid(&mut txn, 0, &(key as f64), key as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -917,7 +917,7 @@ mod tests {
|
|||||||
for i in 128..216 {
|
for i in 128..216 {
|
||||||
let key = keys[i];
|
let key = keys[i];
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(key as f64), key as u32);
|
index.delete_single_docid(&mut txn, 0, &(key as f64), key as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -926,7 +926,7 @@ mod tests {
|
|||||||
for i in 216..256 {
|
for i in 216..256 {
|
||||||
let key = keys[i];
|
let key = keys[i];
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(key as f64), key as u32);
|
index.delete_single_docid(&mut txn, 0, &(key as f64), key as u32);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -979,7 +979,7 @@ mod tests {
|
|||||||
|
|
||||||
for &key in keys.iter() {
|
for &key in keys.iter() {
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &(key as f64), key + 100);
|
index.delete_single_docid(&mut txn, 0, &(key as f64), key + 100);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -1010,7 +1010,7 @@ mod tests {
|
|||||||
|
|
||||||
for &key in keys.iter() {
|
for &key in keys.iter() {
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
index.delete(&mut txn, 0, &format!("{key:x}").as_str(), key + 100);
|
index.delete_single_docid(&mut txn, 0, &format!("{key:x}").as_str(), key + 100);
|
||||||
}
|
}
|
||||||
index.verify_structure_validity(&txn, 0);
|
index.verify_structure_validity(&txn, 0);
|
||||||
txn.commit().unwrap();
|
txn.commit().unwrap();
|
||||||
@ -1131,7 +1131,7 @@ mod fuzz {
|
|||||||
OperationKind::Delete(value) => {
|
OperationKind::Delete(value) => {
|
||||||
if let Some(keys) = value_to_keys.get(value) {
|
if let Some(keys) = value_to_keys.get(value) {
|
||||||
for key in keys {
|
for key in keys {
|
||||||
index.delete(&mut txn, *field_id, key, *value as u32);
|
index.delete_single_docid(&mut txn, *field_id, key, *value as u32);
|
||||||
trivial_db.delete(*field_id, *key, *value as u32);
|
trivial_db.delete(*field_id, *key, *value as u32);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -74,15 +74,15 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8;
|
|||||||
pub const FACET_GROUP_SIZE: u8 = 4;
|
pub const FACET_GROUP_SIZE: u8 = 4;
|
||||||
pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
||||||
|
|
||||||
use std::fs::File;
|
|
||||||
|
|
||||||
use self::incremental::FacetsUpdateIncremental;
|
use self::incremental::FacetsUpdateIncremental;
|
||||||
use super::FacetsUpdateBulk;
|
use super::FacetsUpdateBulk;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
use crate::{Index, Result};
|
use crate::{Index, Result};
|
||||||
|
use std::fs::File;
|
||||||
|
|
||||||
pub mod bulk;
|
pub mod bulk;
|
||||||
|
pub mod delete;
|
||||||
pub mod incremental;
|
pub mod incremental;
|
||||||
|
|
||||||
pub struct FacetsUpdate<'i> {
|
pub struct FacetsUpdate<'i> {
|
||||||
@ -120,8 +120,11 @@ impl<'i> FacetsUpdate<'i> {
|
|||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
|
if self.new_data.len() >= (self.database.len(wtxn)? as u64 / 50) {
|
||||||
|
let field_ids =
|
||||||
|
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
|
||||||
let bulk_update = FacetsUpdateBulk::new(
|
let bulk_update = FacetsUpdateBulk::new(
|
||||||
self.index,
|
self.index,
|
||||||
|
field_ids,
|
||||||
self.facet_type,
|
self.facet_type,
|
||||||
self.new_data,
|
self.new_data,
|
||||||
self.group_size,
|
self.group_size,
|
||||||
@ -273,12 +276,12 @@ pub(crate) mod tests {
|
|||||||
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
||||||
update.insert(wtxn, field_id, &key_bytes, docids).unwrap();
|
update.insert(wtxn, field_id, &key_bytes, docids).unwrap();
|
||||||
}
|
}
|
||||||
pub fn delete<'a>(
|
pub fn delete_single_docid<'a>(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &'a mut RwTxn,
|
wtxn: &'a mut RwTxn,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
|
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
|
||||||
value: u32,
|
docid: u32,
|
||||||
) {
|
) {
|
||||||
let update = FacetsUpdateIncrementalInner {
|
let update = FacetsUpdateIncrementalInner {
|
||||||
db: self.content,
|
db: self.content,
|
||||||
@ -287,7 +290,25 @@ pub(crate) mod tests {
|
|||||||
max_group_size: self.max_group_size.get(),
|
max_group_size: self.max_group_size.get(),
|
||||||
};
|
};
|
||||||
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
||||||
update.delete(wtxn, field_id, &key_bytes, value).unwrap();
|
let mut docids = RoaringBitmap::new();
|
||||||
|
docids.insert(docid);
|
||||||
|
update.delete(wtxn, field_id, &key_bytes, &docids).unwrap();
|
||||||
|
}
|
||||||
|
pub fn delete<'a>(
|
||||||
|
&self,
|
||||||
|
wtxn: &'a mut RwTxn,
|
||||||
|
field_id: u16,
|
||||||
|
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
|
||||||
|
docids: &RoaringBitmap,
|
||||||
|
) {
|
||||||
|
let update = FacetsUpdateIncrementalInner {
|
||||||
|
db: self.content,
|
||||||
|
group_size: self.group_size.get(),
|
||||||
|
min_level_size: self.min_level_size.get(),
|
||||||
|
max_group_size: self.max_group_size.get(),
|
||||||
|
};
|
||||||
|
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
||||||
|
update.delete(wtxn, field_id, &key_bytes, docids).unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn bulk_insert<'a, 'b>(
|
pub fn bulk_insert<'a, 'b>(
|
||||||
|
Loading…
Reference in New Issue
Block a user