mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Reintroduce facet deletion functionality
This commit is contained in:
parent
6cc91824c1
commit
22d80eeaf9
@ -3,19 +3,21 @@ use std::collections::btree_map::Entry;
|
|||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
use heed::types::{ByteSlice, Str};
|
use heed::types::{ByteSlice, Str};
|
||||||
use heed::{BytesDecode, BytesEncode, Database};
|
use heed::{BytesDecode, BytesEncode, Database};
|
||||||
|
use obkv::Key;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use serde_json::Value;
|
use serde_json::Value;
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use super::ClearDocuments;
|
use super::{ClearDocuments, Facets};
|
||||||
use crate::error::{InternalError, SerializationError, UserError};
|
use crate::error::{InternalError, SerializationError, UserError};
|
||||||
// use crate::heed_codec::facet::FacetStringZeroBoundsValueCodec;
|
// use crate::heed_codec::facet::FacetStringZeroBoundsValueCodec;
|
||||||
|
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::index::{db_name, main_key};
|
use crate::index::{db_name, main_key};
|
||||||
use crate::{
|
use crate::{
|
||||||
DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry, Index, Result,
|
fields_ids_map, DocumentId, ExternalDocumentsIds, FieldId, FieldIdMapMissingEntry,
|
||||||
RoaringBitmapCodec, SmallString32, BEU32,
|
FieldsIdsMap, Index, Result, RoaringBitmapCodec, SmallString32, BEU32,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct DeleteDocuments<'t, 'u, 'i> {
|
pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||||
@ -62,6 +64,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
|
|
||||||
pub fn execute(mut self) -> Result<DocumentDeletionResult> {
|
pub fn execute(mut self) -> Result<DocumentDeletionResult> {
|
||||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||||
|
|
||||||
// We retrieve the current documents ids that are in the database.
|
// We retrieve the current documents ids that are in the database.
|
||||||
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
|
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||||
let mut soft_deleted_docids = self.index.soft_deleted_documents_ids(self.wtxn)?;
|
let mut soft_deleted_docids = self.index.soft_deleted_documents_ids(self.wtxn)?;
|
||||||
@ -439,25 +442,27 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?;
|
self.index.put_geo_faceted_documents_ids(self.wtxn, &geo_faceted_doc_ids)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
remove_docids_from_facet_id_docids(
|
||||||
|
self.wtxn,
|
||||||
|
self.index,
|
||||||
|
facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||||
|
&self.to_delete_docids,
|
||||||
|
fields_ids_map.clone(),
|
||||||
|
)?;
|
||||||
|
remove_docids_from_facet_id_docids(
|
||||||
|
self.wtxn,
|
||||||
|
self.index,
|
||||||
|
facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
|
||||||
|
&self.to_delete_docids,
|
||||||
|
fields_ids_map.clone(),
|
||||||
|
)?;
|
||||||
// We delete the documents ids that are under the facet field id values.
|
// We delete the documents ids that are under the facet field id values.
|
||||||
// TODO: remove_docids_from_facet_field_id_docids(
|
remove_docids_from_facet_id_exists_docids(
|
||||||
// self.wtxn,
|
|
||||||
// facet_id_f64_docids,
|
|
||||||
// &self.to_delete_docids,
|
|
||||||
// )?;
|
|
||||||
// We delete the documents ids that are under the facet field id values.
|
|
||||||
remove_docids_from_facet_field_id_docids(
|
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
facet_id_exists_docids,
|
facet_id_exists_docids,
|
||||||
&self.to_delete_docids,
|
&self.to_delete_docids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
remove_docids_from_facet_field_id_string_docids(
|
|
||||||
self.wtxn,
|
|
||||||
facet_id_string_docids,
|
|
||||||
&self.to_delete_docids,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
// Remove the documents ids from the faceted documents ids.
|
// Remove the documents ids from the faceted documents ids.
|
||||||
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
for field_id in self.index.faceted_fields_ids(self.wtxn)? {
|
||||||
// Remove docids from the number faceted documents ids
|
// Remove docids from the number faceted documents ids
|
||||||
@ -580,67 +585,7 @@ where
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn remove_docids_from_facet_field_id_string_docids<'a, C, D>(
|
fn remove_docids_from_facet_id_exists_docids<'a, C>(
|
||||||
wtxn: &'a mut heed::RwTxn,
|
|
||||||
db: &heed::Database<C, D>,
|
|
||||||
to_remove: &RoaringBitmap,
|
|
||||||
) -> crate::Result<()> {
|
|
||||||
// let db_name = Some(crate::index::db_name::FACET_ID_STRING_DOCIDS);
|
|
||||||
// let mut iter = db.remap_types::<ByteSlice, ByteSlice>().iter_mut(wtxn)?;
|
|
||||||
// while let Some(result) = iter.next() {
|
|
||||||
// let (key, val) = result?;
|
|
||||||
// match FacetLevelValueU32Codec::bytes_decode(key) {
|
|
||||||
// Some(_) => {
|
|
||||||
// // If we are able to parse this key it means it is a facet string group
|
|
||||||
// // level key. We must then parse the value using the appropriate codec.
|
|
||||||
// let (group, mut docids) =
|
|
||||||
// FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val)
|
|
||||||
// .ok_or_else(|| SerializationError::Decoding { db_name })?;
|
|
||||||
|
|
||||||
// let previous_len = docids.len();
|
|
||||||
// docids -= to_remove;
|
|
||||||
// if docids.is_empty() {
|
|
||||||
// // safety: we don't keep references from inside the LMDB database.
|
|
||||||
// unsafe { iter.del_current()? };
|
|
||||||
// } else if docids.len() != previous_len {
|
|
||||||
// let key = key.to_owned();
|
|
||||||
// let val = &(group, docids);
|
|
||||||
// let value_bytes =
|
|
||||||
// FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val)
|
|
||||||
// .ok_or_else(|| SerializationError::Encoding { db_name })?;
|
|
||||||
|
|
||||||
// // safety: we don't keep references from inside the LMDB database.
|
|
||||||
// unsafe { iter.put_current(&key, &value_bytes)? };
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// None => {
|
|
||||||
// // The key corresponds to a level zero facet string.
|
|
||||||
// let (original_value, mut docids) =
|
|
||||||
// FacetStringLevelZeroValueCodec::bytes_decode(val)
|
|
||||||
// .ok_or_else(|| SerializationError::Decoding { db_name })?;
|
|
||||||
|
|
||||||
// let previous_len = docids.len();
|
|
||||||
// docids -= to_remove;
|
|
||||||
// if docids.is_empty() {
|
|
||||||
// // safety: we don't keep references from inside the LMDB database.
|
|
||||||
// unsafe { iter.del_current()? };
|
|
||||||
// } else if docids.len() != previous_len {
|
|
||||||
// let key = key.to_owned();
|
|
||||||
// let val = &(original_value, docids);
|
|
||||||
// let value_bytes = FacetStringLevelZeroValueCodec::bytes_encode(val)
|
|
||||||
// .ok_or_else(|| SerializationError::Encoding { db_name })?;
|
|
||||||
|
|
||||||
// // safety: we don't keep references from inside the LMDB database.
|
|
||||||
// unsafe { iter.put_current(&key, &value_bytes)? };
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
fn remove_docids_from_facet_field_id_docids<'a, C>(
|
|
||||||
wtxn: &'a mut heed::RwTxn,
|
wtxn: &'a mut heed::RwTxn,
|
||||||
db: &heed::Database<C, CboRoaringBitmapCodec>,
|
db: &heed::Database<C, CboRoaringBitmapCodec>,
|
||||||
to_remove: &RoaringBitmap,
|
to_remove: &RoaringBitmap,
|
||||||
@ -665,6 +610,46 @@ where
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
fn remove_docids_from_facet_id_docids<'a>(
|
||||||
|
wtxn: &'a mut heed::RwTxn,
|
||||||
|
index: &Index,
|
||||||
|
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
|
||||||
|
to_remove: &RoaringBitmap,
|
||||||
|
fields_ids_map: FieldsIdsMap,
|
||||||
|
) -> Result<()> {
|
||||||
|
let mut modified = false;
|
||||||
|
for field_id in fields_ids_map.ids() {
|
||||||
|
let mut level0_prefix = vec![];
|
||||||
|
level0_prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||||
|
level0_prefix.push(0);
|
||||||
|
let mut iter = db
|
||||||
|
.as_polymorph()
|
||||||
|
.prefix_iter_mut::<_, ByteSlice, FacetGroupValueCodec>(wtxn, &level0_prefix)?;
|
||||||
|
|
||||||
|
while let Some(result) = iter.next() {
|
||||||
|
let (bytes, mut value) = result?;
|
||||||
|
let previous_len = value.bitmap.len();
|
||||||
|
value.bitmap -= to_remove;
|
||||||
|
if value.bitmap.is_empty() {
|
||||||
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
|
unsafe { iter.del_current()? };
|
||||||
|
modified = true;
|
||||||
|
} else if value.bitmap.len() != previous_len {
|
||||||
|
let bytes = bytes.to_owned();
|
||||||
|
// safety: we don't keep references from inside the LMDB database.
|
||||||
|
unsafe { iter.put_current(&bytes, &value)? };
|
||||||
|
modified = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !modified {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let builder = Facets::new(index, db);
|
||||||
|
builder.execute(wtxn)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user