747: Soft-deletion computation no longer depends on the mapsize r=irevoire a=dureuill

# Pull Request

## Related issue

Related to https://github.com/meilisearch/meilisearch/issues/3231: After removing `--max-index-size`, the `mapsize` will always be unrelated to the actual max size the user wants for their DB, so it doesn't make sense to use these values any longer.

This implements solution 2.3 from https://github.com/meilisearch/meilisearch/issues/3231#issuecomment-1348628824

## What does this PR do?

### User-visible

- Soft-deleted documents are no longer hard-deleted based on the mapsize (previously: when less than 10% of the mapsize was available, or when they took up more than 10% of the mapsize).
- Instead, they are hard-deleted when there are at least as many soft-deleted documents as regular documents, or when they take up an estimated 1GiB or more of disk space.

### Implementation standpoint

1. Adds a `DeletionStrategy` enum to replace the boolean `disable_soft_deletion` that we had up until now. This enum allows us to specify that we want "always hard", "always soft", or to use the dynamic soft-deletion strategy (default).
2. Uses the current strategy when deleting documents, with the new heuristics being used in the `DeletionStrategy::Dynamic` variant.
3. Updates the tests to use the appropriate DeletionStrategy whenever needed (one of `AlwaysHard` or `AlwaysSoft` depending on the test)

Note to reviewers: this PR is optimized for a commit-by-commit review.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
bors[bot] 2022-12-19 17:46:18 +00:00 committed by GitHub
commit 97fb64e40e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
49 changed files with 174 additions and 126 deletions

View File

@ -1192,8 +1192,8 @@ pub(crate) mod tests {
use crate::error::{Error, InternalError}; use crate::error::{Error, InternalError};
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS}; use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::update::{ use crate::update::{
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, self, DeleteDocuments, DeletionStrategy, IndexDocuments, IndexDocumentsConfig,
IndexerConfig, Settings, IndexDocumentsMethod, IndexerConfig, Settings,
}; };
use crate::{db_snap, obkv_to_json, Index}; use crate::{db_snap, obkv_to_json, Index};
@ -1282,6 +1282,17 @@ pub(crate) mod tests {
builder.execute(drop, || false)?; builder.execute(drop, || false)?;
Ok(()) Ok(())
} }
/// Test helper: deletes the document with the given external id in its own
/// write transaction and commits it.
///
/// The deletion uses the strategy currently set in
/// `self.index_documents_config.deletion_strategy`, so a test that configures
/// the index once gets the same behaviour for additions and deletions.
///
/// # Panics
///
/// Panics if building the deletion, executing it, or committing the
/// transaction fails.
pub fn delete_document(&self, external_document_id: &str) {
    let mut txn = self.write_txn().unwrap();

    let mut deletion = DeleteDocuments::new(&mut txn, &self).unwrap();
    // Reuse the strategy configured on the index rather than the builder's default.
    deletion.strategy(self.index_documents_config.deletion_strategy);
    deletion.delete_external_id(external_document_id);
    deletion.execute().unwrap();

    txn.commit().unwrap();
}
} }
#[test] #[test]
@ -1487,7 +1498,9 @@ pub(crate) mod tests {
use big_s::S; use big_s::S;
use maplit::hashset; use maplit::hashset;
let index = TempIndex::new(); let mut index = TempIndex::new();
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
let index = index;
index index
.update_settings(|settings| { .update_settings(|settings| {
@ -1657,7 +1670,8 @@ pub(crate) mod tests {
} }
// Second Batch: replace the documents with soft-deletion // Second Batch: replace the documents with soft-deletion
{ {
index.index_documents_config.disable_soft_deletion = false; index.index_documents_config.deletion_strategy =
crate::update::DeletionStrategy::AlwaysSoft;
let mut docs1 = vec![]; let mut docs1 = vec![];
for i in 0..3 { for i in 0..3 {
docs1.push(serde_json::json!( docs1.push(serde_json::json!(
@ -1726,7 +1740,7 @@ pub(crate) mod tests {
drop(rtxn); drop(rtxn);
// Third Batch: replace the documents with soft-deletion again // Third Batch: replace the documents with soft-deletion again
{ {
index.index_documents_config.disable_soft_deletion = false; index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
let mut docs1 = vec![]; let mut docs1 = vec![];
for i in 0..3 { for i in 0..3 {
docs1.push(serde_json::json!( docs1.push(serde_json::json!(
@ -1795,7 +1809,7 @@ pub(crate) mod tests {
// Fourth Batch: replace the documents without soft-deletion // Fourth Batch: replace the documents without soft-deletion
{ {
index.index_documents_config.disable_soft_deletion = true; index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
let mut docs1 = vec![]; let mut docs1 = vec![];
for i in 0..3 { for i in 0..3 {
docs1.push(serde_json::json!( docs1.push(serde_json::json!(
@ -1867,6 +1881,7 @@ pub(crate) mod tests {
fn bug_3021_first() { fn bug_3021_first() {
// https://github.com/meilisearch/meilisearch/issues/3021 // https://github.com/meilisearch/meilisearch/issues/3021
let mut index = TempIndex::new(); let mut index = TempIndex::new();
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments; index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
index index
@ -1891,11 +1906,7 @@ pub(crate) mod tests {
"###); "###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
let mut wtxn = index.write_txn().unwrap(); index.delete_document("34");
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("34");
delete.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, documents_ids, @"[0, ]"); db_snap!(index, documents_ids, @"[0, ]");
db_snap!(index, external_documents_ids, 2, @r###" db_snap!(index, external_documents_ids, 2, @r###"
@ -1936,11 +1947,7 @@ pub(crate) mod tests {
db_snap!(index, soft_deleted_documents_ids, 4, @"[]"); db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
// We do the test again, but deleting the document with id 0 instead of id 1 now // We do the test again, but deleting the document with id 0 instead of id 1 now
let mut wtxn = index.write_txn().unwrap(); index.delete_document("38");
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("38");
delete.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, documents_ids, @"[1, ]"); db_snap!(index, documents_ids, @"[1, ]");
db_snap!(index, external_documents_ids, 5, @r###" db_snap!(index, external_documents_ids, 5, @r###"
@ -1987,6 +1994,7 @@ pub(crate) mod tests {
fn bug_3021_second() { fn bug_3021_second() {
// https://github.com/meilisearch/meilisearch/issues/3021 // https://github.com/meilisearch/meilisearch/issues/3021
let mut index = TempIndex::new(); let mut index = TempIndex::new();
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments; index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
index index
@ -2011,11 +2019,7 @@ pub(crate) mod tests {
"###); "###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
let mut wtxn = index.write_txn().unwrap(); index.delete_document("34");
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("34");
delete.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, documents_ids, @"[0, ]"); db_snap!(index, documents_ids, @"[0, ]");
db_snap!(index, external_documents_ids, 2, @r###" db_snap!(index, external_documents_ids, 2, @r###"
@ -2116,6 +2120,7 @@ pub(crate) mod tests {
fn bug_3021_third() { fn bug_3021_third() {
// https://github.com/meilisearch/meilisearch/issues/3021 // https://github.com/meilisearch/meilisearch/issues/3021
let mut index = TempIndex::new(); let mut index = TempIndex::new();
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments; index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
index index
@ -2142,11 +2147,7 @@ pub(crate) mod tests {
"###); "###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
let mut wtxn = index.write_txn().unwrap(); index.delete_document("3");
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("3");
delete.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, documents_ids, @"[1, 2, ]"); db_snap!(index, documents_ids, @"[1, 2, ]");
db_snap!(index, external_documents_ids, 2, @r###" db_snap!(index, external_documents_ids, 2, @r###"
@ -2158,7 +2159,7 @@ pub(crate) mod tests {
"###); "###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]"); db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]");
index.index_documents_config.disable_soft_deletion = true; index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap(); index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap();

View File

@ -26,7 +26,7 @@ pub struct DeleteDocuments<'t, 'u, 'i> {
index: &'i Index, index: &'i Index,
external_documents_ids: ExternalDocumentsIds<'static>, external_documents_ids: ExternalDocumentsIds<'static>,
to_delete_docids: RoaringBitmap, to_delete_docids: RoaringBitmap,
disable_soft_deletion: bool, strategy: DeletionStrategy,
} }
/// Result of a [`DeleteDocuments`] operation. /// Result of a [`DeleteDocuments`] operation.
@ -36,6 +36,36 @@ pub struct DocumentDeletionResult {
pub remaining_documents: u64, pub remaining_documents: u64,
} }
/// Strategy for deleting documents.
///
/// - Soft-deleted documents are only marked as deleted; their data stays in the DB.
/// - Hard-deleted documents are permanently removed from the DB.
///
/// Soft-deleted documents trade disk space for runtime performance.
///
/// Note that any of these variants can be used at any given moment for any indexation
/// in a database. For instance, you can use [`DeletionStrategy::AlwaysSoft`] followed by
/// [`DeletionStrategy::AlwaysHard`] without issue.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum DeletionStrategy {
    /// Decide between soft and hard deletion when the deletion is executed, according to
    /// the number and the estimated size of the soft-deleted documents.
    ///
    /// This is the default strategy.
    #[default]
    Dynamic,
    /// Never permanently remove documents: deletions are always soft.
    AlwaysSoft,
    /// Always permanently remove documents: deletions are always hard.
    AlwaysHard,
}

impl std::fmt::Display for DeletionStrategy {
    /// Writes the snake_case name of the strategy: `dynamic`, `always_soft` or `always_hard`.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Exhaustive match on purpose: adding a variant must force a name to be chosen here.
        let name = match self {
            DeletionStrategy::Dynamic => "dynamic",
            DeletionStrategy::AlwaysSoft => "always_soft",
            DeletionStrategy::AlwaysHard => "always_hard",
        };
        f.write_str(name)
    }
}
/// Result of a [`DeleteDocuments`] operation, used for internal purposes. /// Result of a [`DeleteDocuments`] operation, used for internal purposes.
/// ///
/// It is a superset of the [`DocumentDeletionResult`] structure, giving /// It is a superset of the [`DocumentDeletionResult`] structure, giving
@ -59,12 +89,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
index, index,
external_documents_ids, external_documents_ids,
to_delete_docids: RoaringBitmap::new(), to_delete_docids: RoaringBitmap::new(),
disable_soft_deletion: false, strategy: Default::default(),
}) })
} }
pub fn disable_soft_deletion(&mut self, disable: bool) { pub fn strategy(&mut self, strategy: DeletionStrategy) {
self.disable_soft_deletion = disable; self.strategy = strategy;
} }
pub fn delete_document(&mut self, docid: u32) { pub fn delete_document(&mut self, docid: u32) {
@ -156,33 +186,34 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
soft_deleted_docids |= &self.to_delete_docids; soft_deleted_docids |= &self.to_delete_docids;
// if we have less documents to delete than the threshold we simply save them in // decide for a hard or soft deletion depending on the strategy
// the `soft_deleted_documents_ids` bitmap and early exit. let soft_deletion = match self.strategy {
DeletionStrategy::Dynamic => {
// decide to keep the soft deleted in the DB for now if they meet 2 criteria:
// 1. There is less than a fixed rate of 50% of soft-deleted to actual documents, *and*
// 2. Soft-deleted occupy an average of less than a fixed size on disk
let size_used = self.index.used_size()?; let size_used = self.index.used_size()?;
let map_size = self.index.env.map_size()? as u64;
let nb_documents = self.index.number_of_documents(self.wtxn)?; let nb_documents = self.index.number_of_documents(self.wtxn)?;
let nb_soft_deleted = soft_deleted_docids.len(); let nb_soft_deleted = soft_deleted_docids.len();
let percentage_available = 100 - (size_used * 100 / map_size); (nb_soft_deleted < nb_documents) && {
const SOFT_DELETED_SIZE_BYTE_THRESHOLD: u64 = 1_073_741_824; // 1GiB
// nb_documents + nb_soft_deleted !=0 because if nb_documents is 0 we short-circuit earlier, and then we moved the documents to delete
// from the documents_docids to the soft_deleted_docids.
let estimated_document_size = size_used / (nb_documents + nb_soft_deleted); let estimated_document_size = size_used / (nb_documents + nb_soft_deleted);
let estimated_size_used_by_soft_deleted = estimated_document_size * nb_soft_deleted; let estimated_size_used_by_soft_deleted =
let percentage_used_by_soft_deleted_documents = estimated_document_size * nb_soft_deleted;
estimated_size_used_by_soft_deleted * 100 / map_size; estimated_size_used_by_soft_deleted < SOFT_DELETED_SIZE_BYTE_THRESHOLD
}
}
DeletionStrategy::AlwaysSoft => true,
DeletionStrategy::AlwaysHard => false,
};
// if we have more than 10% of disk space available and the soft deleted if soft_deletion {
// documents uses less than 10% of the total space available, // Keep the soft-deleted in the DB
// we skip the deletion. Eg.
// - With 100Go of disk and 20Go used including 5Go of soft-deleted documents
// We dont delete anything.
// - With 100Go of disk and 95Go used including 1mo of soft-deleted documents
// We run the deletion.
// - With 100Go of disk and 50Go used including 15Go of soft-deleted documents
// We run the deletion.
if !self.disable_soft_deletion
&& percentage_available > 10
&& percentage_used_by_soft_deleted_documents < 10
{
self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?; self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?;
return Ok(DetailedDocumentDeletionResult { return Ok(DetailedDocumentDeletionResult {
deleted_documents: self.to_delete_docids.len(), deleted_documents: self.to_delete_docids.len(),
@ -191,7 +222,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
}); });
} }
// There is more than documents to delete than the threshold we needs to delete them all // Erase soft-deleted from DB
self.to_delete_docids = soft_deleted_docids; self.to_delete_docids = soft_deleted_docids;
// and we can reset the soft deleted bitmap // and we can reset the soft deleted bitmap
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?; self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
@ -654,7 +685,7 @@ mod tests {
wtxn: &mut RwTxn<'t, '_>, wtxn: &mut RwTxn<'t, '_>,
index: &'t Index, index: &'t Index,
external_ids: &[&str], external_ids: &[&str],
disable_soft_deletion: bool, strategy: DeletionStrategy,
) -> Vec<u32> { ) -> Vec<u32> {
let external_document_ids = index.external_documents_ids(wtxn).unwrap(); let external_document_ids = index.external_documents_ids(wtxn).unwrap();
let ids_to_delete: Vec<u32> = external_ids let ids_to_delete: Vec<u32> = external_ids
@ -664,14 +695,14 @@ mod tests {
// Delete some documents. // Delete some documents.
let mut builder = DeleteDocuments::new(wtxn, index).unwrap(); let mut builder = DeleteDocuments::new(wtxn, index).unwrap();
builder.disable_soft_deletion(disable_soft_deletion); builder.strategy(strategy);
external_ids.iter().for_each(|id| drop(builder.delete_external_id(id))); external_ids.iter().for_each(|id| drop(builder.delete_external_id(id)));
builder.execute().unwrap(); builder.execute().unwrap();
ids_to_delete ids_to_delete
} }
fn delete_documents_with_numbers_as_primary_key_(disable_soft_deletion: bool) { fn delete_documents_with_numbers_as_primary_key_(deletion_strategy: DeletionStrategy) {
let index = TempIndex::new(); let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
@ -691,17 +722,17 @@ mod tests {
builder.delete_document(0); builder.delete_document(0);
builder.delete_document(1); builder.delete_document(1);
builder.delete_document(2); builder.delete_document(2);
builder.disable_soft_deletion(disable_soft_deletion); builder.strategy(deletion_strategy);
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
// All these snapshots should be empty since the database was cleared // All these snapshots should be empty since the database was cleared
db_snap!(index, documents_ids, disable_soft_deletion); db_snap!(index, documents_ids, deletion_strategy);
db_snap!(index, word_docids, disable_soft_deletion); db_snap!(index, word_docids, deletion_strategy);
db_snap!(index, word_pair_proximity_docids, disable_soft_deletion); db_snap!(index, word_pair_proximity_docids, deletion_strategy);
db_snap!(index, facet_id_exists_docids, disable_soft_deletion); db_snap!(index, facet_id_exists_docids, deletion_strategy);
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion); db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
@ -710,11 +741,11 @@ mod tests {
#[test] #[test]
fn delete_documents_with_numbers_as_primary_key() { fn delete_documents_with_numbers_as_primary_key() {
delete_documents_with_numbers_as_primary_key_(true); delete_documents_with_numbers_as_primary_key_(DeletionStrategy::AlwaysHard);
delete_documents_with_numbers_as_primary_key_(false); delete_documents_with_numbers_as_primary_key_(DeletionStrategy::AlwaysSoft);
} }
fn delete_documents_with_strange_primary_key_(disable_soft_deletion: bool) { fn delete_documents_with_strange_primary_key_(strategy: DeletionStrategy) {
let index = TempIndex::new(); let index = TempIndex::new();
index index
@ -740,24 +771,24 @@ mod tests {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_external_id("0"); builder.delete_external_id("0");
builder.delete_external_id("1"); builder.delete_external_id("1");
builder.disable_soft_deletion(disable_soft_deletion); builder.strategy(strategy);
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
db_snap!(index, documents_ids, disable_soft_deletion); db_snap!(index, documents_ids, strategy);
db_snap!(index, word_docids, disable_soft_deletion); db_snap!(index, word_docids, strategy);
db_snap!(index, word_pair_proximity_docids, disable_soft_deletion); db_snap!(index, word_pair_proximity_docids, strategy);
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion); db_snap!(index, soft_deleted_documents_ids, strategy);
} }
#[test] #[test]
fn delete_documents_with_strange_primary_key() { fn delete_documents_with_strange_primary_key() {
delete_documents_with_strange_primary_key_(true); delete_documents_with_strange_primary_key_(DeletionStrategy::AlwaysHard);
delete_documents_with_strange_primary_key_(false); delete_documents_with_strange_primary_key_(DeletionStrategy::AlwaysSoft);
} }
fn filtered_placeholder_search_should_not_return_deleted_documents_( fn filtered_placeholder_search_should_not_return_deleted_documents_(
disable_soft_deletion: bool, deletion_strategy: DeletionStrategy,
) { ) {
let index = TempIndex::new(); let index = TempIndex::new();
@ -801,7 +832,7 @@ mod tests {
) )
.unwrap(); .unwrap();
delete_documents(&mut wtxn, &index, &["1_4", "1_70", "1_72"], disable_soft_deletion); delete_documents(&mut wtxn, &index, &["1_4", "1_70", "1_72"], deletion_strategy);
// Placeholder search with filter // Placeholder search with filter
let filter = Filter::from_str("label = sign").unwrap().unwrap(); let filter = Filter::from_str("label = sign").unwrap().unwrap();
@ -810,21 +841,27 @@ mod tests {
wtxn.commit().unwrap(); wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion); db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
db_snap!(index, word_docids, disable_soft_deletion); db_snap!(index, word_docids, deletion_strategy);
db_snap!(index, facet_id_f64_docids, disable_soft_deletion); db_snap!(index, facet_id_f64_docids, deletion_strategy);
db_snap!(index, word_pair_proximity_docids, disable_soft_deletion); db_snap!(index, word_pair_proximity_docids, deletion_strategy);
db_snap!(index, facet_id_exists_docids, disable_soft_deletion); db_snap!(index, facet_id_exists_docids, deletion_strategy);
db_snap!(index, facet_id_string_docids, disable_soft_deletion); db_snap!(index, facet_id_string_docids, deletion_strategy);
} }
#[test] #[test]
fn filtered_placeholder_search_should_not_return_deleted_documents() { fn filtered_placeholder_search_should_not_return_deleted_documents() {
filtered_placeholder_search_should_not_return_deleted_documents_(true); filtered_placeholder_search_should_not_return_deleted_documents_(
filtered_placeholder_search_should_not_return_deleted_documents_(false); DeletionStrategy::AlwaysHard,
);
filtered_placeholder_search_should_not_return_deleted_documents_(
DeletionStrategy::AlwaysSoft,
);
} }
fn placeholder_search_should_not_return_deleted_documents_(disable_soft_deletion: bool) { fn placeholder_search_should_not_return_deleted_documents_(
deletion_strategy: DeletionStrategy,
) {
let index = TempIndex::new(); let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
@ -865,8 +902,7 @@ mod tests {
) )
.unwrap(); .unwrap();
let deleted_internal_ids = let deleted_internal_ids = delete_documents(&mut wtxn, &index, &["1_4"], deletion_strategy);
delete_documents(&mut wtxn, &index, &["1_4"], disable_soft_deletion);
// Placeholder search // Placeholder search
let results = index.search(&wtxn).execute().unwrap(); let results = index.search(&wtxn).execute().unwrap();
@ -884,11 +920,11 @@ mod tests {
#[test] #[test]
fn placeholder_search_should_not_return_deleted_documents() { fn placeholder_search_should_not_return_deleted_documents() {
placeholder_search_should_not_return_deleted_documents_(true); placeholder_search_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
placeholder_search_should_not_return_deleted_documents_(false); placeholder_search_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
} }
fn search_should_not_return_deleted_documents_(disable_soft_deletion: bool) { fn search_should_not_return_deleted_documents_(deletion_strategy: DeletionStrategy) {
let index = TempIndex::new(); let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
@ -930,7 +966,7 @@ mod tests {
.unwrap(); .unwrap();
let deleted_internal_ids = let deleted_internal_ids =
delete_documents(&mut wtxn, &index, &["1_7", "1_52"], disable_soft_deletion); delete_documents(&mut wtxn, &index, &["1_7", "1_52"], deletion_strategy);
// search for abstract // search for abstract
let results = index.search(&wtxn).query("abstract").execute().unwrap(); let results = index.search(&wtxn).query("abstract").execute().unwrap();
@ -945,17 +981,17 @@ mod tests {
wtxn.commit().unwrap(); wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion); db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
} }
#[test] #[test]
fn search_should_not_return_deleted_documents() { fn search_should_not_return_deleted_documents() {
search_should_not_return_deleted_documents_(true); search_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
search_should_not_return_deleted_documents_(false); search_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
} }
fn geo_filtered_placeholder_search_should_not_return_deleted_documents_( fn geo_filtered_placeholder_search_should_not_return_deleted_documents_(
disable_soft_deletion: bool, deletion_strategy: DeletionStrategy,
) { ) {
let index = TempIndex::new(); let index = TempIndex::new();
@ -993,7 +1029,7 @@ mod tests {
let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"]; let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"];
let deleted_internal_ids = let deleted_internal_ids =
delete_documents(&mut wtxn, &index, &external_ids_to_delete, disable_soft_deletion); delete_documents(&mut wtxn, &index, &external_ids_to_delete, deletion_strategy);
// Placeholder search with geo filter // Placeholder search with geo filter
let filter = Filter::from_str("_geoRadius(50.6924, 3.1763, 20000)").unwrap().unwrap(); let filter = Filter::from_str("_geoRadius(50.6924, 3.1763, 20000)").unwrap().unwrap();
@ -1009,18 +1045,22 @@ mod tests {
wtxn.commit().unwrap(); wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion); db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
db_snap!(index, facet_id_f64_docids, disable_soft_deletion); db_snap!(index, facet_id_f64_docids, deletion_strategy);
db_snap!(index, facet_id_string_docids, disable_soft_deletion); db_snap!(index, facet_id_string_docids, deletion_strategy);
} }
#[test] #[test]
fn geo_filtered_placeholder_search_should_not_return_deleted_documents() { fn geo_filtered_placeholder_search_should_not_return_deleted_documents() {
geo_filtered_placeholder_search_should_not_return_deleted_documents_(true); geo_filtered_placeholder_search_should_not_return_deleted_documents_(
geo_filtered_placeholder_search_should_not_return_deleted_documents_(false); DeletionStrategy::AlwaysHard,
);
geo_filtered_placeholder_search_should_not_return_deleted_documents_(
DeletionStrategy::AlwaysSoft,
);
} }
fn get_documents_should_not_return_deleted_documents_(disable_soft_deletion: bool) { fn get_documents_should_not_return_deleted_documents_(deletion_strategy: DeletionStrategy) {
let index = TempIndex::new(); let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
@ -1063,7 +1103,7 @@ mod tests {
let deleted_external_ids = ["1_7", "1_52"]; let deleted_external_ids = ["1_7", "1_52"];
let deleted_internal_ids = let deleted_internal_ids =
delete_documents(&mut wtxn, &index, &deleted_external_ids, disable_soft_deletion); delete_documents(&mut wtxn, &index, &deleted_external_ids, deletion_strategy);
// list all documents // list all documents
let results = index.all_documents(&wtxn).unwrap(); let results = index.all_documents(&wtxn).unwrap();
@ -1094,16 +1134,16 @@ mod tests {
wtxn.commit().unwrap(); wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion); db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
} }
#[test] #[test]
fn get_documents_should_not_return_deleted_documents() { fn get_documents_should_not_return_deleted_documents() {
get_documents_should_not_return_deleted_documents_(true); get_documents_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
get_documents_should_not_return_deleted_documents_(false); get_documents_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
} }
fn stats_should_not_return_deleted_documents_(disable_soft_deletion: bool) { fn stats_should_not_return_deleted_documents_(deletion_strategy: DeletionStrategy) {
let index = TempIndex::new(); let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
@ -1137,7 +1177,7 @@ mod tests {
{ "docid": "1_69", "label": ["geometry"]} { "docid": "1_69", "label": ["geometry"]}
])).unwrap(); ])).unwrap();
delete_documents(&mut wtxn, &index, &["1_7", "1_52"], disable_soft_deletion); delete_documents(&mut wtxn, &index, &["1_7", "1_52"], deletion_strategy);
// count internal documents // count internal documents
let results = index.number_of_documents(&wtxn).unwrap(); let results = index.number_of_documents(&wtxn).unwrap();
@ -1151,12 +1191,12 @@ mod tests {
wtxn.commit().unwrap(); wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion); db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
} }
#[test] #[test]
fn stats_should_not_return_deleted_documents() { fn stats_should_not_return_deleted_documents() {
stats_should_not_return_deleted_documents_(true); stats_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
stats_should_not_return_deleted_documents_(false); stats_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
} }
} }

View File

@ -122,7 +122,7 @@ mod tests {
use crate::documents::documents_batch_reader_from_objects; use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::update::facet::test_helpers::ordered_string; use crate::update::facet::test_helpers::ordered_string;
use crate::update::DeleteDocuments; use crate::update::{DeleteDocuments, DeletionStrategy};
#[test] #[test]
fn delete_mixed_incremental_and_bulk() { fn delete_mixed_incremental_and_bulk() {
@ -165,7 +165,7 @@ mod tests {
let mut wtxn = index.env.write_txn().unwrap(); let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.disable_soft_deletion(true); builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter(0..100)); builder.delete_documents(&RoaringBitmap::from_iter(0..100));
// by deleting the first 100 documents, we expect that: // by deleting the first 100 documents, we expect that:
// - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13) // - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
@ -224,7 +224,7 @@ mod tests {
let mut wtxn = index.env.write_txn().unwrap(); let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.disable_soft_deletion(true); builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter(0..100)); builder.delete_documents(&RoaringBitmap::from_iter(0..100));
// by deleting the first 100 documents, we expect that: // by deleting the first 100 documents, we expect that:
// - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13) // - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
@ -283,7 +283,7 @@ mod tests {
for docid in docids_to_delete.into_iter().take(990) { for docid in docids_to_delete.into_iter().take(990) {
let mut wtxn = index.env.write_txn().unwrap(); let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap(); let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.disable_soft_deletion(true); builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter([docid])); builder.delete_documents(&RoaringBitmap::from_iter([docid]));
builder.execute().unwrap(); builder.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();

View File

@ -463,11 +463,14 @@ mod tests {
use crate::db_snap; use crate::db_snap;
use crate::documents::documents_batch_reader_from_objects; use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::update::DeletionStrategy;
#[test] #[test]
fn replace_all_identical_soft_deletion_then_hard_deletion() { fn replace_all_identical_soft_deletion_then_hard_deletion() {
let mut index = TempIndex::new_with_map_size(4096 * 1000 * 100); let mut index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index index
.update_settings(|settings| { .update_settings(|settings| {
settings.set_primary_key("id".to_owned()); settings.set_primary_key("id".to_owned());
@ -521,7 +524,7 @@ mod tests {
db_snap!(index, soft_deleted_documents_ids, "replaced_1_soft", @"6c975deb900f286d2f6456d2d5c3a123"); db_snap!(index, soft_deleted_documents_ids, "replaced_1_soft", @"6c975deb900f286d2f6456d2d5c3a123");
// Then replace the last document while disabling soft_deletion // Then replace the last document while disabling soft_deletion
index.index_documents_config.disable_soft_deletion = true; index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
let mut documents = vec![]; let mut documents = vec![];
for i in 999..1000 { for i in 999..1000 {
documents.push( documents.push(

View File

@ -35,8 +35,8 @@ use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError}; use crate::error::{Error, InternalError, UserError};
pub use crate::update::index_documents::helpers::CursorClonableMmap; pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{ use crate::update::{
self, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, WordPrefixDocids, self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
WordPrefixPositionDocids, WordsPrefixesFst, WordPrefixDocids, WordPrefixPositionDocids, WordsPrefixesFst,
}; };
use crate::{Index, Result, RoaringBitmapCodec}; use crate::{Index, Result, RoaringBitmapCodec};
@ -88,7 +88,7 @@ pub struct IndexDocumentsConfig {
pub words_positions_level_group_size: Option<NonZeroU32>, pub words_positions_level_group_size: Option<NonZeroU32>,
pub words_positions_min_level_size: Option<NonZeroU32>, pub words_positions_min_level_size: Option<NonZeroU32>,
pub update_method: IndexDocumentsMethod, pub update_method: IndexDocumentsMethod,
pub disable_soft_deletion: bool, pub deletion_strategy: DeletionStrategy,
pub autogenerate_docids: bool, pub autogenerate_docids: bool,
} }
@ -332,7 +332,7 @@ where
// able to simply insert all the documents even if they already exist in the database. // able to simply insert all the documents even if they already exist in the database.
if !replaced_documents_ids.is_empty() { if !replaced_documents_ids.is_empty() {
let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?; let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion); deletion_builder.strategy(self.config.deletion_strategy);
debug!("documents to delete {:?}", replaced_documents_ids); debug!("documents to delete {:?}", replaced_documents_ids);
deletion_builder.delete_documents(&replaced_documents_ids); deletion_builder.delete_documents(&replaced_documents_ids);
let deleted_documents_result = deletion_builder.execute_inner()?; let deleted_documents_result = deletion_builder.execute_inner()?;

View File

@ -1,6 +1,6 @@
pub use self::available_documents_ids::AvailableDocumentsIds; pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments; pub use self::clear_documents::ClearDocuments;
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult}; pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDeletionResult};
pub use self::facet::bulk::FacetsUpdateBulk; pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::facet::incremental::FacetsUpdateIncrementalInner; pub use self::facet::incremental::FacetsUpdateIncrementalInner;
pub use self::index_documents::{ pub use self::index_documents::{

View File

@ -163,7 +163,7 @@ mod tests {
use crate::db_snap; use crate::db_snap;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::index::tests::TempIndex; use crate::index::tests::TempIndex;
use crate::update::{DeleteDocuments, IndexDocumentsMethod}; use crate::update::{DeleteDocuments, DeletionStrategy, IndexDocumentsMethod};
fn documents_with_enough_different_words_for_prefixes( fn documents_with_enough_different_words_for_prefixes(
prefixes: &[&str], prefixes: &[&str],
@ -351,7 +351,7 @@ mod tests {
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap(); let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.disable_soft_deletion(true); delete.strategy(DeletionStrategy::AlwaysHard);
delete.delete_documents(&RoaringBitmap::from_iter([50])); delete.delete_documents(&RoaringBitmap::from_iter([50]));
delete.execute().unwrap(); delete.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -363,7 +363,7 @@ mod tests {
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap(); let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.disable_soft_deletion(true); delete.strategy(DeletionStrategy::AlwaysHard);
delete.delete_documents(&RoaringBitmap::from_iter(0..50)); delete.delete_documents(&RoaringBitmap::from_iter(0..50));
delete.execute().unwrap(); delete.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -435,6 +435,7 @@ mod tests {
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap(); let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.strategy(DeletionStrategy::AlwaysSoft);
delete.delete_documents(&RoaringBitmap::from_iter([50])); delete.delete_documents(&RoaringBitmap::from_iter([50]));
delete.execute().unwrap(); delete.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -446,6 +447,8 @@ mod tests {
let mut wtxn = index.write_txn().unwrap(); let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap(); let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.strategy(DeletionStrategy::AlwaysSoft);
delete.delete_documents(&RoaringBitmap::from_iter(0..50)); delete.delete_documents(&RoaringBitmap::from_iter(0..50));
delete.execute().unwrap(); delete.execute().unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
@ -471,6 +474,7 @@ mod tests {
let mut index = TempIndex::new(); let mut index = TempIndex::new();
index.index_documents_config.words_prefix_threshold = Some(50); index.index_documents_config.words_prefix_threshold = Some(50);
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments; index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index index
.update_settings(|settings| { .update_settings(|settings| {
@ -530,7 +534,7 @@ mod tests {
fn replace_hard_deletion() { fn replace_hard_deletion() {
let mut index = TempIndex::new(); let mut index = TempIndex::new();
index.index_documents_config.words_prefix_threshold = Some(50); index.index_documents_config.words_prefix_threshold = Some(50);
index.index_documents_config.disable_soft_deletion = true; index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments; index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
index index