747: Soft-deletion computation no longer depends on the mapsize r=irevoire a=dureuill

# Pull Request

## Related issue

Related to https://github.com/meilisearch/meilisearch/issues/3231: After removing `--max-index-size`, the `mapsize` will always be unrelated to the actual max size the user wants for their DB, so it doesn't make sense to use these values any longer.

This implements solution 2.3 from https://github.com/meilisearch/meilisearch/issues/3231#issuecomment-1348628824

## What does this PR do?

### User-visible

- Soft-deleted are no longer deleted when there is less than 10% of the mapsize available or when they take more than 10% of the mapsize
- Instead, they are deleted when they are more soft deleted than regular documents, or when they take more than 1GiB disk space (estimated).

### Implementation standpoint

1. Adds a `DeletionStrategy` struct to replace the boolean `disable_soft_deletion` that we had up until now. This enum allows us to specify that we want "always hard", "always soft", or to use the dynamic soft-deletion strategy (default).
2. Uses the current strategy when deleting documents, with the new heuristics being used in the `DeletionStrategy::Dynamic` variant.
3. Updates the tests to use the appropriate DeletionStrategy whenever needed (one of `AlwaysHard` or `AlwaysSoft` depending on the test)

Note to reviewers: this PR is optimized for a commit-by-commit review.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
bors[bot] 2022-12-19 17:46:18 +00:00 committed by GitHub
commit 97fb64e40e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
49 changed files with 174 additions and 126 deletions

View File

@ -1192,8 +1192,8 @@ pub(crate) mod tests {
use crate::error::{Error, InternalError};
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::update::{
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
IndexerConfig, Settings,
self, DeleteDocuments, DeletionStrategy, IndexDocuments, IndexDocumentsConfig,
IndexDocumentsMethod, IndexerConfig, Settings,
};
use crate::{db_snap, obkv_to_json, Index};
@ -1282,6 +1282,17 @@ pub(crate) mod tests {
builder.execute(drop, || false)?;
Ok(())
}
pub fn delete_document(&self, external_document_id: &str) {
let mut wtxn = self.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &self).unwrap();
delete.strategy(self.index_documents_config.deletion_strategy);
delete.delete_external_id(external_document_id);
delete.execute().unwrap();
wtxn.commit().unwrap();
}
}
#[test]
@ -1487,7 +1498,9 @@ pub(crate) mod tests {
use big_s::S;
use maplit::hashset;
let index = TempIndex::new();
let mut index = TempIndex::new();
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
let index = index;
index
.update_settings(|settings| {
@ -1657,7 +1670,8 @@ pub(crate) mod tests {
}
// Second Batch: replace the documents with soft-deletion
{
index.index_documents_config.disable_soft_deletion = false;
index.index_documents_config.deletion_strategy =
crate::update::DeletionStrategy::AlwaysSoft;
let mut docs1 = vec![];
for i in 0..3 {
docs1.push(serde_json::json!(
@ -1726,7 +1740,7 @@ pub(crate) mod tests {
drop(rtxn);
// Third Batch: replace the documents with soft-deletion again
{
index.index_documents_config.disable_soft_deletion = false;
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
let mut docs1 = vec![];
for i in 0..3 {
docs1.push(serde_json::json!(
@ -1795,7 +1809,7 @@ pub(crate) mod tests {
// Fourth Batch: replace the documents without soft-deletion
{
index.index_documents_config.disable_soft_deletion = true;
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
let mut docs1 = vec![];
for i in 0..3 {
docs1.push(serde_json::json!(
@ -1867,6 +1881,7 @@ pub(crate) mod tests {
fn bug_3021_first() {
// https://github.com/meilisearch/meilisearch/issues/3021
let mut index = TempIndex::new();
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
index
@ -1891,11 +1906,7 @@ pub(crate) mod tests {
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("34");
delete.execute().unwrap();
wtxn.commit().unwrap();
index.delete_document("34");
db_snap!(index, documents_ids, @"[0, ]");
db_snap!(index, external_documents_ids, 2, @r###"
@ -1936,11 +1947,7 @@ pub(crate) mod tests {
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
// We do the test again, but deleting the document with id 0 instead of id 1 now
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("38");
delete.execute().unwrap();
wtxn.commit().unwrap();
index.delete_document("38");
db_snap!(index, documents_ids, @"[1, ]");
db_snap!(index, external_documents_ids, 5, @r###"
@ -1987,6 +1994,7 @@ pub(crate) mod tests {
fn bug_3021_second() {
// https://github.com/meilisearch/meilisearch/issues/3021
let mut index = TempIndex::new();
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
index
@ -2011,11 +2019,7 @@ pub(crate) mod tests {
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("34");
delete.execute().unwrap();
wtxn.commit().unwrap();
index.delete_document("34");
db_snap!(index, documents_ids, @"[0, ]");
db_snap!(index, external_documents_ids, 2, @r###"
@ -2116,6 +2120,7 @@ pub(crate) mod tests {
fn bug_3021_third() {
// https://github.com/meilisearch/meilisearch/issues/3021
let mut index = TempIndex::new();
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
index
@ -2142,11 +2147,7 @@ pub(crate) mod tests {
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("3");
delete.execute().unwrap();
wtxn.commit().unwrap();
index.delete_document("3");
db_snap!(index, documents_ids, @"[1, 2, ]");
db_snap!(index, external_documents_ids, 2, @r###"
@ -2158,7 +2159,7 @@ pub(crate) mod tests {
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]");
index.index_documents_config.disable_soft_deletion = true;
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap();

View File

@ -26,7 +26,7 @@ pub struct DeleteDocuments<'t, 'u, 'i> {
index: &'i Index,
external_documents_ids: ExternalDocumentsIds<'static>,
to_delete_docids: RoaringBitmap,
disable_soft_deletion: bool,
strategy: DeletionStrategy,
}
/// Result of a [`DeleteDocuments`] operation.
@ -36,6 +36,36 @@ pub struct DocumentDeletionResult {
pub remaining_documents: u64,
}
/// Strategy for deleting documents.
///
/// - Soft-deleted documents are simply marked as deleted without being actually removed from DB.
/// - Hard-deleted documents are definitely suppressed from the DB.
///
/// Soft-deleted documents trade disk space for runtime performance.
///
/// Note that any of these variants can be used at any given moment for any indexation in a database.
/// For instance, you can use an [`AlwaysSoft`] followed by an [`AlwaysHard`] option without issue.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub enum DeletionStrategy {
#[default]
/// Definitely suppress documents according to the number or size of soft-deleted documents
Dynamic,
/// Never definitely suppress documents
AlwaysSoft,
/// Always definitely suppress documents
AlwaysHard,
}
impl std::fmt::Display for DeletionStrategy {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
DeletionStrategy::Dynamic => write!(f, "dynamic"),
DeletionStrategy::AlwaysSoft => write!(f, "always_soft"),
DeletionStrategy::AlwaysHard => write!(f, "always_hard"),
}
}
}
/// Result of a [`DeleteDocuments`] operation, used for internal purposes.
///
/// It is a superset of the [`DocumentDeletionResult`] structure, giving
@ -59,12 +89,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
index,
external_documents_ids,
to_delete_docids: RoaringBitmap::new(),
disable_soft_deletion: false,
strategy: Default::default(),
})
}
pub fn disable_soft_deletion(&mut self, disable: bool) {
self.disable_soft_deletion = disable;
pub fn strategy(&mut self, strategy: DeletionStrategy) {
self.strategy = strategy;
}
pub fn delete_document(&mut self, docid: u32) {
@ -156,33 +186,34 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
soft_deleted_docids |= &self.to_delete_docids;
// if we have less documents to delete than the threshold we simply save them in
// the `soft_deleted_documents_ids` bitmap and early exit.
let size_used = self.index.used_size()?;
let map_size = self.index.env.map_size()? as u64;
let nb_documents = self.index.number_of_documents(self.wtxn)?;
let nb_soft_deleted = soft_deleted_docids.len();
// decide for a hard or soft deletion depending on the strategy
let soft_deletion = match self.strategy {
DeletionStrategy::Dynamic => {
// decide to keep the soft deleted in the DB for now if they meet 2 criteria:
// 1. There is less than a fixed rate of 50% of soft-deleted to actual documents, *and*
// 2. Soft-deleted occupy an average of less than a fixed size on disk
let percentage_available = 100 - (size_used * 100 / map_size);
let estimated_document_size = size_used / (nb_documents + nb_soft_deleted);
let estimated_size_used_by_soft_deleted = estimated_document_size * nb_soft_deleted;
let percentage_used_by_soft_deleted_documents =
estimated_size_used_by_soft_deleted * 100 / map_size;
let size_used = self.index.used_size()?;
let nb_documents = self.index.number_of_documents(self.wtxn)?;
let nb_soft_deleted = soft_deleted_docids.len();
// if we have more than 10% of disk space available and the soft deleted
// documents uses less than 10% of the total space available,
// we skip the deletion. Eg.
// - With 100Go of disk and 20Go used including 5Go of soft-deleted documents
// We dont delete anything.
// - With 100Go of disk and 95Go used including 1mo of soft-deleted documents
// We run the deletion.
// - With 100Go of disk and 50Go used including 15Go of soft-deleted documents
// We run the deletion.
(nb_soft_deleted < nb_documents) && {
const SOFT_DELETED_SIZE_BYTE_THRESHOLD: u64 = 1_073_741_824; // 1GiB
if !self.disable_soft_deletion
&& percentage_available > 10
&& percentage_used_by_soft_deleted_documents < 10
{
// nb_documents + nb_soft_deleted !=0 because if nb_documents is 0 we short-circuit earlier, and then we moved the documents to delete
// from the documents_docids to the soft_deleted_docids.
let estimated_document_size = size_used / (nb_documents + nb_soft_deleted);
let estimated_size_used_by_soft_deleted =
estimated_document_size * nb_soft_deleted;
estimated_size_used_by_soft_deleted < SOFT_DELETED_SIZE_BYTE_THRESHOLD
}
}
DeletionStrategy::AlwaysSoft => true,
DeletionStrategy::AlwaysHard => false,
};
if soft_deletion {
// Keep the soft-deleted in the DB
self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?;
return Ok(DetailedDocumentDeletionResult {
deleted_documents: self.to_delete_docids.len(),
@ -191,7 +222,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
});
}
// There is more than documents to delete than the threshold we needs to delete them all
// Erase soft-deleted from DB
self.to_delete_docids = soft_deleted_docids;
// and we can reset the soft deleted bitmap
self.index.put_soft_deleted_documents_ids(self.wtxn, &RoaringBitmap::new())?;
@ -654,7 +685,7 @@ mod tests {
wtxn: &mut RwTxn<'t, '_>,
index: &'t Index,
external_ids: &[&str],
disable_soft_deletion: bool,
strategy: DeletionStrategy,
) -> Vec<u32> {
let external_document_ids = index.external_documents_ids(wtxn).unwrap();
let ids_to_delete: Vec<u32> = external_ids
@ -664,14 +695,14 @@ mod tests {
// Delete some documents.
let mut builder = DeleteDocuments::new(wtxn, index).unwrap();
builder.disable_soft_deletion(disable_soft_deletion);
builder.strategy(strategy);
external_ids.iter().for_each(|id| drop(builder.delete_external_id(id)));
builder.execute().unwrap();
ids_to_delete
}
fn delete_documents_with_numbers_as_primary_key_(disable_soft_deletion: bool) {
fn delete_documents_with_numbers_as_primary_key_(deletion_strategy: DeletionStrategy) {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
@ -691,17 +722,17 @@ mod tests {
builder.delete_document(0);
builder.delete_document(1);
builder.delete_document(2);
builder.disable_soft_deletion(disable_soft_deletion);
builder.strategy(deletion_strategy);
builder.execute().unwrap();
wtxn.commit().unwrap();
// All these snapshots should be empty since the database was cleared
db_snap!(index, documents_ids, disable_soft_deletion);
db_snap!(index, word_docids, disable_soft_deletion);
db_snap!(index, word_pair_proximity_docids, disable_soft_deletion);
db_snap!(index, facet_id_exists_docids, disable_soft_deletion);
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion);
db_snap!(index, documents_ids, deletion_strategy);
db_snap!(index, word_docids, deletion_strategy);
db_snap!(index, word_pair_proximity_docids, deletion_strategy);
db_snap!(index, facet_id_exists_docids, deletion_strategy);
db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
let rtxn = index.read_txn().unwrap();
@ -710,11 +741,11 @@ mod tests {
#[test]
fn delete_documents_with_numbers_as_primary_key() {
delete_documents_with_numbers_as_primary_key_(true);
delete_documents_with_numbers_as_primary_key_(false);
delete_documents_with_numbers_as_primary_key_(DeletionStrategy::AlwaysHard);
delete_documents_with_numbers_as_primary_key_(DeletionStrategy::AlwaysSoft);
}
fn delete_documents_with_strange_primary_key_(disable_soft_deletion: bool) {
fn delete_documents_with_strange_primary_key_(strategy: DeletionStrategy) {
let index = TempIndex::new();
index
@ -740,24 +771,24 @@ mod tests {
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.delete_external_id("0");
builder.delete_external_id("1");
builder.disable_soft_deletion(disable_soft_deletion);
builder.strategy(strategy);
builder.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, documents_ids, disable_soft_deletion);
db_snap!(index, word_docids, disable_soft_deletion);
db_snap!(index, word_pair_proximity_docids, disable_soft_deletion);
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion);
db_snap!(index, documents_ids, strategy);
db_snap!(index, word_docids, strategy);
db_snap!(index, word_pair_proximity_docids, strategy);
db_snap!(index, soft_deleted_documents_ids, strategy);
}
#[test]
fn delete_documents_with_strange_primary_key() {
delete_documents_with_strange_primary_key_(true);
delete_documents_with_strange_primary_key_(false);
delete_documents_with_strange_primary_key_(DeletionStrategy::AlwaysHard);
delete_documents_with_strange_primary_key_(DeletionStrategy::AlwaysSoft);
}
fn filtered_placeholder_search_should_not_return_deleted_documents_(
disable_soft_deletion: bool,
deletion_strategy: DeletionStrategy,
) {
let index = TempIndex::new();
@ -801,7 +832,7 @@ mod tests {
)
.unwrap();
delete_documents(&mut wtxn, &index, &["1_4", "1_70", "1_72"], disable_soft_deletion);
delete_documents(&mut wtxn, &index, &["1_4", "1_70", "1_72"], deletion_strategy);
// Placeholder search with filter
let filter = Filter::from_str("label = sign").unwrap().unwrap();
@ -810,21 +841,27 @@ mod tests {
wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion);
db_snap!(index, word_docids, disable_soft_deletion);
db_snap!(index, facet_id_f64_docids, disable_soft_deletion);
db_snap!(index, word_pair_proximity_docids, disable_soft_deletion);
db_snap!(index, facet_id_exists_docids, disable_soft_deletion);
db_snap!(index, facet_id_string_docids, disable_soft_deletion);
db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
db_snap!(index, word_docids, deletion_strategy);
db_snap!(index, facet_id_f64_docids, deletion_strategy);
db_snap!(index, word_pair_proximity_docids, deletion_strategy);
db_snap!(index, facet_id_exists_docids, deletion_strategy);
db_snap!(index, facet_id_string_docids, deletion_strategy);
}
#[test]
fn filtered_placeholder_search_should_not_return_deleted_documents() {
filtered_placeholder_search_should_not_return_deleted_documents_(true);
filtered_placeholder_search_should_not_return_deleted_documents_(false);
filtered_placeholder_search_should_not_return_deleted_documents_(
DeletionStrategy::AlwaysHard,
);
filtered_placeholder_search_should_not_return_deleted_documents_(
DeletionStrategy::AlwaysSoft,
);
}
fn placeholder_search_should_not_return_deleted_documents_(disable_soft_deletion: bool) {
fn placeholder_search_should_not_return_deleted_documents_(
deletion_strategy: DeletionStrategy,
) {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
@ -865,8 +902,7 @@ mod tests {
)
.unwrap();
let deleted_internal_ids =
delete_documents(&mut wtxn, &index, &["1_4"], disable_soft_deletion);
let deleted_internal_ids = delete_documents(&mut wtxn, &index, &["1_4"], deletion_strategy);
// Placeholder search
let results = index.search(&wtxn).execute().unwrap();
@ -884,11 +920,11 @@ mod tests {
#[test]
fn placeholder_search_should_not_return_deleted_documents() {
placeholder_search_should_not_return_deleted_documents_(true);
placeholder_search_should_not_return_deleted_documents_(false);
placeholder_search_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
placeholder_search_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
}
fn search_should_not_return_deleted_documents_(disable_soft_deletion: bool) {
fn search_should_not_return_deleted_documents_(deletion_strategy: DeletionStrategy) {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
@ -930,7 +966,7 @@ mod tests {
.unwrap();
let deleted_internal_ids =
delete_documents(&mut wtxn, &index, &["1_7", "1_52"], disable_soft_deletion);
delete_documents(&mut wtxn, &index, &["1_7", "1_52"], deletion_strategy);
// search for abstract
let results = index.search(&wtxn).query("abstract").execute().unwrap();
@ -945,17 +981,17 @@ mod tests {
wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion);
db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
}
#[test]
fn search_should_not_return_deleted_documents() {
search_should_not_return_deleted_documents_(true);
search_should_not_return_deleted_documents_(false);
search_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
search_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
}
fn geo_filtered_placeholder_search_should_not_return_deleted_documents_(
disable_soft_deletion: bool,
deletion_strategy: DeletionStrategy,
) {
let index = TempIndex::new();
@ -993,7 +1029,7 @@ mod tests {
let external_ids_to_delete = ["5", "6", "7", "12", "17", "19"];
let deleted_internal_ids =
delete_documents(&mut wtxn, &index, &external_ids_to_delete, disable_soft_deletion);
delete_documents(&mut wtxn, &index, &external_ids_to_delete, deletion_strategy);
// Placeholder search with geo filter
let filter = Filter::from_str("_geoRadius(50.6924, 3.1763, 20000)").unwrap().unwrap();
@ -1009,18 +1045,22 @@ mod tests {
wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion);
db_snap!(index, facet_id_f64_docids, disable_soft_deletion);
db_snap!(index, facet_id_string_docids, disable_soft_deletion);
db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
db_snap!(index, facet_id_f64_docids, deletion_strategy);
db_snap!(index, facet_id_string_docids, deletion_strategy);
}
#[test]
fn geo_filtered_placeholder_search_should_not_return_deleted_documents() {
geo_filtered_placeholder_search_should_not_return_deleted_documents_(true);
geo_filtered_placeholder_search_should_not_return_deleted_documents_(false);
geo_filtered_placeholder_search_should_not_return_deleted_documents_(
DeletionStrategy::AlwaysHard,
);
geo_filtered_placeholder_search_should_not_return_deleted_documents_(
DeletionStrategy::AlwaysSoft,
);
}
fn get_documents_should_not_return_deleted_documents_(disable_soft_deletion: bool) {
fn get_documents_should_not_return_deleted_documents_(deletion_strategy: DeletionStrategy) {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
@ -1063,7 +1103,7 @@ mod tests {
let deleted_external_ids = ["1_7", "1_52"];
let deleted_internal_ids =
delete_documents(&mut wtxn, &index, &deleted_external_ids, disable_soft_deletion);
delete_documents(&mut wtxn, &index, &deleted_external_ids, deletion_strategy);
// list all documents
let results = index.all_documents(&wtxn).unwrap();
@ -1094,16 +1134,16 @@ mod tests {
wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion);
db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
}
#[test]
fn get_documents_should_not_return_deleted_documents() {
get_documents_should_not_return_deleted_documents_(true);
get_documents_should_not_return_deleted_documents_(false);
get_documents_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
get_documents_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
}
fn stats_should_not_return_deleted_documents_(disable_soft_deletion: bool) {
fn stats_should_not_return_deleted_documents_(deletion_strategy: DeletionStrategy) {
let index = TempIndex::new();
let mut wtxn = index.write_txn().unwrap();
@ -1137,7 +1177,7 @@ mod tests {
{ "docid": "1_69", "label": ["geometry"]}
])).unwrap();
delete_documents(&mut wtxn, &index, &["1_7", "1_52"], disable_soft_deletion);
delete_documents(&mut wtxn, &index, &["1_7", "1_52"], deletion_strategy);
// count internal documents
let results = index.number_of_documents(&wtxn).unwrap();
@ -1151,12 +1191,12 @@ mod tests {
wtxn.commit().unwrap();
db_snap!(index, soft_deleted_documents_ids, disable_soft_deletion);
db_snap!(index, soft_deleted_documents_ids, deletion_strategy);
}
#[test]
fn stats_should_not_return_deleted_documents() {
stats_should_not_return_deleted_documents_(true);
stats_should_not_return_deleted_documents_(false);
stats_should_not_return_deleted_documents_(DeletionStrategy::AlwaysHard);
stats_should_not_return_deleted_documents_(DeletionStrategy::AlwaysSoft);
}
}

View File

@ -122,7 +122,7 @@ mod tests {
use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex;
use crate::update::facet::test_helpers::ordered_string;
use crate::update::DeleteDocuments;
use crate::update::{DeleteDocuments, DeletionStrategy};
#[test]
fn delete_mixed_incremental_and_bulk() {
@ -165,7 +165,7 @@ mod tests {
let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.disable_soft_deletion(true);
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter(0..100));
// by deleting the first 100 documents, we expect that:
// - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
@ -224,7 +224,7 @@ mod tests {
let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.disable_soft_deletion(true);
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter(0..100));
// by deleting the first 100 documents, we expect that:
// - the "id" part of the DB will be updated in bulk, since #affected_facet_value = 100 which is > database_len / 150 (= 13)
@ -283,7 +283,7 @@ mod tests {
for docid in docids_to_delete.into_iter().take(990) {
let mut wtxn = index.env.write_txn().unwrap();
let mut builder = DeleteDocuments::new(&mut wtxn, &index).unwrap();
builder.disable_soft_deletion(true);
builder.strategy(DeletionStrategy::AlwaysHard);
builder.delete_documents(&RoaringBitmap::from_iter([docid]));
builder.execute().unwrap();
wtxn.commit().unwrap();

View File

@ -463,11 +463,14 @@ mod tests {
use crate::db_snap;
use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex;
use crate::update::DeletionStrategy;
#[test]
fn replace_all_identical_soft_deletion_then_hard_deletion() {
let mut index = TempIndex::new_with_map_size(4096 * 1000 * 100);
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index
.update_settings(|settings| {
settings.set_primary_key("id".to_owned());
@ -521,7 +524,7 @@ mod tests {
db_snap!(index, soft_deleted_documents_ids, "replaced_1_soft", @"6c975deb900f286d2f6456d2d5c3a123");
// Then replace the last document while disabling soft_deletion
index.index_documents_config.disable_soft_deletion = true;
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
let mut documents = vec![];
for i in 999..1000 {
documents.push(

View File

@ -35,8 +35,8 @@ use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::{Error, InternalError, UserError};
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
self, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, WordPrefixDocids,
WordPrefixPositionDocids, WordsPrefixesFst,
self, DeletionStrategy, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
WordPrefixDocids, WordPrefixPositionDocids, WordsPrefixesFst,
};
use crate::{Index, Result, RoaringBitmapCodec};
@ -88,7 +88,7 @@ pub struct IndexDocumentsConfig {
pub words_positions_level_group_size: Option<NonZeroU32>,
pub words_positions_min_level_size: Option<NonZeroU32>,
pub update_method: IndexDocumentsMethod,
pub disable_soft_deletion: bool,
pub deletion_strategy: DeletionStrategy,
pub autogenerate_docids: bool,
}
@ -332,7 +332,7 @@ where
// able to simply insert all the documents even if they already exist in the database.
if !replaced_documents_ids.is_empty() {
let mut deletion_builder = update::DeleteDocuments::new(self.wtxn, self.index)?;
deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion);
deletion_builder.strategy(self.config.deletion_strategy);
debug!("documents to delete {:?}", replaced_documents_ids);
deletion_builder.delete_documents(&replaced_documents_ids);
let deleted_documents_result = deletion_builder.execute_inner()?;

View File

@ -1,6 +1,6 @@
pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments;
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult};
pub use self::delete_documents::{DeleteDocuments, DeletionStrategy, DocumentDeletionResult};
pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
pub use self::index_documents::{

View File

@ -163,7 +163,7 @@ mod tests {
use crate::db_snap;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::index::tests::TempIndex;
use crate::update::{DeleteDocuments, IndexDocumentsMethod};
use crate::update::{DeleteDocuments, DeletionStrategy, IndexDocumentsMethod};
fn documents_with_enough_different_words_for_prefixes(
prefixes: &[&str],
@ -351,7 +351,7 @@ mod tests {
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.disable_soft_deletion(true);
delete.strategy(DeletionStrategy::AlwaysHard);
delete.delete_documents(&RoaringBitmap::from_iter([50]));
delete.execute().unwrap();
wtxn.commit().unwrap();
@ -363,7 +363,7 @@ mod tests {
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.disable_soft_deletion(true);
delete.strategy(DeletionStrategy::AlwaysHard);
delete.delete_documents(&RoaringBitmap::from_iter(0..50));
delete.execute().unwrap();
wtxn.commit().unwrap();
@ -435,6 +435,7 @@ mod tests {
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.strategy(DeletionStrategy::AlwaysSoft);
delete.delete_documents(&RoaringBitmap::from_iter([50]));
delete.execute().unwrap();
wtxn.commit().unwrap();
@ -446,6 +447,8 @@ mod tests {
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.strategy(DeletionStrategy::AlwaysSoft);
delete.delete_documents(&RoaringBitmap::from_iter(0..50));
delete.execute().unwrap();
wtxn.commit().unwrap();
@ -471,6 +474,7 @@ mod tests {
let mut index = TempIndex::new();
index.index_documents_config.words_prefix_threshold = Some(50);
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysSoft;
index
.update_settings(|settings| {
@ -530,7 +534,7 @@ mod tests {
fn replace_hard_deletion() {
let mut index = TempIndex::new();
index.index_documents_config.words_prefix_threshold = Some(50);
index.index_documents_config.disable_soft_deletion = true;
index.index_documents_config.deletion_strategy = DeletionStrategy::AlwaysHard;
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
index