diff --git a/crates/benchmarks/benches/indexing.rs b/crates/benchmarks/benches/indexing.rs index 7c1783a1a..4bd5315ff 100644 --- a/crates/benchmarks/benches/indexing.rs +++ b/crates/benchmarks/benches/indexing.rs @@ -10,7 +10,7 @@ use milli::documents::PrimaryKey; use milli::heed::{EnvOpenOptions, RwTxn}; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::Index; use rand::seq::SliceRandom; @@ -138,10 +138,9 @@ fn indexing_songs_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -205,10 +204,9 @@ fn reindexing_songs_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -250,10 +248,9 @@ fn reindexing_songs_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - 
indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -319,10 +316,9 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -396,10 +392,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS_1_2, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -441,10 +436,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents 
= utils::documents_from(datasets_paths::SMOL_SONGS_3_4, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -482,10 +476,9 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS_4_4, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -549,11 +542,10 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -617,10 +609,9 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_SONGS, "csv"); - indexer.add_documents(&documents).unwrap(); + 
indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -684,10 +675,9 @@ fn indexing_wiki(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -750,10 +740,9 @@ fn reindexing_wiki(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -795,10 +784,9 @@ fn reindexing_wiki(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -863,10 +851,9 @@ fn 
deleting_wiki_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -939,11 +926,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_1_2, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -985,11 +971,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_3_4, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1027,11 +1012,10 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut 
new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_WIKI_ARTICLES_4_4, "csv"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1095,10 +1079,9 @@ fn indexing_movies_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1161,10 +1144,9 @@ fn reindexing_movies_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1206,10 +1188,9 @@ fn reindexing_movies_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = 
indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1274,10 +1255,9 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1387,10 +1367,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES_1_2, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1432,10 +1411,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES_3_4, "json"); - indexer.add_documents(&documents).unwrap(); + 
indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1473,10 +1451,9 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::MOVIES_4_4, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1563,10 +1540,9 @@ fn indexing_nested_movies_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1654,10 +1630,9 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = 
indexer @@ -1737,10 +1712,9 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::NESTED_MOVIES, "json"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1804,10 +1778,9 @@ fn indexing_geo(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1870,10 +1843,9 @@ fn reindexing_geo(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1915,10 +1887,9 @@ fn reindexing_geo(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut 
new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer @@ -1983,10 +1954,9 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = - indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = utils::documents_from(datasets_paths::SMOL_ALL_COUNTRIES, "jsonl"); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer diff --git a/crates/benchmarks/benches/utils.rs b/crates/benchmarks/benches/utils.rs index b472b4f6b..5baeca869 100644 --- a/crates/benchmarks/benches/utils.rs +++ b/crates/benchmarks/benches/utils.rs @@ -12,7 +12,7 @@ use memmap2::Mmap; use milli::heed::EnvOpenOptions; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{Criterion, Filter, Index, Object, TermsMatchingStrategy}; use serde_json::Value; @@ -99,8 +99,8 @@ pub fn base_setup(conf: &Conf) -> Index { let mut new_fields_ids_map = db_fields_ids_map.clone(); let documents = documents_from(conf.dataset, conf.dataset_format); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); - 
indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer diff --git a/crates/fuzzers/src/bin/fuzz-indexing.rs b/crates/fuzzers/src/bin/fuzz-indexing.rs index 1216083ca..e26303010 100644 --- a/crates/fuzzers/src/bin/fuzz-indexing.rs +++ b/crates/fuzzers/src/bin/fuzz-indexing.rs @@ -12,7 +12,7 @@ use milli::documents::mmap_from_objects; use milli::heed::EnvOpenOptions; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig}; +use milli::update::IndexerConfig; use milli::vector::EmbeddingConfigs; use milli::Index; use serde_json::Value; @@ -89,9 +89,7 @@ fn main() { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new( - IndexDocumentsMethod::ReplaceDocuments, - ); + let mut indexer = indexer::DocumentOperation::new(); let mut operations = Vec::new(); for op in batch.0 { @@ -115,7 +113,7 @@ fn main() { for op in &operations { match op { Either::Left(documents) => { - indexer.add_documents(documents).unwrap() + indexer.replace_documents(documents).unwrap() } Either::Right(ids) => indexer.delete_documents(ids), } diff --git a/crates/index-scheduler/src/scheduler/autobatcher.rs b/crates/index-scheduler/src/scheduler/autobatcher.rs index 7f55a9254..8f77af185 100644 --- a/crates/index-scheduler/src/scheduler/autobatcher.rs +++ b/crates/index-scheduler/src/scheduler/autobatcher.rs @@ -5,12 +5,8 @@ tasks affecting a single index into a [batch](crate::batch::Batch). The main function of the autobatcher is [`next_autobatch`]. 
*/ -use std::ops::ControlFlow::{self, Break, Continue}; - -use meilisearch_types::milli::update::IndexDocumentsMethod::{ - self, ReplaceDocuments, UpdateDocuments, -}; use meilisearch_types::tasks::TaskId; +use std::ops::ControlFlow::{self, Break, Continue}; use crate::KindWithContent; @@ -19,19 +15,11 @@ use crate::KindWithContent; /// /// Only the non-prioritised tasks that can be grouped in a batch have a corresponding [`AutobatchKind`] enum AutobatchKind { - DocumentImport { - method: IndexDocumentsMethod, - allow_index_creation: bool, - primary_key: Option<String>, - }, + DocumentImport { allow_index_creation: bool, primary_key: Option<String> }, DocumentEdition, - DocumentDeletion { - by_filter: bool, - }, + DocumentDeletion { by_filter: bool }, DocumentClear, - Settings { - allow_index_creation: bool, - }, + Settings { allow_index_creation: bool }, IndexCreation, IndexDeletion, IndexUpdate, @@ -60,11 +48,8 @@ impl From<KindWithContent> for AutobatchKind { fn from(kind: KindWithContent) -> Self { match kind { KindWithContent::DocumentAdditionOrUpdate { - method, - allow_index_creation, - primary_key, - .. - } => AutobatchKind::DocumentImport { method, allow_index_creation, primary_key }, + allow_index_creation, primary_key, .. + } => AutobatchKind::DocumentImport { allow_index_creation, primary_key }, KindWithContent::DocumentEdition { .. } => AutobatchKind::DocumentEdition, KindWithContent::DocumentDeletion { .. 
} => { AutobatchKind::DocumentDeletion { by_filter: false } @@ -99,7 +84,6 @@ pub enum BatchKind { ids: Vec<TaskId>, }, DocumentOperation { - method: IndexDocumentsMethod, allow_index_creation: bool, primary_key: Option<String>, operation_ids: Vec<TaskId>, @@ -172,12 +156,11 @@ impl BatchKind { K::IndexUpdate => (Break(BatchKind::IndexUpdate { id: task_id }), false), K::IndexSwap => (Break(BatchKind::IndexSwap { id: task_id }), false), K::DocumentClear => (Continue(BatchKind::DocumentClear { ids: vec![task_id] }), false), - K::DocumentImport { method, allow_index_creation, primary_key: pk } + K::DocumentImport { allow_index_creation, primary_key: pk } if primary_key.is_none() || pk.is_none() || primary_key == pk.as_deref() => { ( Continue(BatchKind::DocumentOperation { - method, allow_index_creation, primary_key: pk, operation_ids: vec![task_id], @@ -186,9 +169,8 @@ impl BatchKind { ) } // if the primary key set in the task was different than ours we should stop and make this batch fail asap. - K::DocumentImport { method, allow_index_creation, primary_key } => ( + K::DocumentImport { allow_index_creation, primary_key } => ( Break(BatchKind::DocumentOperation { - method, allow_index_creation, primary_key, operation_ids: vec![task_id], @@ -257,7 +239,7 @@ impl BatchKind { ( BatchKind::DocumentClear { mut ids } | BatchKind::DocumentDeletion { deletion_ids: mut ids, includes_by_filter: _ } - | BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, operation_ids: mut ids } + | BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, operation_ids: mut ids } | BatchKind::Settings { allow_index_creation: _, settings_ids: mut ids }, K::IndexDeletion, ) => { @@ -285,46 +267,32 @@ impl BatchKind { K::DocumentImport { .. } | K::Settings { .. 
}, ) => Break(this), ( - BatchKind::DocumentOperation { method: _, allow_index_creation: _, primary_key: _, mut operation_ids }, + BatchKind::DocumentOperation { allow_index_creation: _, primary_key: _, mut operation_ids }, K::DocumentClear, ) => { operation_ids.push(id); Continue(BatchKind::DocumentClear { ids: operation_ids }) } - // we can autobatch the same kind of document additions / updates + // we can autobatch different kinds of document operations and mix replacements with updates ( - BatchKind::DocumentOperation { method: ReplaceDocuments, allow_index_creation, primary_key: _, mut operation_ids }, - K::DocumentImport { method: ReplaceDocuments, primary_key: pk, .. }, + BatchKind::DocumentOperation { allow_index_creation, primary_key: _, mut operation_ids }, + K::DocumentImport { primary_key: pk, .. }, ) => { operation_ids.push(id); Continue(BatchKind::DocumentOperation { - method: ReplaceDocuments, allow_index_creation, operation_ids, primary_key: pk, }) } ( - BatchKind::DocumentOperation { method: UpdateDocuments, allow_index_creation, primary_key: _, mut operation_ids }, - K::DocumentImport { method: UpdateDocuments, primary_key: pk, .. }, - ) => { - operation_ids.push(id); - Continue(BatchKind::DocumentOperation { - method: UpdateDocuments, - allow_index_creation, - primary_key: pk, - operation_ids, - }) - } - ( - BatchKind::DocumentOperation { method, allow_index_creation, primary_key, mut operation_ids }, + BatchKind::DocumentOperation { allow_index_creation, primary_key, mut operation_ids }, K::DocumentDeletion { by_filter: false }, ) => { operation_ids.push(id); Continue(BatchKind::DocumentOperation { - method, allow_index_creation, primary_key, operation_ids, @@ -337,13 +305,6 @@ impl BatchKind { ) => { Break(this) } - // but we can't autobatch documents if it's not the same kind - // this match branch MUST be AFTER the previous one - ( - this @ BatchKind::DocumentOperation { .. }, - K::DocumentImport { .. 
}, - ) => Break(this), - ( this @ BatchKind::DocumentOperation { .. }, K::Settings { .. }, @@ -361,12 +322,11 @@ impl BatchKind { // we can autobatch the deletion and import if the index already exists ( BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, - K::DocumentImport { method, allow_index_creation, primary_key } + K::DocumentImport { allow_index_creation, primary_key } ) if index_already_exists => { deletion_ids.push(id); Continue(BatchKind::DocumentOperation { - method, allow_index_creation, primary_key, operation_ids: deletion_ids, @@ -375,12 +335,11 @@ impl BatchKind { // we can autobatch the deletion and import if both can't create an index ( BatchKind::DocumentDeletion { mut deletion_ids, includes_by_filter: false }, - K::DocumentImport { method, allow_index_creation, primary_key } + K::DocumentImport { allow_index_creation, primary_key } ) if !allow_index_creation => { deletion_ids.push(id); Continue(BatchKind::DocumentOperation { - method, allow_index_creation, primary_key, operation_ids: deletion_ids, diff --git a/crates/index-scheduler/src/scheduler/autobatcher_test.rs b/crates/index-scheduler/src/scheduler/autobatcher_test.rs index 1e18b276d..486888cf5 100644 --- a/crates/index-scheduler/src/scheduler/autobatcher_test.rs +++ b/crates/index-scheduler/src/scheduler/autobatcher_test.rs @@ -92,29 +92,29 @@ fn idx_swap() -> KindWithContent { fn autobatch_simple_operation_together() { // we can autobatch one or multiple `ReplaceDocuments` together. // if the index exists. 
- debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, false , None), doc_imp(ReplaceDocuments, false , None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 
1, 2] }, false))"); // if it doesn't exists. - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp( ReplaceDocuments, true , None), doc_imp(ReplaceDocuments, true , None)]), @"Some((DocumentOperation { allow_index_creation: false, 
primary_key: None, operation_ids: [0] }, false))"); // we can autobatch one or multiple `UpdateDocuments` together. // if the index exists. - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, 
false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); // if it doesn't exists. - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1, 2] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(UpdateDocuments, false, None), doc_imp(UpdateDocuments, false, None), 
doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1, 2] }, false))"); // we can autobatch one or multiple DocumentDeletion together debug_snapshot!(autobatch_from(true, None, [doc_del()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); @@ -140,53 +140,53 @@ fn autobatch_simple_operation_together() { debug_snapshot!(autobatch_from(false,None, [settings(false), settings(false), settings(false)]), @"Some((Settings { allow_index_creation: false, settings_ids: [0, 1, 2] }, false))"); // We can autobatch document addition with document deletion - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), 
@r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), 
@r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + 
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, 
false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); // And the other way around - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - 
debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), 
@"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, true, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(UpdateDocuments, false, Some("catto"))]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0, 1] }, false))"###); // But we can't autobatch document addition with 
document deletion by filter - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, 
false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); - debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, 
false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, 
true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, true, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("catto"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(ReplaceDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); + debug_snapshot!(autobatch_from(false, None, [doc_imp(UpdateDocuments, false, Some("catto")), doc_del_fil()]), @r###"Some((DocumentOperation { allow_index_creation: false, primary_key: Some("catto"), operation_ids: [0] }, false))"###); // And the other way around debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); @@ -203,27 
+203,27 @@ fn autobatch_simple_operation_together() { } #[test] -fn simple_document_operation_dont_autobatch_with_other() { - // addition, updates and deletion by filter can't batch together - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); +fn simple_different_document_operations_autobatch_together() { + // addition and updates with deletion by filter can't batch together + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + 
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_del_fil()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_create()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, 
true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_update()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); debug_snapshot!(autobatch_from(true, None, [doc_del(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, 
false))"); debug_snapshot!(autobatch_from(true, None, [doc_del_fil(), idx_swap()]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: true }, false))"); } @@ -231,28 +231,28 @@ fn simple_document_operation_dont_autobatch_with_other() { #[test] fn document_addition_doesnt_batch_with_settings() { // simple case - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); // multiple settings and doc addition - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), 
@"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None), settings(true), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); // addition and setting unordered - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); // Doesn't batch with other forbidden operations - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), 
@"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), 
@"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_imp(UpdateDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_del()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_create()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_update()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + 
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), idx_swap()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); } #[test] @@ -280,8 +280,8 @@ fn clear_and_additions_and_settings() { debug_snapshot!(autobatch_from(true, None, [doc_clr(), settings(true)]), @"Some((DocumentClear { ids: [0] }, false))"); debug_snapshot!(autobatch_from(true, None, [settings(true), doc_clr(), settings(true)]), @"Some((ClearAndSettings { other: [1], allow_index_creation: true, settings_ids: [0, 2] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { method: UpdateDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(UpdateDocuments, true, None), settings(true), doc_clr()]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); } #[test] @@ -333,17 +333,17 @@ fn anything_and_index_deletion() { #[test] fn allowed_and_disallowed_index_creation() { // `DocumentImport` can't be mixed with those disallowed to do so except if the index already exists. 
- debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + 
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, 
None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), doc_imp(ReplaceDocuments, false, None)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0, 1] }, false))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, true, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(false,None, [doc_imp(ReplaceDocuments, false, None), settings(true)]), @"Some((DocumentOperation { allow_index_creation: false, primary_key: None, operation_ids: [0] }, false))"); // batch deletion and addition debug_snapshot!(autobatch_from(false, None, [doc_del(), doc_imp(ReplaceDocuments, true, Some("catto"))]), @"Some((DocumentDeletion { deletion_ids: [0], includes_by_filter: false }, false))"); @@ -356,40 +356,40 @@ fn allowed_and_disallowed_index_creation() { fn autobatch_primary_key() { // ==> If I have a pk // With a single update - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, 
None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); // With a multiple updates - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: 
true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, 
true))"); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), 
doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0, 1] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, 
Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, 
primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, Some("id"), [doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("other")), doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); // ==> If I don't have a pk // With a single update - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("other"), 
operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("other"))]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("other"), operation_ids: [0] }, true))"###); // With a multiple updates - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); - debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { method: ReplaceDocuments, allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, None)]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0, 1] }, true))"); + debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, None), doc_imp(ReplaceDocuments, true, Some("id"))]), @"Some((DocumentOperation { allow_index_creation: true, primary_key: None, operation_ids: [0] }, true))"); + 
debug_snapshot!(autobatch_from(true, None, [doc_imp(ReplaceDocuments, true, Some("id")), doc_imp(ReplaceDocuments, true, None)]), @r###"Some((DocumentOperation { allow_index_creation: true, primary_key: Some("id"), operation_ids: [0] }, true))"###); } diff --git a/crates/index-scheduler/src/scheduler/create_batch.rs b/crates/index-scheduler/src/scheduler/create_batch.rs index 2fc3025d7..10f480d12 100644 --- a/crates/index-scheduler/src/scheduler/create_batch.rs +++ b/crates/index-scheduler/src/scheduler/create_batch.rs @@ -54,7 +54,8 @@ pub(crate) enum Batch { #[derive(Debug)] pub(crate) enum DocumentOperation { - Add(Uuid), + Replace(Uuid), + Update(Uuid), Delete(Vec), } @@ -64,7 +65,6 @@ pub(crate) enum IndexOperation { DocumentOperation { index_uid: String, primary_key: Option, - method: IndexDocumentsMethod, operations: Vec, tasks: Vec, }, @@ -254,7 +254,7 @@ impl IndexScheduler { _ => unreachable!(), } } - BatchKind::DocumentOperation { method, operation_ids, .. } => { + BatchKind::DocumentOperation { operation_ids, .. } => { let tasks = self.queue.get_existing_tasks_for_processing_batch( rtxn, current_batch, @@ -276,9 +276,17 @@ impl IndexScheduler { for task in tasks.iter() { match task.kind { - KindWithContent::DocumentAdditionOrUpdate { content_file, .. } => { - operations.push(DocumentOperation::Add(content_file)); - } + KindWithContent::DocumentAdditionOrUpdate { + content_file, method, .. + } => match method { + IndexDocumentsMethod::ReplaceDocuments => { + operations.push(DocumentOperation::Replace(content_file)) + } + IndexDocumentsMethod::UpdateDocuments => { + operations.push(DocumentOperation::Update(content_file)) + } + _ => unreachable!("Unknown document merging method"), + }, KindWithContent::DocumentDeletion { ref documents_ids, .. 
} => { operations.push(DocumentOperation::Delete(documents_ids.clone())); } @@ -290,7 +298,6 @@ impl IndexScheduler { op: IndexOperation::DocumentOperation { index_uid, primary_key, - method, operations, tasks, }, diff --git a/crates/index-scheduler/src/scheduler/process_index_operation.rs b/crates/index-scheduler/src/scheduler/process_index_operation.rs index eff3740a0..630ab62e4 100644 --- a/crates/index-scheduler/src/scheduler/process_index_operation.rs +++ b/crates/index-scheduler/src/scheduler/process_index_operation.rs @@ -62,23 +62,21 @@ impl IndexScheduler { Ok(tasks) } - IndexOperation::DocumentOperation { - index_uid, - primary_key, - method, - operations, - mut tasks, - } => { + IndexOperation::DocumentOperation { index_uid, primary_key, operations, mut tasks } => { progress.update_progress(DocumentOperationProgress::RetrievingConfig); // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches. // this is made difficult by the fact we're doing private clones of the index scheduler and sending it // to a fresh thread. let mut content_files = Vec::new(); for operation in &operations { - if let DocumentOperation::Add(content_uuid) = operation { - let content_file = self.queue.file_store.get_update(*content_uuid)?; - let mmap = unsafe { memmap2::Mmap::map(&content_file)? }; - content_files.push(mmap); + match operation { + DocumentOperation::Replace(content_uuid) + | DocumentOperation::Update(content_uuid) => { + let content_file = self.queue.file_store.get_update(*content_uuid)?; + let mmap = unsafe { memmap2::Mmap::map(&content_file)? 
}; + content_files.push(mmap); + } + _ => (), } } @@ -87,17 +85,23 @@ impl IndexScheduler { let mut new_fields_ids_map = db_fields_ids_map.clone(); let mut content_files_iter = content_files.iter(); - let mut indexer = indexer::DocumentOperation::new(method); + let mut indexer = indexer::DocumentOperation::new(); let embedders = index .embedding_configs(index_wtxn) .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; let embedders = self.embedders(index_uid.clone(), embedders)?; for operation in operations { match operation { - DocumentOperation::Add(_content_uuid) => { + DocumentOperation::Replace(_content_uuid) => { let mmap = content_files_iter.next().unwrap(); indexer - .add_documents(mmap) + .replace_documents(mmap) + .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; + } + DocumentOperation::Update(_content_uuid) => { + let mmap = content_files_iter.next().unwrap(); + indexer + .update_documents(mmap) .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?; } DocumentOperation::Delete(document_ids) => { diff --git a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap index ff617008c..cdab2097c 100644 --- a/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap +++ b/crates/index-scheduler/src/scheduler/snapshots/test_document_addition.rs/test_mixed_document_addition/all_tasks_processed.snap @@ -1,6 +1,5 @@ --- source: crates/index-scheduler/src/scheduler/test_document_addition.rs -snapshot_kind: text --- ### Autobatching Enabled = true ### Processing batch None: @@ -8,15 +7,15 @@ snapshot_kind: text ---------------------------------------------------------------------- ### All Tasks: 0 {uid: 0, batch_uid: 0, status: succeeded, details: { received_documents: 1, 
indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} -1 {uid: 1, batch_uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} -2 {uid: 2, batch_uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} -3 {uid: 3, batch_uid: 3, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 1, allow_index_creation: true }} -4 {uid: 4, batch_uid: 4, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} -5 {uid: 5, batch_uid: 5, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} -6 {uid: 6, batch_uid: 6, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: 
Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} -7 {uid: 7, batch_uid: 7, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} -8 {uid: 8, batch_uid: 8, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} -9 {uid: 9, batch_uid: 9, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} +1 {uid: 1, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} +2 {uid: 2, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000002, documents_count: 1, allow_index_creation: true }} +3 {uid: 3, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000003, documents_count: 
1, allow_index_creation: true }} +4 {uid: 4, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000004, documents_count: 1, allow_index_creation: true }} +5 {uid: 5, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000005, documents_count: 1, allow_index_creation: true }} +6 {uid: 6, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000006, documents_count: 1, allow_index_creation: true }} +7 {uid: 7, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000007, documents_count: 1, allow_index_creation: true }} +8 {uid: 8, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000008, documents_count: 1, allow_index_creation: true }} +9 {uid: 9, batch_uid: 0, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: ReplaceDocuments, content_file: 00000000-0000-0000-0000-000000000009, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- 
### Status: enqueued [] @@ -48,97 +47,35 @@ doggos: { number_of_documents: 10, field_distribution: {"doggo": 10, "id": 10} } [timestamp] [9,] ---------------------------------------------------------------------- ### Started At: -[timestamp] [0,] -[timestamp] [1,] -[timestamp] [2,] -[timestamp] [3,] -[timestamp] [4,] -[timestamp] [5,] -[timestamp] [6,] -[timestamp] [7,] -[timestamp] [8,] -[timestamp] [9,] +[timestamp] [0,1,2,3,4,5,6,7,8,9,] ---------------------------------------------------------------------- ### Finished At: -[timestamp] [0,] -[timestamp] [1,] -[timestamp] [2,] -[timestamp] [3,] -[timestamp] [4,] -[timestamp] [5,] -[timestamp] [6,] -[timestamp] [7,] -[timestamp] [8,] -[timestamp] [9,] +[timestamp] [0,1,2,3,4,5,6,7,8,9,] ---------------------------------------------------------------------- ### All Batches: -0 {uid: 0, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } -1 {uid: 1, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } -2 {uid: 2, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } -3 {uid: 3, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } -4 {uid: 4, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } -5 {uid: 5, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } -6 {uid: 6, details: 
{"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } -7 {uid: 7, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } -8 {uid: 8, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } -9 {uid: 9, details: {"receivedDocuments":1,"indexedDocuments":1}, stats: {"totalNbTasks":1,"status":{"succeeded":1},"types":{"documentAdditionOrUpdate":1},"indexUids":{"doggos":1}}, } +0 {uid: 0, details: {"receivedDocuments":10,"indexedDocuments":10}, stats: {"totalNbTasks":10,"status":{"succeeded":10},"types":{"documentAdditionOrUpdate":10},"indexUids":{"doggos":10}}, } ---------------------------------------------------------------------- ### Batch to tasks mapping: -0 [0,] -1 [1,] -2 [2,] -3 [3,] -4 [4,] -5 [5,] -6 [6,] -7 [7,] -8 [8,] -9 [9,] +0 [0,1,2,3,4,5,6,7,8,9,] ---------------------------------------------------------------------- ### Batches Status: -succeeded [0,1,2,3,4,5,6,7,8,9,] +succeeded [0,] ---------------------------------------------------------------------- ### Batches Kind: -"documentAdditionOrUpdate" [0,1,2,3,4,5,6,7,8,9,] +"documentAdditionOrUpdate" [0,] ---------------------------------------------------------------------- ### Batches Index Tasks: -doggos [0,1,2,3,4,5,6,7,8,9,] +doggos [0,] ---------------------------------------------------------------------- ### Batches Enqueued At: [timestamp] [0,] -[timestamp] [1,] -[timestamp] [2,] -[timestamp] [3,] -[timestamp] [4,] -[timestamp] [5,] -[timestamp] [6,] -[timestamp] [7,] -[timestamp] [8,] -[timestamp] [9,] +[timestamp] [0,] ---------------------------------------------------------------------- ### Batches Started At: [timestamp] [0,] -[timestamp] [1,] 
-[timestamp] [2,] -[timestamp] [3,] -[timestamp] [4,] -[timestamp] [5,] -[timestamp] [6,] -[timestamp] [7,] -[timestamp] [8,] -[timestamp] [9,] ---------------------------------------------------------------------- ### Batches Finished At: [timestamp] [0,] -[timestamp] [1,] -[timestamp] [2,] -[timestamp] [3,] -[timestamp] [4,] -[timestamp] [5,] -[timestamp] [6,] -[timestamp] [7,] -[timestamp] [8,] -[timestamp] [9,] ---------------------------------------------------------------------- ### File Store: diff --git a/crates/index-scheduler/src/scheduler/test_document_addition.rs b/crates/index-scheduler/src/scheduler/test_document_addition.rs index 96181cbaa..3c0d89d54 100644 --- a/crates/index-scheduler/src/scheduler/test_document_addition.rs +++ b/crates/index-scheduler/src/scheduler/test_document_addition.rs @@ -298,11 +298,8 @@ fn test_mixed_document_addition() { } snapshot!(snapshot_index_scheduler(&index_scheduler), name: "after_registering_the_10_tasks"); - // Only half of the task should've been processed since we can't autobatch replace and update together. - handle.advance_n_successful_batches(5); - snapshot!(snapshot_index_scheduler(&index_scheduler), name: "five_tasks_processed"); - - handle.advance_n_successful_batches(5); + // All tasks should've been batched and processed together since any indexing task (updates with replacements) can be batched together + handle.advance_n_successful_batches(1); snapshot!(snapshot_index_scheduler(&index_scheduler), name: "all_tasks_processed"); // has everything being pushed successfully in milli? 
diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index 944fb6cd4..6c7534553 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -1839,9 +1839,15 @@ pub(crate) mod tests { let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs; - let mut indexer = - indexer::DocumentOperation::new(self.index_documents_config.update_method); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + match self.index_documents_config.update_method { + IndexDocumentsMethod::ReplaceDocuments => { + indexer.replace_documents(&documents).unwrap() + } + IndexDocumentsMethod::UpdateDocuments => { + indexer.update_documents(&documents).unwrap() + } + } let indexer_alloc = Bump::new(); let (document_changes, operation_stats, primary_key) = indexer.into_changes( @@ -1928,8 +1934,7 @@ pub(crate) mod tests { let embedders = InnerIndexSettings::from_index(&self.inner, &rtxn, None)?.embedding_configs; - let mut indexer = - indexer::DocumentOperation::new(self.index_documents_config.update_method); + let mut indexer = indexer::DocumentOperation::new(); let external_document_ids: Vec<_> = external_document_ids.iter().map(AsRef::as_ref).collect(); indexer.delete_documents(external_document_ids.as_slice()); @@ -2006,13 +2011,13 @@ pub(crate) mod tests { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let payload = documents!([ { "id": 1, "name": "kevin" }, { "id": 2, "name": "bob", "age": 20 }, { "id": 2, "name": "bob", "age": 20 }, ]); - indexer.add_documents(&payload).unwrap(); + indexer.replace_documents(&payload).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer diff --git a/crates/milli/src/search/new/tests/integration.rs 
b/crates/milli/src/search/new/tests/integration.rs index 99d5dc033..e60a09ec5 100644 --- a/crates/milli/src/search/new/tests/integration.rs +++ b/crates/milli/src/search/new/tests/integration.rs @@ -7,7 +7,7 @@ use maplit::{btreemap, hashset}; use crate::progress::Progress; use crate::update::new::indexer; -use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use crate::update::{IndexerConfig, Settings}; use crate::vector::EmbeddingConfigs; use crate::{db_snap, Criterion, Index}; pub const CONTENT: &str = include_str!("../../../../tests/assets/test_set.ndjson"); @@ -55,7 +55,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let mut file = tempfile::tempfile().unwrap(); file.write_all(CONTENT.as_bytes()).unwrap(); @@ -63,7 +63,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let payload = unsafe { memmap2::Mmap::map(&file).unwrap() }; // index documents - indexer.add_documents(&payload).unwrap(); + indexer.replace_documents(&payload).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, operation_stats, primary_key) = indexer diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 154db7875..56c26ed29 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -771,7 +771,7 @@ mod tests { use crate::search::TermsMatchingStrategy; use crate::update::new::indexer; use crate::update::Setting; - use crate::{db_snap, Filter, Search, UserError}; + use crate::{all_obkv_to_json, db_snap, Filter, Search, UserError}; #[test] fn simple_document_replacement() { @@ -1951,11 +1951,11 @@ mod tests { let db_fields_ids_map = 
index.inner.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); - indexer.add_documents(&doc1).unwrap(); - indexer.add_documents(&doc2).unwrap(); - indexer.add_documents(&doc3).unwrap(); - indexer.add_documents(&doc4).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.replace_documents(&doc1).unwrap(); + indexer.replace_documents(&doc2).unwrap(); + indexer.replace_documents(&doc3).unwrap(); + indexer.replace_documents(&doc4).unwrap(); let indexer_alloc = Bump::new(); let (_document_changes, operation_stats, _primary_key) = indexer @@ -1974,6 +1974,174 @@ mod tests { assert_eq!(operation_stats.iter().filter(|ps| ps.error.is_some()).count(), 3); } + #[test] + fn mixing_documents_replace_with_updates() { + let index = TempIndex::new_with_map_size(4096 * 100); + + let doc1 = documents! {[{ + "id": 1, + "title": "asdsad", + "description": "Wat wat wat, wat" + }]}; + + let doc2 = documents! {[{ + "id": 1, + "title": "something", + }]}; + + let doc3 = documents! {[{ + "id": 1, + "title": "another something", + }]}; + + let doc4 = documents! 
{[{ + "id": 1, + "description": "This is it!", + }]}; + + let rtxn = index.inner.read_txn().unwrap(); + let db_fields_ids_map = index.inner.fields_ids_map(&rtxn).unwrap(); + let mut new_fields_ids_map = db_fields_ids_map.clone(); + + let mut indexer = indexer::DocumentOperation::new(); + indexer.replace_documents(&doc1).unwrap(); + indexer.update_documents(&doc2).unwrap(); + indexer.update_documents(&doc3).unwrap(); + indexer.update_documents(&doc4).unwrap(); + + let indexer_alloc = Bump::new(); + let (document_changes, operation_stats, primary_key) = indexer + .into_changes( + &indexer_alloc, + &index.inner, + &rtxn, + None, + &mut new_fields_ids_map, + &|| false, + Progress::default(), + ) + .unwrap(); + + assert_eq!(operation_stats.iter().filter(|ps| ps.error.is_none()).count(), 4); + + let mut wtxn = index.write_txn().unwrap(); + indexer::index( + &mut wtxn, + &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), + index.indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + primary_key, + &document_changes, + EmbeddingConfigs::default(), + &|| false, + &Progress::default(), + ) + .unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + let obkv = index.document(&rtxn, 0).unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + + let json_document = all_obkv_to_json(obkv, &fields_ids_map).unwrap(); + let expected = serde_json::json!({ + "id": 1, + "title": "another something", + "description": "This is it!", + }); + let expected = expected.as_object().unwrap(); + assert_eq!(&json_document, expected); + } + + #[test] + fn mixing_documents_replace_with_updates_even_more() { + let index = TempIndex::new_with_map_size(4096 * 100); + + let doc1 = documents! {[{ + "id": 1, + "title": "asdsad", + "description": "Wat wat wat, wat" + }]}; + + let doc2 = documents! {[{ + "id": 1, + "title": "something", + }]}; + + let doc3 = documents! 
{[{ + "id": 1, + "title": "another something", + }]}; + + let doc4 = documents! {[{ + "id": 1, + "title": "Woooof", + }]}; + + let doc5 = documents! {[{ + "id": 1, + "description": "This is it!", + }]}; + + let rtxn = index.inner.read_txn().unwrap(); + let db_fields_ids_map = index.inner.fields_ids_map(&rtxn).unwrap(); + let mut new_fields_ids_map = db_fields_ids_map.clone(); + + let mut indexer = indexer::DocumentOperation::new(); + indexer.replace_documents(&doc1).unwrap(); + indexer.update_documents(&doc2).unwrap(); + indexer.update_documents(&doc3).unwrap(); + indexer.replace_documents(&doc4).unwrap(); + indexer.update_documents(&doc5).unwrap(); + + let indexer_alloc = Bump::new(); + let (document_changes, operation_stats, primary_key) = indexer + .into_changes( + &indexer_alloc, + &index.inner, + &rtxn, + None, + &mut new_fields_ids_map, + &|| false, + Progress::default(), + ) + .unwrap(); + + assert_eq!(operation_stats.iter().filter(|ps| ps.error.is_none()).count(), 5); + + let mut wtxn = index.write_txn().unwrap(); + indexer::index( + &mut wtxn, + &index.inner, + &crate::ThreadPoolNoAbortBuilder::new().build().unwrap(), + index.indexer_config.grenad_parameters(), + &db_fields_ids_map, + new_fields_ids_map, + primary_key, + &document_changes, + EmbeddingConfigs::default(), + &|| false, + &Progress::default(), + ) + .unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + let obkv = index.document(&rtxn, 0).unwrap(); + let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); + + let json_document = all_obkv_to_json(obkv, &fields_ids_map).unwrap(); + let expected = serde_json::json!({ + "id": 1, + "title": "Woooof", + "description": "This is it!", + }); + let expected = expected.as_object().unwrap(); + assert_eq!(&json_document, expected); + } + #[test] fn primary_key_must_not_contain_whitespace() { let index = TempIndex::new(); @@ -2112,8 +2280,8 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = 
EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.replace_documents(&documents).unwrap(); indexer.delete_documents(&["2"]); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2165,14 +2333,14 @@ mod tests { { "id": 2, "doggo": { "name": "bob", "age": 20 } }, { "id": 3, "name": "jean", "age": 25 }, ]); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.update_documents(&documents).unwrap(); let documents = documents!([ { "id": 2, "catto": "jorts" }, { "id": 3, "legs": 4 }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.update_documents(&documents).unwrap(); indexer.delete_documents(&["1", "2"]); let indexer_alloc = Bump::new(); @@ -2227,8 +2395,8 @@ mod tests { ]); let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.update_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2278,8 +2446,8 @@ mod tests { ]); let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); - indexer.add_documents(&documents).unwrap(); + let mut indexer = indexer::DocumentOperation::new(); + indexer.update_documents(&documents).unwrap(); indexer.delete_documents(&["1", "2"]); let (document_changes, _operation_stats, primary_key) = indexer @@ -2327,14 +2495,14 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = 
EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); + let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1", "2"]); let documents = documents!([ { "id": 2, "doggo": { "name": "jean", "age": 20 } }, { "id": 3, "name": "bob", "age": 25 }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.update_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2382,7 +2550,7 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); + let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1", "2", "1", "2"]); @@ -2391,7 +2559,7 @@ mod tests { { "id": 2, "doggo": { "name": "jean", "age": 20 } }, { "id": 3, "name": "bob", "age": 25 }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.update_documents(&documents).unwrap(); indexer.delete_documents(&["1", "2", "1", "2"]); @@ -2440,12 +2608,12 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::UpdateDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = documents!([ { "id": 1, "doggo": "kevin" }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.update_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2489,7 +2657,7 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1"]); @@ -2497,7 +2665,7 @@ mod tests { { "id": 1, "catto": "jorts" }, ]); - 
indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2683,14 +2851,14 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); // OP let documents = documents!([ { "id": 1, "doggo": "bernese" }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); // FINISHING let (document_changes, _operation_stats, primary_key) = indexer @@ -2743,14 +2911,14 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); indexer.delete_documents(&["1"]); let documents = documents!([ { "id": 0, "catto": "jorts" }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( @@ -2801,12 +2969,12 @@ mod tests { let indexer_alloc = Bump::new(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let documents = documents!([ { "id": 1, "catto": "jorts" }, ]); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( diff --git a/crates/milli/src/update/new/document_change.rs b/crates/milli/src/update/new/document_change.rs index 1644b2254..8a71d7295 100644 --- a/crates/milli/src/update/new/document_change.rs +++ b/crates/milli/src/update/new/document_change.rs @@ -27,7 +27,7 @@ pub struct 
Update<'doc> { docid: DocumentId, external_document_id: &'doc str, new: Versions<'doc>, - has_deletion: bool, + from_scratch: bool, } pub struct Insertion<'doc> { @@ -109,9 +109,9 @@ impl<'doc> Update<'doc> { docid: DocumentId, external_document_id: &'doc str, new: Versions<'doc>, - has_deletion: bool, + from_scratch: bool, ) -> Self { - Update { docid, new, external_document_id, has_deletion } + Update { docid, new, external_document_id, from_scratch } } pub fn docid(&self) -> DocumentId { @@ -154,7 +154,7 @@ impl<'doc> Update<'doc> { index: &'t Index, mapper: &'t Mapper, ) -> Result> { - if self.has_deletion { + if self.from_scratch { Ok(MergedDocument::without_db(DocumentFromVersions::new(&self.new))) } else { MergedDocument::with_db( @@ -207,8 +207,8 @@ impl<'doc> Update<'doc> { cached_current = Some(current); } - if !self.has_deletion { - // no field deletion, so fields that don't appear in `updated` cannot have changed + if !self.from_scratch { + // the document is not rebuilt from scratch, so fields that don't appear in `updated` cannot have changed return Ok(changed); } @@ -257,7 +257,7 @@ impl<'doc> Update<'doc> { doc_alloc: &'doc Bump, embedders: &'doc EmbeddingConfigs, ) -> Result>> { - if self.has_deletion { + if self.from_scratch { MergedVectorDocument::without_db( self.external_document_id, &self.new, diff --git a/crates/milli/src/update/new/indexer/document_operation.rs b/crates/milli/src/update/new/indexer/document_operation.rs index 8216742ec..96a64cabe 100644 --- a/crates/milli/src/update/new/indexer/document_operation.rs +++ b/crates/milli/src/update/new/indexer/document_operation.rs @@ -23,25 +23,39 @@ use crate::update::new::{Deletion, Insertion, Update}; use crate::update::{AvailableIds, IndexDocumentsMethod}; use crate::{DocumentId, Error, FieldsIdsMap, Index, InternalError, Result, UserError}; +#[derive(Default)] pub struct DocumentOperation<'pl> { operations: Vec>, - method: MergeMethod, } impl<'pl> DocumentOperation<'pl> { - pub fn new(method: 
IndexDocumentsMethod) -> Self { - Self { operations: Default::default(), method: MergeMethod::from(method) } + pub fn new() -> Self { + Self { operations: Default::default() } } - /// TODO please give me a type + /// Append a replacement of documents. + /// /// The payload is expected to be in the NDJSON format - pub fn add_documents(&mut self, payload: &'pl Mmap) -> Result<()> { + pub fn replace_documents(&mut self, payload: &'pl Mmap) -> Result<()> { #[cfg(unix)] payload.advise(memmap2::Advice::Sequential)?; - self.operations.push(Payload::Addition(&payload[..])); + self.operations.push(Payload::Replace(&payload[..])); Ok(()) } + /// Append an update of documents. + /// + /// The payload is expected to be in the NDJSON format + pub fn update_documents(&mut self, payload: &'pl Mmap) -> Result<()> { + #[cfg(unix)] + payload.advise(memmap2::Advice::Sequential)?; + self.operations.push(Payload::Update(&payload[..])); + Ok(()) + } + + /// Append a deletion of document IDs. + /// + /// The list is a set of external document IDs. 
pub fn delete_documents(&mut self, to_delete: &'pl [&'pl str]) { self.operations.push(Payload::Deletion(to_delete)) } @@ -62,7 +76,7 @@ impl<'pl> DocumentOperation<'pl> { MSP: Fn() -> bool, { progress.update_progress(IndexingStep::PreparingPayloads); - let Self { operations, method } = self; + let Self { operations } = self; let documents_ids = index.documents_ids(rtxn)?; let mut operations_stats = Vec::new(); @@ -82,7 +96,7 @@ impl<'pl> DocumentOperation<'pl> { let mut bytes = 0; let result = match operation { - Payload::Addition(payload) => extract_addition_payload_changes( + Payload::Replace(payload) => extract_addition_payload_changes( indexer, index, rtxn, @@ -92,7 +106,20 @@ impl<'pl> DocumentOperation<'pl> { &mut available_docids, &mut bytes, &docids_version_offsets, - method, + IndexDocumentsMethod::ReplaceDocuments, + payload, + ), + Payload::Update(payload) => extract_addition_payload_changes( + indexer, + index, + rtxn, + primary_key_from_op, + &mut primary_key, + new_fields_ids_map, + &mut available_docids, + &mut bytes, + &docids_version_offsets, + IndexDocumentsMethod::UpdateDocuments, payload, ), Payload::Deletion(to_delete) => extract_deletion_payload_changes( @@ -100,7 +127,6 @@ impl<'pl> DocumentOperation<'pl> { rtxn, &mut available_docids, &docids_version_offsets, - method, to_delete, ), }; @@ -126,20 +152,15 @@ impl<'pl> DocumentOperation<'pl> { docids_version_offsets.drain().collect_in(indexer); // Reorder the offsets to make sure we iterate on the file sequentially - // And finally sort them - docids_version_offsets.sort_unstable_by_key(|(_, po)| method.sort_key(&po.operations)); + // And finally sort them. This clearly speeds up reading the update files. 
+ docids_version_offsets + .sort_unstable_by_key(|(_, po)| first_update_pointer(&po.operations).unwrap_or(0)); let docids_version_offsets = docids_version_offsets.into_bump_slice(); Ok((DocumentOperationChanges { docids_version_offsets }, operations_stats, primary_key)) } } -impl Default for DocumentOperation<'_> { - fn default() -> Self { - DocumentOperation::new(IndexDocumentsMethod::default()) - } -} - #[allow(clippy::too_many_arguments)] fn extract_addition_payload_changes<'r, 'pl: 'r>( indexer: &'pl Bump, @@ -151,9 +172,11 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>( available_docids: &mut AvailableIds, bytes: &mut u64, main_docids_version_offsets: &hashbrown::HashMap<&'pl str, PayloadOperations<'pl>>, - method: MergeMethod, + method: IndexDocumentsMethod, payload: &'pl [u8], ) -> Result>> { + use IndexDocumentsMethod::{ReplaceDocuments, UpdateDocuments}; + let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new(); let mut previous_offset = 0; @@ -204,48 +227,82 @@ fn extract_addition_payload_changes<'r, 'pl: 'r>( None => { match index.external_documents_ids().get(rtxn, external_id) { Ok(Some(docid)) => match new_docids_version_offsets.entry(external_id) { - Entry::Occupied(mut entry) => { - entry.get_mut().push_addition(document_offset) - } + Entry::Occupied(mut entry) => match method { + ReplaceDocuments => entry.get_mut().push_replacement(document_offset), + UpdateDocuments => entry.get_mut().push_update(document_offset), + }, Entry::Vacant(entry) => { - entry.insert(PayloadOperations::new_addition( - method, - docid, - false, // is new - document_offset, - )); + match method { + ReplaceDocuments => { + entry.insert(PayloadOperations::new_replacement( + docid, + false, // is new + document_offset, + )); + } + UpdateDocuments => { + entry.insert(PayloadOperations::new_update( + docid, + false, // is new + document_offset, + )); + } + } } }, Ok(None) => match new_docids_version_offsets.entry(external_id) { - 
Entry::Occupied(mut entry) => { - entry.get_mut().push_addition(document_offset) - } + Entry::Occupied(mut entry) => match method { + ReplaceDocuments => entry.get_mut().push_replacement(document_offset), + UpdateDocuments => entry.get_mut().push_update(document_offset), + }, Entry::Vacant(entry) => { let docid = match available_docids.next() { Some(docid) => docid, None => return Err(UserError::DocumentLimitReached.into()), }; - entry.insert(PayloadOperations::new_addition( - method, - docid, - true, // is new - document_offset, - )); + + match method { + ReplaceDocuments => { + entry.insert(PayloadOperations::new_replacement( + docid, + true, // is new + document_offset, + )); + } + UpdateDocuments => { + entry.insert(PayloadOperations::new_update( + docid, + true, // is new + document_offset, + )); + } + } } }, Err(e) => return Err(e.into()), } } Some(payload_operations) => match new_docids_version_offsets.entry(external_id) { - Entry::Occupied(mut entry) => entry.get_mut().push_addition(document_offset), - Entry::Vacant(entry) => { - entry.insert(PayloadOperations::new_addition( - method, - payload_operations.docid, - payload_operations.is_new, - document_offset, - )); - } + Entry::Occupied(mut entry) => match method { + ReplaceDocuments => entry.get_mut().push_replacement(document_offset), + UpdateDocuments => entry.get_mut().push_update(document_offset), + }, + Entry::Vacant(entry) => match method { + ReplaceDocuments => { + entry.insert(PayloadOperations::new_replacement( + payload_operations.docid, + payload_operations.is_new, + document_offset, + )); + } + UpdateDocuments => { + entry.insert(PayloadOperations::new_update( + payload_operations.docid, + payload_operations.is_new, + document_offset, + )); + } + }, }, } @@ -278,7 +335,6 @@ fn extract_deletion_payload_changes<'s, 'pl: 's>( rtxn: &RoTxn, available_docids: &mut AvailableIds, main_docids_version_offsets: &hashbrown::HashMap<&'s str, PayloadOperations<'pl>>, - method: MergeMethod, to_delete: &'pl 
[&'pl str], ) -> Result>> { let mut new_docids_version_offsets = hashbrown::HashMap::<&str, PayloadOperations<'pl>>::new(); @@ -292,7 +348,7 @@ fn extract_deletion_payload_changes<'s, 'pl: 's>( Entry::Occupied(mut entry) => entry.get_mut().push_deletion(), Entry::Vacant(entry) => { entry.insert(PayloadOperations::new_deletion( - method, docid, false, // is new + docid, false, // is new )); } } @@ -306,7 +362,7 @@ fn extract_deletion_payload_changes<'s, 'pl: 's>( Entry::Occupied(mut entry) => entry.get_mut().push_deletion(), Entry::Vacant(entry) => { entry.insert(PayloadOperations::new_deletion( - method, docid, true, // is new + docid, true, // is new )); } } @@ -318,7 +374,6 @@ fn extract_deletion_payload_changes<'s, 'pl: 's>( Entry::Occupied(mut entry) => entry.get_mut().push_deletion(), Entry::Vacant(entry) => { entry.insert(PayloadOperations::new_deletion( - method, payload_operations.docid, payload_operations.is_new, )); @@ -369,13 +424,7 @@ impl<'pl> DocumentChanges<'pl> for DocumentOperationChanges<'pl> { 'pl: 'doc, { let (external_doc, payload_operations) = item; - payload_operations.merge_method.merge( - payload_operations.docid, - external_doc, - payload_operations.is_new, - &context.doc_alloc, - &payload_operations.operations[..], - ) + payload_operations.merge(external_doc, &context.doc_alloc) } fn len(&self) -> usize { @@ -388,7 +437,8 @@ pub struct DocumentOperationChanges<'pl> { } pub enum Payload<'pl> { - Addition(&'pl [u8]), + Replace(&'pl [u8]), + Update(&'pl [u8]), Deletion(&'pl [&'pl str]), } @@ -405,31 +455,30 @@ pub struct PayloadOperations<'pl> { pub is_new: bool, /// The operations to perform, in order, on this document. pub operations: Vec>, - /// The merge method we are using to merge payloads and documents. 
- merge_method: MergeMethod, } impl<'pl> PayloadOperations<'pl> { - fn new_deletion(merge_method: MergeMethod, docid: DocumentId, is_new: bool) -> Self { - Self { docid, is_new, operations: vec![InnerDocOp::Deletion], merge_method } + fn new_replacement(docid: DocumentId, is_new: bool, offset: DocumentOffset<'pl>) -> Self { + Self { docid, is_new, operations: vec![InnerDocOp::Replace(offset)] } } - fn new_addition( - merge_method: MergeMethod, - docid: DocumentId, - is_new: bool, - offset: DocumentOffset<'pl>, - ) -> Self { - Self { docid, is_new, operations: vec![InnerDocOp::Addition(offset)], merge_method } + fn new_update(docid: DocumentId, is_new: bool, offset: DocumentOffset<'pl>) -> Self { + Self { docid, is_new, operations: vec![InnerDocOp::Update(offset)] } + } + + fn new_deletion(docid: DocumentId, is_new: bool) -> Self { + Self { docid, is_new, operations: vec![InnerDocOp::Deletion] } } } impl<'pl> PayloadOperations<'pl> { - fn push_addition(&mut self, offset: DocumentOffset<'pl>) { - if self.merge_method.useless_previous_changes() { - self.operations.clear(); - } - self.operations.push(InnerDocOp::Addition(offset)) + fn push_replacement(&mut self, offset: DocumentOffset<'pl>) { + self.operations.clear(); + self.operations.push(InnerDocOp::Replace(offset)) + } + + fn push_update(&mut self, offset: DocumentOffset<'pl>) { + self.operations.push(InnerDocOp::Update(offset)) } fn push_deletion(&mut self) { @@ -439,16 +488,114 @@ impl<'pl> PayloadOperations<'pl> { fn append_operations(&mut self, mut operations: Vec>) { debug_assert!(!operations.is_empty()); - if self.merge_method.useless_previous_changes() { + if matches!(operations.first(), Some(InnerDocOp::Deletion | InnerDocOp::Replace(_))) { self.operations.clear(); } self.operations.append(&mut operations); } + + /// Returns only the most recent version of a document based on the updates from the payloads. + /// + /// This function is only meant to be used when doing a replacement and not an update. 
+ fn merge<'doc>( + &self, + external_doc: &'doc str, + doc_alloc: &'doc Bump, + ) -> Result>> + where + 'pl: 'doc, + { + match self.operations.last() { + Some(InnerDocOp::Replace(DocumentOffset { content })) => { + let document = serde_json::from_slice(content).unwrap(); + let document = + RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) + .map_err(UserError::SerdeJson)?; + + if self.is_new { + Ok(Some(DocumentChange::Insertion(Insertion::create( + self.docid, + external_doc, + Versions::single(document), + )))) + } else { + Ok(Some(DocumentChange::Update(Update::create( + self.docid, + external_doc, + Versions::single(document), + true, + )))) + } + } + Some(InnerDocOp::Update(_)) => { + // Search the first operation that is a tombstone which resets the document. + let last_tombstone = self + .operations + .iter() + .rposition(|op| matches!(op, InnerDocOp::Deletion | InnerDocOp::Replace(_))); + + // Track when we must ignore previous document versions from the rtxn. + let from_scratch = last_tombstone.is_some(); + + // We ignore deletion and keep the replacement to create the appropriate versions. + let operations = match last_tombstone { + Some(i) => match self.operations[i] { + InnerDocOp::Deletion => &self.operations[i + 1..], + InnerDocOp::Replace(_) => &self.operations[i..], + InnerDocOp::Update(_) => unreachable!("Found a non-tombstone operation"), + }, + None => &self.operations[..], + }; + + // We collect the versions to generate the appropriate document. 
+ let versions = operations.iter().map(|operation| { + let DocumentOffset { content } = match operation { + InnerDocOp::Replace(offset) | InnerDocOp::Update(offset) => offset, + InnerDocOp::Deletion => unreachable!("Deletion in document operations"), + }; + + let document = serde_json::from_slice(content).unwrap(); + let document = + RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) + .map_err(UserError::SerdeJson)?; + + Ok(document) + }); + + let Some(versions) = Versions::multiple(versions)? else { return Ok(None) }; + + if self.is_new { + Ok(Some(DocumentChange::Insertion(Insertion::create( + self.docid, + external_doc, + versions, + )))) + } else { + Ok(Some(DocumentChange::Update(Update::create( + self.docid, + external_doc, + versions, + from_scratch, + )))) + } + } + Some(InnerDocOp::Deletion) => { + return if self.is_new { + Ok(None) + } else { + let deletion = Deletion::create(self.docid, external_doc); + Ok(Some(DocumentChange::Deletion(deletion))) + }; + } + None => unreachable!("We must not have an empty set of operations on a document"), + } + } } #[derive(Clone)] pub enum InnerDocOp<'pl> { - Addition(DocumentOffset<'pl>), + Replace(DocumentOffset<'pl>), + Update(DocumentOffset<'pl>), Deletion, } @@ -460,231 +607,14 @@ pub struct DocumentOffset<'pl> { pub content: &'pl [u8], } -trait MergeChanges { - /// Whether the payloads in the list of operations are useless or not. - fn useless_previous_changes(&self) -> bool; - - /// Returns a key that is used to order the payloads the right way. 
- fn sort_key(&self, docops: &[InnerDocOp]) -> usize; - - fn merge<'doc>( - &self, - docid: DocumentId, - external_docid: &'doc str, - is_new: bool, - doc_alloc: &'doc Bump, - operations: &'doc [InnerDocOp], - ) -> Result>>; -} - -#[derive(Debug, Clone, Copy)] -enum MergeMethod { - ForReplacement(MergeDocumentForReplacement), - ForUpdates(MergeDocumentForUpdates), -} - -impl MergeChanges for MergeMethod { - fn useless_previous_changes(&self) -> bool { - match self { - MergeMethod::ForReplacement(merge) => merge.useless_previous_changes(), - MergeMethod::ForUpdates(merge) => merge.useless_previous_changes(), - } - } - - fn sort_key(&self, docops: &[InnerDocOp]) -> usize { - match self { - MergeMethod::ForReplacement(merge) => merge.sort_key(docops), - MergeMethod::ForUpdates(merge) => merge.sort_key(docops), - } - } - - fn merge<'doc>( - &self, - docid: DocumentId, - external_docid: &'doc str, - is_new: bool, - doc_alloc: &'doc Bump, - operations: &'doc [InnerDocOp], - ) -> Result>> { - match self { - MergeMethod::ForReplacement(merge) => { - merge.merge(docid, external_docid, is_new, doc_alloc, operations) - } - MergeMethod::ForUpdates(merge) => { - merge.merge(docid, external_docid, is_new, doc_alloc, operations) - } - } - } -} - -impl From for MergeMethod { - fn from(method: IndexDocumentsMethod) -> Self { - match method { - IndexDocumentsMethod::ReplaceDocuments => { - MergeMethod::ForReplacement(MergeDocumentForReplacement) - } - IndexDocumentsMethod::UpdateDocuments => { - MergeMethod::ForUpdates(MergeDocumentForUpdates) - } - } - } -} - -#[derive(Debug, Clone, Copy)] -struct MergeDocumentForReplacement; - -impl MergeChanges for MergeDocumentForReplacement { - fn useless_previous_changes(&self) -> bool { - true - } - - /// Reorders to read only the last change. 
- fn sort_key(&self, docops: &[InnerDocOp]) -> usize { - let f = |ido: &_| match ido { - InnerDocOp::Addition(add) => Some(add.content.as_ptr() as usize), - InnerDocOp::Deletion => None, - }; - docops.iter().rev().find_map(f).unwrap_or(0) - } - - /// Returns only the most recent version of a document based on the updates from the payloads. - /// - /// This function is only meant to be used when doing a replacement and not an update. - fn merge<'doc>( - &self, - docid: DocumentId, - external_doc: &'doc str, - is_new: bool, - doc_alloc: &'doc Bump, - operations: &'doc [InnerDocOp], - ) -> Result>> { - match operations.last() { - Some(InnerDocOp::Addition(DocumentOffset { content })) => { - let document = serde_json::from_slice(content).unwrap(); - let document = - RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) - .map_err(UserError::SerdeJson)?; - - if is_new { - Ok(Some(DocumentChange::Insertion(Insertion::create( - docid, - external_doc, - Versions::single(document), - )))) - } else { - Ok(Some(DocumentChange::Update(Update::create( - docid, - external_doc, - Versions::single(document), - true, - )))) - } - } - Some(InnerDocOp::Deletion) => { - return if is_new { - Ok(None) - } else { - let deletion = Deletion::create(docid, external_doc); - Ok(Some(DocumentChange::Deletion(deletion))) - }; - } - None => unreachable!("We must not have empty set of operations on a document"), - } - } -} - -#[derive(Debug, Clone, Copy)] -struct MergeDocumentForUpdates; - -impl MergeChanges for MergeDocumentForUpdates { - fn useless_previous_changes(&self) -> bool { - false - } - - /// Reorders to read the first changes first so that it's faster to read the first one and then the rest. 
- fn sort_key(&self, docops: &[InnerDocOp]) -> usize { - let f = |ido: &_| match ido { - InnerDocOp::Addition(add) => Some(add.content.as_ptr() as usize), - InnerDocOp::Deletion => None, - }; - docops.iter().find_map(f).unwrap_or(0) - } - - /// Reads the previous version of a document from the database, the new versions - /// in the grenad update files and merges them to generate a new boxed obkv. - /// - /// This function is only meant to be used when doing an update and not a replacement. - fn merge<'doc>( - &self, - docid: DocumentId, - external_docid: &'doc str, - is_new: bool, - doc_alloc: &'doc Bump, - operations: &'doc [InnerDocOp], - ) -> Result>> { - if operations.is_empty() { - unreachable!("We must not have empty set of operations on a document"); - } - - let last_deletion = operations.iter().rposition(|op| matches!(op, InnerDocOp::Deletion)); - let operations = &operations[last_deletion.map_or(0, |i| i + 1)..]; - - let has_deletion = last_deletion.is_some(); - - if operations.is_empty() { - return if is_new { - Ok(None) - } else { - let deletion = Deletion::create(docid, external_docid); - Ok(Some(DocumentChange::Deletion(deletion))) - }; - } - - let versions = match operations { - [single] => { - let DocumentOffset { content } = match single { - InnerDocOp::Addition(offset) => offset, - InnerDocOp::Deletion => { - unreachable!("Deletion in document operations") - } - }; - let document = serde_json::from_slice(content).unwrap(); - let document = - RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) - .map_err(UserError::SerdeJson)?; - - Some(Versions::single(document)) - } - operations => { - let versions = operations.iter().map(|operation| { - let DocumentOffset { content } = match operation { - InnerDocOp::Addition(offset) => offset, - InnerDocOp::Deletion => { - unreachable!("Deletion in document operations") - } - }; - - let document = serde_json::from_slice(content).unwrap(); - let document = - 
RawMap::from_raw_value_and_hasher(document, FxBuildHasher, doc_alloc) - .map_err(UserError::SerdeJson)?; - Ok(document) - }); - Versions::multiple(versions)? - } - }; - - let Some(versions) = versions else { return Ok(None) }; - - if is_new { - Ok(Some(DocumentChange::Insertion(Insertion::create(docid, external_docid, versions)))) - } else { - Ok(Some(DocumentChange::Update(Update::create( - docid, - external_docid, - versions, - has_deletion, - )))) - } - } +/// Returns the first pointer of the first change in a document. +/// +/// This is used to sort the documents in update file content order +/// and read the update file in order to largely speed up the indexation. +pub fn first_update_pointer(docops: &[InnerDocOp]) -> Option { + docops.iter().find_map(|ido: &_| match ido { + InnerDocOp::Replace(replace) => Some(replace.content.as_ptr() as usize), + InnerDocOp::Update(update) => Some(update.content.as_ptr() as usize), + InnerDocOp::Deletion => None, + }) } diff --git a/crates/milli/tests/search/facet_distribution.rs b/crates/milli/tests/search/facet_distribution.rs index db9f86357..4d8bf324c 100644 --- a/crates/milli/tests/search/facet_distribution.rs +++ b/crates/milli/tests/search/facet_distribution.rs @@ -5,7 +5,7 @@ use maplit::hashset; use milli::documents::mmap_from_objects; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{FacetDistribution, Index, Object, OrderBy}; use serde_json::{from_value, json}; @@ -36,7 +36,7 @@ fn test_facet_distribution_with_no_facet_values() { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let doc1: Object = from_value( json!({ "id": 123, "title": "What 
a week, hu...", "genres": [], "tags": ["blue"] }), @@ -47,7 +47,7 @@ fn test_facet_distribution_with_no_facet_values() { let documents = mmap_from_objects(vec![doc1, doc2]); // index documents - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer diff --git a/crates/milli/tests/search/mod.rs b/crates/milli/tests/search/mod.rs index 662715638..337a4c88c 100644 --- a/crates/milli/tests/search/mod.rs +++ b/crates/milli/tests/search/mod.rs @@ -9,7 +9,7 @@ use heed::EnvOpenOptions; use maplit::{btreemap, hashset}; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{AscDesc, Criterion, DocumentId, Index, Member, TermsMatchingStrategy}; use serde::{Deserialize, Deserializer}; @@ -72,7 +72,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let mut file = tempfile::tempfile().unwrap(); file.write_all(CONTENT.as_bytes()).unwrap(); @@ -80,7 +80,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index { let payload = unsafe { memmap2::Mmap::map(&file).unwrap() }; // index documents - indexer.add_documents(&payload).unwrap(); + indexer.replace_documents(&payload).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, operation_stats, primary_key) = indexer diff --git a/crates/milli/tests/search/query_criteria.rs b/crates/milli/tests/search/query_criteria.rs index d47c9539d..3cc747f06 100644 --- a/crates/milli/tests/search/query_criteria.rs +++ 
b/crates/milli/tests/search/query_criteria.rs @@ -7,7 +7,7 @@ use itertools::Itertools; use maplit::hashset; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy}; use rand::Rng; @@ -288,7 +288,7 @@ fn criteria_ascdesc() { let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); let mut file = tempfile::tempfile().unwrap(); (0..ASC_DESC_CANDIDATES_THRESHOLD + 1).for_each(|_| { @@ -318,7 +318,7 @@ fn criteria_ascdesc() { file.sync_all().unwrap(); let payload = unsafe { memmap2::Mmap::map(&file).unwrap() }; - indexer.add_documents(&payload).unwrap(); + indexer.replace_documents(&payload).unwrap(); let (document_changes, _operation_stats, primary_key) = indexer .into_changes( &indexer_alloc, diff --git a/crates/milli/tests/search/typo_tolerance.rs b/crates/milli/tests/search/typo_tolerance.rs index b640fa910..837b5e6b2 100644 --- a/crates/milli/tests/search/typo_tolerance.rs +++ b/crates/milli/tests/search/typo_tolerance.rs @@ -5,7 +5,7 @@ use heed::EnvOpenOptions; use milli::documents::mmap_from_objects; use milli::progress::Progress; use milli::update::new::indexer; -use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; +use milli::update::{IndexerConfig, Settings}; use milli::vector::EmbeddingConfigs; use milli::{Criterion, Index, Object, Search, TermsMatchingStrategy}; use serde_json::from_value; @@ -123,9 +123,9 @@ fn test_typo_disabled_on_word() { let db_fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); let mut new_fields_ids_map = db_fields_ids_map.clone(); let embedders = EmbeddingConfigs::default(); - 
let mut indexer = indexer::DocumentOperation::new(IndexDocumentsMethod::ReplaceDocuments); + let mut indexer = indexer::DocumentOperation::new(); - indexer.add_documents(&documents).unwrap(); + indexer.replace_documents(&documents).unwrap(); let indexer_alloc = Bump::new(); let (document_changes, _operation_stats, primary_key) = indexer