5147: Batch progress r=dureuill a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/5068

## What does this PR do?
- ...
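
For orientation while reading the diff below: the closure-based progress callbacks (`&|_progress| ()`) are replaced by a cloneable `Progress` handle that each long-running step updates with a typed step (e.g. `SnapshotCreationProgress::SnapshotTheIndexes`) and, for hot loops, an atomic substep counter (`AtomicTaskStep`, `AtomicDocumentStep`, ...). Here is a minimal, self-contained sketch of that shape; the types below are mocks standing in for the real ones in `milli::progress` and the index-scheduler's new `processing` module:

```rust
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::Arc;

// Mock of milli::progress::Progress: a cheap, cloneable handle that steps
// report into; the scheduler reads it back to expose batch progress.
#[derive(Default, Clone)]
struct Progress {
    steps: Arc<std::sync::Mutex<Vec<String>>>,
}

impl Progress {
    fn update_progress(&self, step: impl Into<String>) {
        self.steps.lock().unwrap().push(step.into());
    }
}

// Mock of AtomicTaskStep::new(total): a shared counter the hot loop bumps,
// paired with a label the Progress handle can display.
fn atomic_task_step(total: u32) -> (Arc<AtomicU32>, String) {
    (Arc::new(AtomicU32::new(0)), format!("task 0/{total}"))
}

fn main() {
    let progress = Progress::default();
    // Coarse, typed step (an enum variant in the real code):
    progress.update_progress("TaskCancelationProgress::RetrievingTasks");
    // Fine-grained substep counter for the loop that does the work:
    let (atomic, step) = atomic_task_step(3);
    progress.update_progress(step);
    for _task in 0..3 {
        atomic.fetch_add(1, Ordering::Relaxed);
    }
    assert_eq!(atomic.load(Ordering::Relaxed), 3);
}
```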

## PR checklist
Please check if your PR fulfills the following requirements:
- [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [ ] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Tamo <tamo@meilisearch.com>
Merged by meili-bors[bot] on 2024-12-12 09:15:54 +00:00, commit 1fc90fbacb.
38 changed files with 940 additions and 473 deletions

Cargo.lock (generated)

@@ -2632,6 +2632,7 @@ dependencies = [
  "bincode",
  "bumpalo",
  "bumparaw-collections",
+ "convert_case 0.6.0",
  "crossbeam-channel",
  "csv",
  "derive_builder 0.20.0",


@@ -8,6 +8,7 @@ use bumpalo::Bump;
 use criterion::{criterion_group, criterion_main, Criterion};
 use milli::documents::PrimaryKey;
 use milli::heed::{EnvOpenOptions, RwTxn};
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -151,7 +152,7 @@ fn indexing_songs_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -166,7 +167,7 @@ fn indexing_songs_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -218,7 +219,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -233,7 +234,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -263,7 +264,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -278,7 +279,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -332,7 +333,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -347,7 +348,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -409,7 +410,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -424,7 +425,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -454,7 +455,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -469,7 +470,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -495,7 +496,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -510,7 +511,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -563,7 +564,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -578,7 +579,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -630,7 +631,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -645,7 +646,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -697,7 +698,7 @@ fn indexing_wiki(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -712,7 +713,7 @@ fn indexing_wiki(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -763,7 +764,7 @@ fn reindexing_wiki(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -778,7 +779,7 @@ fn reindexing_wiki(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -808,7 +809,7 @@ fn reindexing_wiki(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -823,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -876,7 +877,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -891,7 +892,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -953,7 +954,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -968,7 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -999,7 +1000,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1014,7 +1015,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1041,7 +1042,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1056,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1108,7 +1109,7 @@ fn indexing_movies_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1123,7 +1124,7 @@ fn indexing_movies_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1174,7 +1175,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1189,7 +1190,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1219,7 +1220,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1234,7 +1235,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1287,7 +1288,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1302,7 +1303,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1350,7 +1351,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1400,7 +1401,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1415,7 +1416,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1445,7 +1446,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1460,7 +1461,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1486,7 +1487,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1501,7 +1502,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1576,7 +1577,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1591,7 +1592,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1667,7 +1668,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1682,7 +1683,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1750,7 +1751,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1765,7 +1766,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1817,7 +1818,7 @@ fn indexing_geo(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1832,7 +1833,7 @@ fn indexing_geo(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1883,7 +1884,7 @@ fn reindexing_geo(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1898,7 +1899,7 @@ fn reindexing_geo(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1928,7 +1929,7 @@ fn reindexing_geo(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -1943,7 +1944,7 @@ fn reindexing_geo(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
@@ -1996,7 +1997,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
                     None,
                     &mut new_fields_ids_map,
                     &|| false,
-                    &|_progress| (),
+                    Progress::default(),
                 )
                 .unwrap();
@@ -2011,7 +2012,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
                     &document_changes,
                     EmbeddingConfigs::default(),
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
                 .unwrap();
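
Every bench call-site above changes the same way: `into_changes` now takes an owned `Progress` handle where it used to take a no-op progress closure, and `indexer::index` takes `&Progress`. A compilable sketch of the two shapes, with mocked signatures (not milli's real ones):

```rust
#[derive(Default, Clone)]
struct Progress; // stand-in for milli::progress::Progress

// Old shape: progress reported through a caller-supplied closure.
fn into_changes_old(_must_stop: &dyn Fn() -> bool, _on_progress: &dyn Fn(u32)) {}

// New shape: the caller hands over a Progress handle instead.
fn into_changes_new(_must_stop: &dyn Fn() -> bool, _progress: Progress) {}

fn main() {
    into_changes_old(&|| false, &|_progress| ()); // what the benches used to pass
    into_changes_new(&|| false, Progress::default()); // what they pass now
}
```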


@@ -10,6 +10,7 @@ use bumpalo::Bump;
 use criterion::BenchmarkId;
 use memmap2::Mmap;
 use milli::heed::EnvOpenOptions;
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use milli::vector::EmbeddingConfigs;
@@ -110,7 +111,7 @@ pub fn base_setup(conf: &Conf) -> Index {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -125,7 +126,7 @@ pub fn base_setup(conf: &Conf) -> Index {
             &document_changes,
             EmbeddingConfigs::default(),
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();


@@ -10,6 +10,7 @@ use either::Either;
 use fuzzers::Operation;
 use milli::documents::mmap_from_objects;
 use milli::heed::EnvOpenOptions;
+use milli::progress::Progress;
 use milli::update::new::indexer;
 use milli::update::{IndexDocumentsMethod, IndexerConfig};
 use milli::vector::EmbeddingConfigs;
@@ -128,7 +129,7 @@ fn main() {
                         None,
                         &mut new_fields_ids_map,
                         &|| false,
-                        &|_progress| (),
+                        Progress::default(),
                     )
                     .unwrap();
@@ -143,7 +144,7 @@ fn main() {
                         &document_changes,
                         embedders,
                         &|| false,
-                        &|_| (),
+                        &Progress::default(),
                     )
                     .unwrap();


@@ -15,6 +15,7 @@ anyhow = "1.0.86"
 bincode = "1.3.3"
 bumpalo = "3.16.0"
 bumparaw-collections = "0.1.2"
+convert_case = "0.6.0"
 csv = "1.3.0"
 derive_builder = "0.20.0"
 dump = { path = "../dump" }
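
The new `convert_case` dependency is presumably what turns the variant names of the new progress enums into human-readable step names. A minimal demo of the crate's API (requires `convert_case = "0.6.0"`; the input string is only an example):

```rust
use convert_case::{Case, Casing};

fn main() {
    // Splits on camel-case boundaries, then lowercases with spaces.
    let step = "SnapshotTheUpdateFiles".to_case(Case::Lower);
    assert_eq!(step, "snapshot the update files");
    println!("{step}");
}
```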


@@ -22,8 +22,7 @@ use std::ffi::OsStr;
 use std::fmt;
 use std::fs::{self, File};
 use std::io::BufWriter;
-use std::sync::atomic::{self, AtomicU64};
-use std::time::Duration;
+use std::sync::atomic::Ordering;
 
 use bumpalo::collections::CollectIn;
 use bumpalo::Bump;
@@ -32,6 +31,7 @@ use meilisearch_types::batches::BatchId;
 use meilisearch_types::heed::{RoTxn, RwTxn};
 use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
 use meilisearch_types::milli::heed::CompactionOption;
+use meilisearch_types::milli::progress::Progress;
 use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
 use meilisearch_types::milli::update::{
     DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings,
@@ -41,9 +41,7 @@ use meilisearch_types::milli::vector::parsed_vectors::{
 };
 use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
 use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
-use meilisearch_types::tasks::{
-    Details, IndexSwap, Kind, KindWithContent, Status, Task, TaskProgress,
-};
+use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
 use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
 use roaring::RoaringBitmap;
 use time::macros::format_description;
@@ -51,6 +49,13 @@ use time::OffsetDateTime;
 use uuid::Uuid;
 
 use crate::autobatcher::{self, BatchKind};
+use crate::processing::{
+    AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, AtomicUpdateFileStep, CreateIndexProgress,
+    DeleteIndexProgress, DocumentDeletionProgress, DocumentEditionProgress,
+    DocumentOperationProgress, DumpCreationProgress, InnerSwappingTwoIndexes, SettingsProgress,
+    SnapshotCreationProgress, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
+    UpdateIndexProgress, VariableNameStep,
+};
 use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
 use crate::{Error, IndexScheduler, Result, TaskId};
@@ -561,11 +566,12 @@ impl IndexScheduler {
     /// The list of tasks that were processed. The metadata of each task in the returned
     /// list is updated accordingly, with the exception of the its date fields
     /// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
-    #[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
+    #[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
     pub(crate) fn process_batch(
         &self,
         batch: Batch,
         current_batch: &mut ProcessingBatch,
+        progress: Progress,
     ) -> Result<Vec<Task>> {
         #[cfg(test)]
         {
@@ -585,8 +591,13 @@ impl IndexScheduler {
                 };
 
                 let rtxn = self.env.read_txn()?;
-                let mut canceled_tasks =
-                    self.cancel_matched_tasks(&rtxn, task.uid, current_batch, matched_tasks)?;
+                let mut canceled_tasks = self.cancel_matched_tasks(
+                    &rtxn,
+                    task.uid,
+                    current_batch,
+                    matched_tasks,
+                    &progress,
+                )?;
 
                 task.status = Status::Succeeded;
                 match &mut task.details {
@@ -617,7 +628,8 @@ impl IndexScheduler {
                 }
 
                 let mut wtxn = self.env.write_txn()?;
-                let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
+                let mut deleted_tasks =
+                    self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?;
                 wtxn.commit()?;
 
                 for task in tasks.iter_mut() {
@@ -643,6 +655,8 @@ impl IndexScheduler {
                 Ok(tasks)
             }
             Batch::SnapshotCreation(mut tasks) => {
+                progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
+
                 fs::create_dir_all(&self.snapshots_path)?;
                 let temp_snapshot_dir = tempfile::tempdir()?;
 
@@ -663,6 +677,7 @@ impl IndexScheduler {
                 // two read operations as the task processing is synchronous.
 
                 // 2.1 First copy the LMDB env of the index-scheduler
+                progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
                 let dst = temp_snapshot_dir.path().join("tasks");
                 fs::create_dir_all(&dst)?;
                 self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
@@ -675,18 +690,29 @@ impl IndexScheduler {
                 fs::create_dir_all(&update_files_dir)?;
 
                 // 2.4 Only copy the update files of the enqueued tasks
-                for task_id in self.get_status(&rtxn, Status::Enqueued)? {
+                progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
+                let enqueued = self.get_status(&rtxn, Status::Enqueued)?;
+                let (atomic, update_file_progress) =
+                    AtomicUpdateFileStep::new(enqueued.len() as u32);
+                progress.update_progress(update_file_progress);
+                for task_id in enqueued {
                     let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
                     if let Some(content_uuid) = task.content_uuid() {
                         let src = self.file_store.get_update_path(content_uuid);
                         let dst = update_files_dir.join(content_uuid.to_string());
                         fs::copy(src, dst)?;
                     }
+                    atomic.fetch_add(1, Ordering::Relaxed);
                 }
 
                 // 3. Snapshot every indexes
-                for result in self.index_mapper.index_mapping.iter(&rtxn)? {
+                progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
+                let index_mapping = self.index_mapper.index_mapping;
+                let nb_indexes = index_mapping.len(&rtxn)? as u32;
+                for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
                     let (name, uuid) = result?;
+                    progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes));
                     let index = self.index_mapper.index(&rtxn, name)?;
                     let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
                     fs::create_dir_all(&dst)?;
@@ -698,6 +724,7 @@ impl IndexScheduler {
                 drop(rtxn);
 
                 // 4. Snapshot the auth LMDB env
+                progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
                 let dst = temp_snapshot_dir.path().join("auth");
                 fs::create_dir_all(&dst)?;
                 // TODO We can't use the open_auth_store_env function here but we should
@@ -710,6 +737,7 @@ impl IndexScheduler {
                 auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
 
                 // 5. Copy and tarball the flat snapshot
+                progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
                 // 5.1 Find the original name of the database
                 // TODO find a better way to get this path
                 let mut base_path = self.env.path().to_owned();
@@ -742,6 +770,7 @@ impl IndexScheduler {
                 Ok(tasks)
             }
             Batch::Dump(mut task) => {
+                progress.update_progress(DumpCreationProgress::StartTheDumpCreation);
                 let started_at = OffsetDateTime::now_utc();
                 let (keys, instance_uid) =
                     if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
@@ -752,6 +781,7 @@ impl IndexScheduler {
                 let dump = dump::DumpWriter::new(*instance_uid)?;
 
                 // 1. dump the keys
+                progress.update_progress(DumpCreationProgress::DumpTheApiKeys);
                 let mut dump_keys = dump.create_keys()?;
                 for key in keys {
                     dump_keys.push_key(key)?;
@@ -761,7 +791,13 @@ impl IndexScheduler {
                 let rtxn = self.env.read_txn()?;
 
                 // 2. dump the tasks
+                progress.update_progress(DumpCreationProgress::DumpTheTasks);
                 let mut dump_tasks = dump.create_tasks_queue()?;
+
+                let (atomic, update_task_progress) =
+                    AtomicTaskStep::new(self.all_tasks.len(&rtxn)? as u32);
+                progress.update_progress(update_task_progress);
+
                 for ret in self.all_tasks.iter(&rtxn)? {
                     if self.must_stop_processing.get() {
                         return Err(Error::AbortedTask);
@@ -811,11 +847,22 @@ impl IndexScheduler {
                             dump_content_file.flush()?;
                         }
                     }
+                    atomic.fetch_add(1, Ordering::Relaxed);
                 }
                 dump_tasks.flush()?;
 
                 // 3. Dump the indexes
+                progress.update_progress(DumpCreationProgress::DumpTheIndexes);
+                let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
+                let mut count = 0;
                 self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
+                    progress.update_progress(VariableNameStep::new(
+                        uid.to_string(),
+                        count,
+                        nb_indexes,
+                    ));
+                    count += 1;
+
                     let rtxn = index.read_txn()?;
                     let metadata = IndexMetadata {
                         uid: uid.to_owned(),
@@ -835,6 +882,12 @@ impl IndexScheduler {
                         .embedding_configs(&rtxn)
                         .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
 
+                    let nb_documents = index
+                        .number_of_documents(&rtxn)
+                        .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
+                        as u32;
+                    let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
+                    progress.update_progress(update_document_progress);
                     let documents = index
                         .all_documents(&rtxn)
                         .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
@@ -904,6 +957,7 @@ impl IndexScheduler {
                         }
 
                         index_dumper.push_document(&document)?;
+                        atomic.fetch_add(1, Ordering::Relaxed);
                     }
 
                     // 3.2. Dump the settings
@@ -918,6 +972,7 @@ impl IndexScheduler {
                 })?;
 
                 // 4. Dump experimental feature settings
+                progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
                 let features = self.features().runtime_features();
                 dump.create_experimental_features(features)?;
 
@@ -928,6 +983,7 @@ impl IndexScheduler {
                 if self.must_stop_processing.get() {
                     return Err(Error::AbortedTask);
                 }
+                progress.update_progress(DumpCreationProgress::CompressTheDump);
                 let path = self.dumps_path.join(format!("{}.dump", dump_uid));
                 let file = File::create(path)?;
                 dump.persist_to(BufWriter::new(file))?;
@@ -953,7 +1009,7 @@ impl IndexScheduler {
                     .set_currently_updating_index(Some((index_uid.clone(), index.clone())));
 
                 let mut index_wtxn = index.write_txn()?;
-                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
+                let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
 
                 {
                     let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
@@ -987,6 +1043,8 @@ impl IndexScheduler {
                 Ok(tasks)
             }
             Batch::IndexCreation { index_uid, primary_key, task } => {
+                progress.update_progress(CreateIndexProgress::CreatingTheIndex);
+
                 let wtxn = self.env.write_txn()?;
                 if self.index_mapper.exists(&wtxn, &index_uid)? {
                     return Err(Error::IndexAlreadyExists(index_uid));
@@ -996,9 +1054,11 @@ impl IndexScheduler {
                 self.process_batch(
                     Batch::IndexUpdate { index_uid, primary_key, task },
                     current_batch,
+                    progress,
                 )
             }
             Batch::IndexUpdate { index_uid, primary_key, mut task } => {
+                progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
                 let rtxn = self.env.read_txn()?;
                 let index = self.index_mapper.index(&rtxn, &index_uid)?;
 
@@ -1051,6 +1111,7 @@ impl IndexScheduler {
                 Ok(vec![task])
             }
             Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
+                progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
                 let wtxn = self.env.write_txn()?;
 
                 // it's possible that the index doesn't exist
@@ -1084,6 +1145,8 @@ impl IndexScheduler {
                 Ok(tasks)
             }
             Batch::IndexSwap { mut task } => {
+                progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
+
                 let mut wtxn = self.env.write_txn()?;
                 let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
                     swaps
@@ -1110,8 +1173,20 @@ impl IndexScheduler {
                         ));
                     }
                 }
-                for swap in swaps {
-                    self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?;
+                progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
+                for (step, swap) in swaps.iter().enumerate() {
+                    progress.update_progress(VariableNameStep::new(
+                        format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
+                        step as u32,
+                        swaps.len() as u32,
+                    ));
+                    self.apply_index_swap(
+                        &mut wtxn,
+                        &progress,
+                        task.uid,
+                        &swap.indexes.0,
+                        &swap.indexes.1,
+                    )?;
                 }
                 wtxn.commit()?;
                 task.status = Status::Succeeded;
@@ -1121,7 +1196,15 @@ impl IndexScheduler {
     }
 
     /// Swap the index `lhs` with the index `rhs`.
-    fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
+    fn apply_index_swap(
+        &self,
+        wtxn: &mut RwTxn,
+        progress: &Progress,
+        task_id: u32,
+        lhs: &str,
+        rhs: &str,
+    ) -> Result<()> {
+        progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
        // 1. Verify that both lhs and rhs are existing indexes
        let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
        if !index_lhs_exists {
@@ -1139,14 +1222,21 @@ impl IndexScheduler {
         index_rhs_task_ids.remove_range(task_id..);
 
         // 3. before_name -> new_name in the task's KindWithContent
-        for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
+        progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
+        let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
+        let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
+        progress.update_progress(task_progress);
+
+        for task_id in tasks_to_update {
             let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
             swap_index_uid_in_task(&mut task, (lhs, rhs));
             self.all_tasks.put(wtxn, &task_id, &task)?;
+            atomic.fetch_add(1, Ordering::Relaxed);
         }
 
         // 4. remove the task from indexuid = before_name
         // 5. add the task to indexuid = after_name
+        progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
         self.update_index(wtxn, lhs, |lhs_tasks| {
             *lhs_tasks -= &index_lhs_task_ids;
             *lhs_tasks |= &index_rhs_task_ids;
@@ -1168,7 +1258,7 @@ impl IndexScheduler {
     /// The list of processed tasks.
     #[tracing::instrument(
         level = "trace",
-        skip(self, index_wtxn, index),
+        skip(self, index_wtxn, index, progress),
         target = "indexing::scheduler"
     )]
     fn apply_index_operation<'i>(
@@ -1176,44 +1266,12 @@ impl IndexScheduler {
         index_wtxn: &mut RwTxn<'i>,
         index: &'i Index,
         operation: IndexOperation,
+        progress: Progress,
     ) -> Result<Vec<Task>> {
         let indexer_alloc = Bump::new();
 
         let started_processing_at = std::time::Instant::now();
-        let secs_since_started_processing_at = AtomicU64::new(0);
-        const PRINT_SECS_DELTA: u64 = 5;
-
-        let processing_tasks = self.processing_tasks.clone();
         let must_stop_processing = self.must_stop_processing.clone();
-        let send_progress = |progress| {
-            let now = std::time::Instant::now();
-            let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed);
-            let previous = started_processing_at + Duration::from_secs(elapsed);
-            let elapsed = now - previous;
-
-            if elapsed.as_secs() < PRINT_SECS_DELTA {
-                return;
-            }
-
-            secs_since_started_processing_at
-                .store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed);
-
-            let TaskProgress {
-                current_step,
-                finished_steps,
-                total_steps,
-                finished_substeps,
-                total_substeps,
-            } = processing_tasks.write().unwrap().update_progress(progress);
-
-            tracing::info!(
-                current_step,
-                finished_steps,
-                total_steps,
-                finished_substeps,
-                total_substeps
-            );
-        };
 
         match operation {
             IndexOperation::DocumentClear { index_uid, mut tasks } => {
@@ -1245,6 +1303,7 @@ impl IndexScheduler {
                 operations,
                 mut tasks,
             } => {
+                progress.update_progress(DocumentOperationProgress::RetrievingConfig);
                 // TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
                 // this is made difficult by the fact we're doing private clones of the index scheduler and sending it
                 // to a fresh thread.
@@ -1300,6 +1359,7 @@ impl IndexScheduler {
                     }
                 };
 
+                progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
                 let (document_changes, operation_stats, primary_key) = indexer
                     .into_changes(
                         &indexer_alloc,
@@ -1308,7 +1368,7 @@ impl IndexScheduler {
                         primary_key.as_deref(),
                         &mut new_fields_ids_map,
                         &|| must_stop_processing.get(),
-                        &send_progress,
+                        progress.clone(),
                     )
                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
 
@@ -1344,6 +1404,7 @@ impl IndexScheduler {
                     }
                 }
 
+                progress.update_progress(DocumentOperationProgress::Indexing);
                 if tasks.iter().any(|res| res.error.is_none()) {
                     indexer::index(
                         index_wtxn,
@@ -1356,7 +1417,7 @@ impl IndexScheduler {
                         &document_changes,
                         embedders,
                         &|| must_stop_processing.get(),
-                        &send_progress,
+                        &progress,
                     )
                     .map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
 
@@ -1373,6 +1434,8 @@ impl IndexScheduler {
                 Ok(tasks)
             }
             IndexOperation::DocumentEdition { index_uid, mut task } => {
+                progress.update_progress(DocumentEditionProgress::RetrievingConfig);
+
                 let (filter, code) = if let KindWithContent::DocumentEdition {
                     filter_expr,
                     context: _,
@@ -1446,6 +1509,7 @@ impl IndexScheduler {
                 };
 
                 let candidates_count = candidates.len();
+                progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
                 let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
                 let document_changes = pool
                     .install(|| {
@@ -1459,6 +1523,7 @@ impl IndexScheduler {
                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
 
                 let embedders = self.embedders(index_uid.clone(), embedders)?;
+                progress.update_progress(DocumentEditionProgress::Indexing);
                 indexer::index(
                     index_wtxn,
                     index,
@@ -1470,7 +1535,7 @@ impl IndexScheduler {
                     &document_changes,
                     embedders,
                     &|| must_stop_processing.get(),
-                    &send_progress,
+                    &progress,
                 )
                 .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
 
@@ -1511,6 +1576,8 @@ impl IndexScheduler {
                 Ok(vec![task])
             }
             IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
+                progress.update_progress(DocumentDeletionProgress::RetrievingConfig);
+
                 let mut to_delete = RoaringBitmap::new();
                 let external_documents_ids = index.external_documents_ids();
 
@@ -1601,6 +1668,7 @@ impl IndexScheduler {
                     }
                 };
 
+                progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
                 let mut indexer = indexer::DocumentDeletion::new();
                 let candidates_count = to_delete.len();
                 indexer.delete_documents_by_docids(to_delete);
@@ -1610,6 +1678,7 @@ impl IndexScheduler {
                     .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
 
                 let embedders = self.embedders(index_uid.clone(), embedders)?;
+                progress.update_progress(DocumentDeletionProgress::Indexing);
                 indexer::index(
                     index_wtxn,
                     index,
@@ -1621,7 +1690,7 @@ impl IndexScheduler {
                     &document_changes,
                     embedders,
                     &|| must_stop_processing.get(),
-                    &send_progress,
+                    &progress,
                 )
                 .map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
 
@@ -1638,6 +1707,7 @@ impl IndexScheduler {
                 Ok(tasks)
             }
             IndexOperation::Settings { index_uid, settings, mut tasks } => {
+                progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);
                 let indexer_config = self.index_mapper.indexer_config();
                 let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
 
@@ -1651,6 +1721,7 @@ impl IndexScheduler {
                     task.status = Status::Succeeded;
                 }
 
+                progress.update_progress(SettingsProgress::ApplyTheSettings);
                 builder
                     .execute(
                         |indexing_step| tracing::debug!(update = ?indexing_step),
@@ -1673,12 +1744,14 @@ impl IndexScheduler {
                         index_uid: index_uid.clone(),
                         tasks: cleared_tasks,
                     },
+                    progress.clone(),
                 )?;
 
                 let settings_tasks = self.apply_index_operation(
                     index_wtxn,
                     index,
                     IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
+                    progress,
                 )?;
 
                 let mut tasks = settings_tasks;
@@ -1695,15 +1768,18 @@ impl IndexScheduler {
         &self,
         wtxn: &mut RwTxn,
         matched_tasks: &RoaringBitmap,
+        progress: &Progress,
     ) -> Result<RoaringBitmap> {
+        progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime);
+
         // 1. Remove from this list the tasks that we are not allowed to delete
         let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
         let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
 
         let all_task_ids = self.all_task_ids(wtxn)?;
         let mut to_delete_tasks = all_task_ids & matched_tasks;
-        to_delete_tasks -= processing_tasks;
-        to_delete_tasks -= enqueued_tasks;
+        to_delete_tasks -= &**processing_tasks;
+        to_delete_tasks -= &enqueued_tasks;
 
         // 2. We now have a list of tasks to delete, delete them
 
@@ -1714,6 +1790,8 @@ impl IndexScheduler {
         // The tasks that have been removed *per batches*.
         let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
 
+        let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
+        progress.update_progress(task_progress);
        for task_id in to_delete_tasks.iter() {
            let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
 
@@ -1737,22 +1815,35 @@ impl IndexScheduler {
             if let Some(batch_uid) = task.batch_uid {
                 affected_batches.entry(batch_uid).or_default().insert(task_id);
             }
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
         }
 
+        progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
+        let (atomic_progress, task_progress) = AtomicTaskStep::new(
+            (affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
+        );
+        progress.update_progress(task_progress);
         for index in affected_indexes.iter() {
             self.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
         }
+
         for status in affected_statuses.iter() {
             self.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
        }
+
        for kind in affected_kinds.iter() {
            self.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
        }
+
+        progress.update_progress(TaskDeletionProgress::DeletingTasks);
+        let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
+        progress.update_progress(task_progress);
        for task in to_delete_tasks.iter() {
            self.all_tasks.delete(wtxn, &task)?;
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
        }
        for canceled_by in affected_canceled_by {
            if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
@@ -1764,6 +1855,9 @@ impl IndexScheduler {
                 }
             }
         }
+        progress.update_progress(TaskDeletionProgress::DeletingBatches);
+        let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
+        progress.update_progress(batch_progress);
         for (batch_id, to_delete_tasks) in affected_batches {
             if let Some(mut tasks) = self.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
                 tasks -= &to_delete_tasks;
@@ -1805,6 +1899,7 @@ impl IndexScheduler {
                     }
                 }
             }
+            atomic_progress.fetch_add(1, Ordering::Relaxed);
         }
 
         Ok(to_delete_tasks)
@@ -1819,21 +1914,36 @@ impl IndexScheduler {
         cancel_task_id: TaskId,
         current_batch: &mut ProcessingBatch,
         matched_tasks: &RoaringBitmap,
+        progress: &Progress,
     ) -> Result<Vec<Task>> {
+        progress.update_progress(TaskCancelationProgress::RetrievingTasks);
+
         // 1. Remove from this list the tasks that we are not allowed to cancel
         // Notice that only the _enqueued_ ones are cancelable and we should
         // have already aborted the indexation of the _processing_ ones
         let cancelable_tasks = self.get_status(rtxn, Status::Enqueued)?;
         let tasks_to_cancel = cancelable_tasks & matched_tasks;
 
-        // 2. We now have a list of tasks to cancel, cancel them
-        let mut tasks = self.get_existing_tasks(rtxn, tasks_to_cancel.iter())?;
+        let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
+        progress.update_progress(progress_obj);
+
+        // 2. We now have a list of tasks to cancel, cancel them
+        let mut tasks = self.get_existing_tasks(
+            rtxn,
+            tasks_to_cancel.iter().inspect(|_| {
+                task_progress.fetch_add(1, Ordering::Relaxed);
+            }),
+        )?;
 
+        progress.update_progress(TaskCancelationProgress::UpdatingTasks);
+        let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
+        progress.update_progress(progress_obj);
         for task in tasks.iter_mut() {
             task.status = Status::Canceled;
             task.canceled_by = Some(cancel_task_id);
             task.details = task.details.as_ref().map(|d| d.to_failed());
             current_batch.processing(Some(task));
+            task_progress.fetch_add(1, Ordering::Relaxed);
         }
 
         Ok(tasks)

@@ -3,10 +3,6 @@ use std::sync::{Arc, RwLock};
 use std::time::Duration;
 use std::{fs, thread};
 
-use self::index_map::IndexMap;
-use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
-use crate::uuid_codec::UuidCodec;
-use crate::{Error, Result};
 use meilisearch_types::heed::types::{SerdeJson, Str};
 use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
 use meilisearch_types::milli;
@@ -17,6 +13,11 @@ use time::OffsetDateTime;
 use tracing::error;
 use uuid::Uuid;
 
+use self::index_map::IndexMap;
+use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
+use crate::uuid_codec::UuidCodec;
+use crate::{Error, Result};
+
 mod index_map;
 
 const INDEX_MAPPING: &str = "index-mapping";


@@ -353,7 +353,7 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
 pub fn snapshot_batch(batch: &Batch) -> String {
     let mut snap = String::new();
-    let Batch { uid, details, stats, started_at, finished_at } = batch;
+    let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
     if let Some(finished_at) = finished_at {
         assert!(finished_at > started_at);
     }


@@ -26,6 +26,7 @@ mod index_mapper;
 #[cfg(test)]
 mod insta_snapshot;
 mod lru;
+mod processing;
 mod utils;
 pub mod uuid_codec;
@@ -56,12 +57,12 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
 use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
 use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::index::IndexEmbeddingConfig;
-use meilisearch_types::milli::update::new::indexer::document_changes::Progress;
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
 use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
 use meilisearch_types::task_view::TaskView;
-use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress};
+use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
+use processing::ProcessingTasks;
 use rayon::current_num_threads;
 use rayon::prelude::{IntoParallelIterator, ParallelIterator};
 use roaring::RoaringBitmap;
@@ -72,7 +73,8 @@ use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map
 use uuid::Uuid;
 use crate::index_mapper::IndexMapper;
-use crate::utils::{check_index_swap_validity, clamp_to_page_size, ProcessingBatch};
+use crate::processing::{AtomicTaskStep, BatchProgress};
+use crate::utils::{check_index_swap_validity, clamp_to_page_size};
 pub(crate) type BEI128 = I128<BE>;
@@ -163,48 +165,6 @@ impl Query {
     }
 }
-#[derive(Debug, Clone)]
-pub struct ProcessingTasks {
-    batch: Option<ProcessingBatch>,
-    /// The list of tasks ids that are currently running.
-    processing: RoaringBitmap,
-    /// The progress on processing tasks
-    progress: Option<TaskProgress>,
-}
-
-impl ProcessingTasks {
-    /// Creates an empty `ProcessingAt` struct.
-    fn new() -> ProcessingTasks {
-        ProcessingTasks { batch: None, processing: RoaringBitmap::new(), progress: None }
-    }
-
-    /// Stores the currently processing tasks, and the date time at which it started.
-    fn start_processing(&mut self, processing_batch: ProcessingBatch, processing: RoaringBitmap) {
-        self.batch = Some(processing_batch);
-        self.processing = processing;
-    }
-
-    fn update_progress(&mut self, progress: Progress) -> TaskProgress {
-        self.progress.get_or_insert_with(TaskProgress::default).update(progress)
-    }
-
-    /// Set the processing tasks to an empty list
-    fn stop_processing(&mut self) -> Self {
-        self.progress = None;
-
-        Self {
-            batch: std::mem::take(&mut self.batch),
-            processing: std::mem::take(&mut self.processing),
-            progress: None,
-        }
-    }
-
-    /// Returns `true` if there, at least, is one task that is currently processing that we must stop.
-    fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
-        !self.processing.is_disjoint(canceled_tasks)
-    }
-}
-
 #[derive(Default, Clone, Debug)]
 struct MustStopProcessing(Arc<AtomicBool>);
@@ -813,7 +773,7 @@ impl IndexScheduler {
             let mut batch_tasks = RoaringBitmap::new();
             for batch_uid in batch_uids {
                 if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
-                    batch_tasks |= &processing_tasks;
+                    batch_tasks |= &*processing_tasks;
                 } else {
                     batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
                 }
@@ -827,13 +787,13 @@ impl IndexScheduler {
                 match status {
                     // special case for Processing tasks
                     Status::Processing => {
-                        status_tasks |= &processing_tasks;
+                        status_tasks |= &*processing_tasks;
                     }
                     status => status_tasks |= &self.get_status(rtxn, *status)?,
                 };
             }
             if !status.contains(&Status::Processing) {
-                tasks -= &processing_tasks;
+                tasks -= &*processing_tasks;
             }
             tasks &= status_tasks;
         }
@@ -882,7 +842,7 @@ impl IndexScheduler {
         // Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
         tasks = {
             let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
-                (&tasks - &processing_tasks, &tasks & &processing_tasks);
+                (&tasks - &*processing_tasks, &tasks & &*processing_tasks);
             // special case for Processing tasks
             // A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
@@ -1090,7 +1050,7 @@ impl IndexScheduler {
         // Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
         batches = {
             let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
-                (&batches - &processing.processing, &batches & &processing.processing);
+                (&batches - &*processing.processing, &batches & &*processing.processing);
             // special case for Processing batches
             // A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
@@ -1606,7 +1566,8 @@ impl IndexScheduler {
         // We reset the must_stop flag to be sure that we don't stop processing tasks
         self.must_stop_processing.reset();
-        self.processing_tasks
+        let progress = self
+            .processing_tasks
             .write()
             .unwrap()
             // We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
@@ -1619,11 +1580,12 @@ impl IndexScheduler {
         let res = {
             let cloned_index_scheduler = self.private_clone();
             let processing_batch = &mut processing_batch;
+            let progress = progress.clone();
             std::thread::scope(|s| {
                 let handle = std::thread::Builder::new()
                     .name(String::from("batch-operation"))
                     .spawn_scoped(s, move || {
-                        cloned_index_scheduler.process_batch(batch, processing_batch)
+                        cloned_index_scheduler.process_batch(batch, processing_batch, progress)
                     })
                     .unwrap();
                 handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
@@ -1636,6 +1598,7 @@ impl IndexScheduler {
         #[cfg(test)]
         self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
+        progress.update_progress(BatchProgress::WritingTasksToDisk);
         processing_batch.finished();
         let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
         let mut canceled = RoaringBitmap::new();
@@ -1645,12 +1608,15 @@ impl IndexScheduler {
                 #[cfg(test)]
                 self.breakpoint(Breakpoint::ProcessBatchSucceeded);
+                let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
+                progress.update_progress(task_progress_obj);
                 let mut success = 0;
                 let mut failure = 0;
                 let mut canceled_by = None;
                 #[allow(unused_variables)]
                 for (i, mut task) in tasks.into_iter().enumerate() {
+                    task_progress.fetch_add(1, Ordering::Relaxed);
                     processing_batch.update(&mut task);
                     if task.status == Status::Canceled {
                         canceled.insert(task.uid);
@@ -1718,8 +1684,12 @@ impl IndexScheduler {
             Err(err) => {
                 #[cfg(test)]
                 self.breakpoint(Breakpoint::ProcessBatchFailed);
+                let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
+                progress.update_progress(task_progress_obj);
                 let error: ResponseError = err.into();
                 for id in ids.iter() {
+                    task_progress.fetch_add(1, Ordering::Relaxed);
                     let mut task = self
                         .get_task(&wtxn, id)
                         .map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?


@@ -0,0 +1,316 @@
use std::borrow::Cow;
use std::sync::Arc;

use enum_iterator::Sequence;
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step};
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
use roaring::RoaringBitmap;

use crate::utils::ProcessingBatch;

#[derive(Clone)]
pub struct ProcessingTasks {
    pub batch: Option<Arc<ProcessingBatch>>,
    /// The list of tasks ids that are currently running.
    pub processing: Arc<RoaringBitmap>,
    /// The progress on processing tasks
    pub progress: Option<Progress>,
}

impl ProcessingTasks {
    /// Creates an empty `ProcessingTasks` struct.
    pub fn new() -> ProcessingTasks {
        ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None }
    }

    pub fn get_progress_view(&self) -> Option<ProgressView> {
        Some(self.progress.as_ref()?.as_progress_view())
    }

    /// Stores the currently processing tasks, and the date time at which it started.
    pub fn start_processing(
        &mut self,
        processing_batch: ProcessingBatch,
        processing: RoaringBitmap,
    ) -> Progress {
        self.batch = Some(Arc::new(processing_batch));
        self.processing = Arc::new(processing);
        let progress = Progress::default();
        progress.update_progress(BatchProgress::ProcessingTasks);
        self.progress = Some(progress.clone());

        progress
    }

    /// Sets the processing tasks to an empty list.
    pub fn stop_processing(&mut self) -> Self {
        self.progress = None;

        Self {
            batch: std::mem::take(&mut self.batch),
            processing: std::mem::take(&mut self.processing),
            progress: None,
        }
    }

    /// Returns `true` if at least one task that is currently processing must be stopped.
    pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
        !self.processing.is_disjoint(canceled_tasks)
    }
}
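// A minimal usage sketch (illustrative, not part of the diff): the scheduler calls
// `start_processing` once per batch and moves the returned `Progress` into the batch
// thread, while readers snapshot it through `get_progress_view` at any time.
//
//     let mut processing = ProcessingTasks::new();
//     let progress = processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
//     progress.update_progress(BatchProgress::WritingTasksToDisk);
//     let view = processing.get_progress_view(); // serializable snapshot of the current steps
//     processing.stop_processing();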
make_enum_progress! {
    pub enum BatchProgress {
        ProcessingTasks,
        WritingTasksToDisk,
    }
}

make_enum_progress! {
    pub enum TaskCancelationProgress {
        RetrievingTasks,
        UpdatingTasks,
    }
}

make_enum_progress! {
    pub enum TaskDeletionProgress {
        DeletingTasksDateTime,
        DeletingTasksMetadata,
        DeletingTasks,
        DeletingBatches,
    }
}

make_enum_progress! {
    pub enum SnapshotCreationProgress {
        StartTheSnapshotCreation,
        SnapshotTheIndexScheduler,
        SnapshotTheUpdateFiles,
        SnapshotTheIndexes,
        SnapshotTheApiKeys,
        CreateTheTarball,
    }
}

make_enum_progress! {
    pub enum DumpCreationProgress {
        StartTheDumpCreation,
        DumpTheApiKeys,
        DumpTheTasks,
        DumpTheIndexes,
        DumpTheExperimentalFeatures,
        CompressTheDump,
    }
}

make_enum_progress! {
    pub enum CreateIndexProgress {
        CreatingTheIndex,
    }
}

make_enum_progress! {
    pub enum UpdateIndexProgress {
        UpdatingTheIndex,
    }
}

make_enum_progress! {
    pub enum DeleteIndexProgress {
        DeletingTheIndex,
    }
}

make_enum_progress! {
    pub enum SwappingTheIndexes {
        EnsuringCorrectnessOfTheSwap,
        SwappingTheIndexes,
    }
}

make_enum_progress! {
    pub enum InnerSwappingTwoIndexes {
        RetrieveTheTasks,
        UpdateTheTasks,
        UpdateTheIndexesMetadata,
    }
}

make_enum_progress! {
    pub enum DocumentOperationProgress {
        RetrievingConfig,
        ComputingDocumentChanges,
        Indexing,
    }
}

make_enum_progress! {
    pub enum DocumentEditionProgress {
        RetrievingConfig,
        ComputingDocumentChanges,
        Indexing,
    }
}

make_enum_progress! {
    pub enum DocumentDeletionProgress {
        RetrievingConfig,
        DeleteDocuments,
        Indexing,
    }
}

make_enum_progress! {
    pub enum SettingsProgress {
        RetrievingAndMergingTheSettings,
        ApplyTheSettings,
    }
}

make_atomic_progress!(Task alias AtomicTaskStep => "task" );
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );

pub struct VariableNameStep {
    name: String,
    current: u32,
    total: u32,
}

impl VariableNameStep {
    pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
        Self { name: name.into(), current, total }
    }
}

impl Step for VariableNameStep {
    fn name(&self) -> Cow<'static, str> {
        self.name.clone().into()
    }

    fn current(&self) -> u32 {
        self.current
    }

    fn total(&self) -> u32 {
        self.total
    }
}
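// A minimal sketch of `VariableNameStep` (assuming a `progress: Progress` in scope): it
// covers steps whose names are only known at runtime, e.g. one step per index in a swap.
//
//     let indexes = ["movies", "books"];
//     for (i, name) in indexes.iter().enumerate() {
//         progress.update_progress(VariableNameStep::new(*name, i as u32, indexes.len() as u32));
//         // ... process `name` ...
//     }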
#[cfg(test)]
mod test {
    use std::sync::atomic::Ordering;

    use meili_snap::{json_string, snapshot};

    use super::*;

    #[test]
    fn one_level() {
        let mut processing = ProcessingTasks::new();
        processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
        snapshot!(json_string!(processing.get_progress_view()), @r#"
        {
          "steps": [
            {
              "currentStep": "processing tasks",
              "finished": 0,
              "total": 2
            }
          ],
          "percentage": 0.0
        }
        "#);

        processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
        snapshot!(json_string!(processing.get_progress_view()), @r#"
        {
          "steps": [
            {
              "currentStep": "writing tasks to disk",
              "finished": 1,
              "total": 2
            }
          ],
          "percentage": 50.0
        }
        "#);
    }

    #[test]
    fn task_progress() {
        let mut processing = ProcessingTasks::new();
        processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
        let (atomic, tasks) = AtomicTaskStep::new(10);
        processing.progress.as_ref().unwrap().update_progress(tasks);
        snapshot!(json_string!(processing.get_progress_view()), @r#"
        {
          "steps": [
            {
              "currentStep": "processing tasks",
              "finished": 0,
              "total": 2
            },
            {
              "currentStep": "task",
              "finished": 0,
              "total": 10
            }
          ],
          "percentage": 0.0
        }
        "#);

        atomic.fetch_add(6, Ordering::Relaxed);
        snapshot!(json_string!(processing.get_progress_view()), @r#"
        {
          "steps": [
            {
              "currentStep": "processing tasks",
              "finished": 0,
              "total": 2
            },
            {
              "currentStep": "task",
              "finished": 6,
              "total": 10
            }
          ],
          "percentage": 30.000002
        }
        "#);

        processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
        snapshot!(json_string!(processing.get_progress_view()), @r#"
        {
          "steps": [
            {
              "currentStep": "writing tasks to disk",
              "finished": 1,
              "total": 2
            }
          ],
          "percentage": 50.0
        }
        "#);

        let (atomic, tasks) = AtomicTaskStep::new(5);
        processing.progress.as_ref().unwrap().update_progress(tasks);
        atomic.fetch_add(4, Ordering::Relaxed);
        snapshot!(json_string!(processing.get_progress_view()), @r#"
        {
          "steps": [
            {
              "currentStep": "writing tasks to disk",
              "finished": 1,
              "total": 2
            },
            {
              "currentStep": "task",
              "finished": 4,
              "total": 5
            }
          ],
          "percentage": 90.0
        }
        "#);
    }
}


@@ -134,6 +134,7 @@ impl ProcessingBatch {
     pub fn to_batch(&self) -> Batch {
         Batch {
             uid: self.uid,
+            progress: None,
             details: self.details.clone(),
             stats: self.stats.clone(),
             started_at: self.started_at,
@@ -187,6 +188,7 @@ impl IndexScheduler {
             &batch.uid,
             &Batch {
                 uid: batch.uid,
+                progress: None,
                 details: batch.details,
                 stats: batch.stats,
                 started_at: batch.started_at,
@@ -273,7 +275,9 @@ impl IndexScheduler {
             .into_iter()
             .map(|batch_id| {
                 if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
-                    Ok(processing.batch.as_ref().unwrap().to_batch())
+                    let mut batch = processing.batch.as_ref().unwrap().to_batch();
+                    batch.progress = processing.get_progress_view();
+                    Ok(batch)
                 } else {
                     self.get_batch(rtxn, batch_id)
                         .and_then(|task| task.ok_or(Error::CorruptedTaskQueue))


@@ -1,16 +1,16 @@
+use milli::progress::ProgressView;
 use serde::Serialize;
 use time::{Duration, OffsetDateTime};
-use crate::{
-    batches::{Batch, BatchId, BatchStats},
-    task_view::DetailsView,
-    tasks::serialize_duration,
-};
+use crate::batches::{Batch, BatchId, BatchStats};
+use crate::task_view::DetailsView;
+use crate::tasks::serialize_duration;
 #[derive(Debug, Clone, Serialize)]
 #[serde(rename_all = "camelCase")]
 pub struct BatchView {
     pub uid: BatchId,
+    pub progress: Option<ProgressView>,
     pub details: DetailsView,
     pub stats: BatchStats,
     #[serde(serialize_with = "serialize_duration", default)]
@@ -25,6 +25,7 @@ impl BatchView {
     pub fn from_batch(batch: &Batch) -> Self {
         Self {
             uid: batch.uid,
+            progress: batch.progress.clone(),
             details: batch.details.clone(),
             stats: batch.stats.clone(),
             duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),


@@ -1,12 +1,11 @@
 use std::collections::BTreeMap;
+use milli::progress::ProgressView;
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;
-use crate::{
-    task_view::DetailsView,
-    tasks::{Kind, Status},
-};
+use crate::task_view::DetailsView;
+use crate::tasks::{Kind, Status};
 pub type BatchId = u32;
@@ -15,6 +14,8 @@ pub type BatchId = u32;
 pub struct Batch {
     pub uid: BatchId,
+    #[serde(skip)]
+    pub progress: Option<ProgressView>,
     pub details: DetailsView,
     pub stats: BatchStats,


@@ -4,7 +4,6 @@ use std::fmt::{Display, Write};
 use std::str::FromStr;
 use enum_iterator::Sequence;
-use milli::update::new::indexer::document_changes::Progress;
 use milli::update::IndexDocumentsMethod;
 use milli::Object;
 use roaring::RoaringBitmap;
@@ -41,62 +40,6 @@ pub struct Task {
     pub kind: KindWithContent,
 }
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
-#[serde(rename_all = "camelCase")]
-pub struct TaskProgress {
-    pub current_step: &'static str,
-    pub finished_steps: u16,
-    pub total_steps: u16,
-    pub finished_substeps: Option<u32>,
-    pub total_substeps: Option<u32>,
-}
-
-impl Default for TaskProgress {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl TaskProgress {
-    pub fn new() -> Self {
-        Self {
-            current_step: "start",
-            finished_steps: 0,
-            total_steps: 1,
-            finished_substeps: None,
-            total_substeps: None,
-        }
-    }
-
-    pub fn update(&mut self, progress: Progress) -> TaskProgress {
-        if self.finished_steps > progress.finished_steps {
-            return *self;
-        }
-
-        if self.current_step != progress.step_name {
-            self.current_step = progress.step_name
-        }
-
-        self.total_steps = progress.total_steps;
-
-        if self.finished_steps < progress.finished_steps {
-            self.finished_substeps = None;
-            self.total_substeps = None;
-        }
-        self.finished_steps = progress.finished_steps;
-        if let Some((finished_substeps, total_substeps)) = progress.finished_total_substep {
-            if let Some(task_finished_substeps) = self.finished_substeps {
-                if task_finished_substeps > finished_substeps {
-                    return *self;
-                }
-            }
-            self.finished_substeps = Some(finished_substeps);
-            self.total_substeps = Some(total_substeps);
-        }
-        *self
-    }
-}
-
 impl Task {
     pub fn index_uid(&self) -> Option<&str> {
         use KindWithContent::*;


@@ -1,18 +1,18 @@
-use actix_web::{
-    web::{self, Data},
-    HttpResponse,
-};
+use actix_web::web::{self, Data};
+use actix_web::HttpResponse;
 use deserr::actix_web::AwebQueryParameter;
 use index_scheduler::{IndexScheduler, Query};
-use meilisearch_types::{
-    batch_view::BatchView, batches::BatchId, deserr::DeserrQueryParamError, error::ResponseError,
-    keys::actions,
-};
+use meilisearch_types::batch_view::BatchView;
+use meilisearch_types::batches::BatchId;
+use meilisearch_types::deserr::DeserrQueryParamError;
+use meilisearch_types::error::ResponseError;
+use meilisearch_types::keys::actions;
 use serde::Serialize;
-use crate::extractors::{authentication::GuardedData, sequential_extractor::SeqHandler};
-use super::{tasks::TasksFilterQuery, ActionPolicy};
+use super::tasks::TasksFilterQuery;
+use super::ActionPolicy;
+use crate::extractors::authentication::GuardedData;
+use crate::extractors::sequential_extractor::SeqHandler;
 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::get().to(SeqHandler(get_batches))))


@@ -284,6 +284,7 @@ async fn test_summarized_document_addition_or_update() {
         @r#"
     {
       "uid": 0,
+      "progress": null,
       "details": {
         "receivedDocuments": 1,
         "indexedDocuments": 1
@@ -314,6 +315,7 @@ async fn test_summarized_document_addition_or_update() {
         @r#"
     {
       "uid": 1,
+      "progress": null,
       "details": {
         "receivedDocuments": 1,
         "indexedDocuments": 1
@@ -349,6 +351,7 @@ async fn test_summarized_delete_documents_by_batch() {
         @r#"
     {
       "uid": 0,
+      "progress": null,
       "details": {
         "providedIds": 3,
         "deletedDocuments": 0
@@ -380,6 +383,7 @@ async fn test_summarized_delete_documents_by_batch() {
         @r#"
     {
       "uid": 2,
+      "progress": null,
       "details": {
         "providedIds": 1,
         "deletedDocuments": 0
@@ -416,6 +420,7 @@ async fn test_summarized_delete_documents_by_filter() {
         @r#"
     {
       "uid": 0,
+      "progress": null,
       "details": {
         "providedIds": 0,
         "deletedDocuments": 0,
@@ -448,6 +453,7 @@ async fn test_summarized_delete_documents_by_filter() {
         @r#"
     {
       "uid": 2,
+      "progress": null,
       "details": {
         "providedIds": 0,
         "deletedDocuments": 0,
@@ -480,6 +486,7 @@ async fn test_summarized_delete_documents_by_filter() {
         @r#"
     {
       "uid": 4,
+      "progress": null,
       "details": {
         "providedIds": 0,
         "deletedDocuments": 0,
@@ -516,6 +523,7 @@ async fn test_summarized_delete_document_by_id() {
         @r#"
     {
       "uid": 0,
+      "progress": null,
       "details": {
         "providedIds": 1,
         "deletedDocuments": 0
@@ -547,6 +555,7 @@ async fn test_summarized_delete_document_by_id() {
         @r#"
     {
       "uid": 2,
+      "progress": null,
       "details": {
         "providedIds": 1,
         "deletedDocuments": 0
@@ -594,6 +603,7 @@ async fn test_summarized_settings_update() {
         @r#"
     {
       "uid": 0,
+      "progress": null,
       "details": {
         "displayedAttributes": [
           "doggos",
@@ -638,6 +648,7 @@ async fn test_summarized_index_creation() {
         @r#"
     {
       "uid": 0,
+      "progress": null,
       "details": {},
       "stats": {
         "totalNbTasks": 1,
@@ -665,6 +676,7 @@ async fn test_summarized_index_creation() {
         @r#"
     {
       "uid": 1,
+      "progress": null,
       "details": {
         "primaryKey": "doggos"
       },
@@ -809,6 +821,7 @@ async fn test_summarized_index_update() {
         @r#"
     {
       "uid": 0,
+      "progress": null,
       "details": {},
       "stats": {
         "totalNbTasks": 1,
@@ -836,6 +849,7 @@ async fn test_summarized_index_update() {
         @r#"
     {
       "uid": 1,
+      "progress": null,
       "details": {
         "primaryKey": "bones"
       },
@@ -868,6 +882,7 @@ async fn test_summarized_index_update() {
         @r#"
     {
       "uid": 3,
+      "progress": null,
       "details": {},
       "stats": {
         "totalNbTasks": 1,
@@ -895,6 +910,7 @@ async fn test_summarized_index_update() {
         @r#"
     {
       "uid": 4,
+      "progress": null,
       "details": {
         "primaryKey": "bones"
       },
@@ -932,6 +948,7 @@ async fn test_summarized_index_swap() {
         @r#"
     {
       "uid": 0,
+      "progress": null,
       "details": {
         "swaps": [
           {
@@ -972,6 +989,7 @@ async fn test_summarized_index_swap() {
         @r#"
     {
       "uid": 3,
+      "progress": null,
       "details": {
         "swaps": [
           {
@@ -1014,6 +1032,7 @@ async fn test_summarized_batch_cancelation() {
         @r#"
     {
       "uid": 1,
+      "progress": null,
       "details": {
         "matchedTasks": 1,
         "canceledTasks": 0,
@@ -1051,6 +1070,7 @@ async fn test_summarized_batch_deletion() {
         @r#"
     {
       "uid": 1,
+      "progress": null,
       "details": {
         "matchedTasks": 1,
         "deletedTasks": 1,
@@ -1084,6 +1104,7 @@ async fn test_summarized_dump_creation() {
         @r#"
     {
       "uid": 0,
+      "progress": null,
       "details": {
         "dumpUid": "[dumpUid]"
       },


@@ -7,7 +7,6 @@ use std::path::{Path, PathBuf};
 use anyhow::{bail, Context};
 use meilisearch_types::versioning::create_version_file;
-use v1_10::v1_9_to_v1_10;
 use v1_12::v1_11_to_v1_12;


@@ -1,18 +1,13 @@
-use anyhow::bail;
 use std::path::Path;
-use anyhow::Context;
-use meilisearch_types::{
-    heed::{
-        types::{SerdeJson, Str},
-        Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
-    },
-    milli::index::{db_name, main_key},
-};
+use anyhow::{bail, Context};
+use meilisearch_types::heed::types::{SerdeJson, Str};
+use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
+use meilisearch_types::milli::index::{db_name, main_key};
-use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
 use super::v1_9;
+use crate::uuid_codec::UuidCodec;
+use crate::{try_opening_database, try_opening_poly_database};
 pub type FieldDistribution = std::collections::BTreeMap<String, u64>;


@@ -7,12 +7,12 @@
 use std::path::Path;
 use anyhow::Context;
-use meilisearch_types::{
-    heed::{types::Str, Database, EnvOpenOptions},
-    milli::index::db_name,
-};
+use meilisearch_types::heed::types::Str;
+use meilisearch_types::heed::{Database, EnvOpenOptions};
+use meilisearch_types::milli::index::db_name;
-use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
+use crate::uuid_codec::UuidCodec;
+use crate::{try_opening_database, try_opening_poly_database};
 pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> {
     println!("Upgrading from v1.10.0 to v1.11.0");


@@ -1,7 +1,8 @@
 //! The breaking changes that happened between the v1.11 and the v1.12 are:
 //! - The new indexer changed the update files format from OBKV to ndjson. https://github.com/meilisearch/meilisearch/pull/4900
-use std::{io::BufWriter, path::Path};
+use std::io::BufWriter;
+use std::path::Path;
 use anyhow::Context;
 use file_store::FileStore;


@@ -1734,6 +1734,7 @@ pub(crate) mod tests {
     use crate::error::{Error, InternalError};
     use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
+    use crate::progress::Progress;
     use crate::update::new::indexer;
     use crate::update::settings::InnerIndexSettings;
     use crate::update::{
@@ -1810,7 +1811,7 @@ pub(crate) mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                &|_progress| (),
+                Progress::default(),
             )?;
             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -1829,7 +1830,7 @@ pub(crate) mod tests {
                     &document_changes,
                     embedders,
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
             })
             .unwrap()?;
@@ -1901,7 +1902,7 @@ pub(crate) mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                &|_progress| (),
+                Progress::default(),
             )?;
             if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
@@ -1920,7 +1921,7 @@ pub(crate) mod tests {
                     &document_changes,
                     embedders,
                     &|| false,
-                    &|_| (),
+                    &Progress::default(),
                 )
             })
            .unwrap()?;
@@ -1982,7 +1983,7 @@ pub(crate) mod tests {
                 None,
                 &mut new_fields_ids_map,
                 &|| false,
-                &|_progress| (),
+                Progress::default(),
             )
             .unwrap();
@@ -2001,7 +2002,7 @@ pub(crate) mod tests {
                     &document_changes,
                     embedders,
                     &|| should_abort.load(Relaxed),
-                    &|_| (),
+                    &Progress::default(),
                 )
             })
             .unwrap()


@@ -31,6 +31,7 @@ pub mod vector;
 #[macro_use]
 pub mod snapshot_tests;
 mod fieldids_weights_map;
+pub mod progress;
 use std::collections::{BTreeMap, HashMap};
 use std::convert::{TryFrom, TryInto};


@@ -0,0 +1,152 @@
use std::any::TypeId;
use std::borrow::Cow;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};

use serde::Serialize;

pub trait Step: 'static + Send + Sync {
    fn name(&self) -> Cow<'static, str>;
    fn current(&self) -> u32;
    fn total(&self) -> u32;
}

#[derive(Clone, Default)]
pub struct Progress {
    steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>,
}

impl Progress {
    pub fn update_progress<P: Step>(&self, sub_progress: P) {
        let mut steps = self.steps.write().unwrap();
        let step_type = TypeId::of::<P>();
        if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) {
            steps.truncate(idx);
        }
        steps.push((step_type, Box::new(sub_progress)));
    }

    // TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
    pub fn as_progress_view(&self) -> ProgressView {
        let steps = self.steps.read().unwrap();

        let mut percentage = 0.0;
        let mut prev_factors = 1.0;

        let mut step_view = Vec::with_capacity(steps.len());
        for (_, step) in steps.iter() {
            prev_factors *= step.total() as f32;
            percentage += step.current() as f32 / prev_factors;

            step_view.push(ProgressStepView {
                current_step: step.name(),
                finished: step.current(),
                total: step.total(),
            });
        }

        ProgressView { steps: step_view, percentage: percentage * 100.0 }
    }
}
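// Worked example of the computation above: with a two-step enum currently at step 1 of 2
// and a nested atomic sub-step at 6 of 10, `prev_factors` is 2, then 2 * 10 = 20, so the
// percentage is 1/2 + 6/20 = 0.8, reported as ~80%. Each nested step only contributes
// within the slice left by its parents, so a sub-step can never overshoot its parent.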
/// This trait lets you use the AtomicSubStep defined right below.
/// The name must be a const that never changes, but that can't be enforced by the type system because it would make the trait non object-safe.
/// By forcing the Default trait + the &'static str, we make it harder to misuse the trait.
pub trait NamedStep: 'static + Send + Sync + Default {
    fn name(&self) -> &'static str;
}

/// Structure to quickly define steps that need very quick, lockless updating of their current step.
/// You can use this struct if:
/// - The name of the step doesn't change
/// - The total number of steps doesn't change
pub struct AtomicSubStep<Name: NamedStep> {
    unit_name: Name,
    current: Arc<AtomicU32>,
    total: u32,
}

impl<Name: NamedStep> AtomicSubStep<Name> {
    pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
        let current = Arc::new(AtomicU32::new(0));
        (current.clone(), Self { current, total, unit_name: Name::default() })
    }
}

impl<Name: NamedStep> Step for AtomicSubStep<Name> {
    fn name(&self) -> Cow<'static, str> {
        self.unit_name.name().into()
    }

    fn current(&self) -> u32 {
        self.current.load(Ordering::Relaxed)
    }

    fn total(&self) -> u32 {
        self.total
    }
}

#[macro_export]
macro_rules! make_enum_progress {
    ($visibility:vis enum $name:ident { $($variant:ident,)+ }) => {
        #[repr(u8)]
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
        #[allow(clippy::enum_variant_names)]
        $visibility enum $name {
            $($variant),+
        }

        impl Step for $name {
            fn name(&self) -> Cow<'static, str> {
                use convert_case::Casing;

                match self {
                    $(
                        $name::$variant => stringify!($variant).from_case(convert_case::Case::Camel).to_case(convert_case::Case::Lower).into()
                    ),+
                }
            }

            fn current(&self) -> u32 {
                *self as u32
            }

            fn total(&self) -> u32 {
                Self::CARDINALITY as u32
            }
        }
    };
}
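// Rough expansion sketch (hypothetical `Example` enum, not in the diff):
// `make_enum_progress! { pub enum Example { FetchingDocuments, WritingDocuments, } }`
// yields a `Sequence` enum whose `current()` is the variant discriminant, `total()` is
// the variant count (2 here), and `name()` is the variant rendered in lower case with
// spaces ("fetching documents"), which is the `currentStep` string seen in the
// snapshot tests earlier in this diff.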
#[macro_export]
macro_rules! make_atomic_progress {
    ($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => {
        #[derive(Default, Debug, Clone, Copy)]
        pub struct $struct_name {}
        impl NamedStep for $struct_name {
            fn name(&self) -> &'static str {
                $step_name
            }
        }
        pub type $atomic_struct_name = AtomicSubStep<$struct_name>;
    };
}

make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );

#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProgressView {
    pub steps: Vec<ProgressStepView>,
    pub percentage: f32,
}

#[derive(Debug, Serialize, Clone)]
#[serde(rename_all = "camelCase")]
pub struct ProgressStepView {
    pub current_step: Cow<'static, str>,
    pub finished: u32,
    pub total: u32,
}
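// Note (illustrative): `ProgressView` is the serializable face of `Progress`; with
// serde's camelCase renaming it produces JSON of the shape
// `{"steps":[{"currentStep":"processing tasks","finished":0,"total":2}],"percentage":0.0}`,
// which is what the new `progress` field of the `/batches` route exposes.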


@@ -5,6 +5,7 @@ use bumpalo::Bump;
 use heed::EnvOpenOptions;
 use maplit::{btreemap, hashset};
+use crate::progress::Progress;
 use crate::update::new::indexer;
 use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
 use crate::vector::EmbeddingConfigs;
@@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
         None,
         &mut new_fields_ids_map,
         &|| false,
-        &|_progress| (),
+        Progress::default(),
     )
     .unwrap();
@@ -91,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
         &document_changes,
         embedders,
         &|| false,
-        &|_| (),
+        &Progress::default(),
     )
     .unwrap();


@@ -766,6 +766,7 @@ mod tests {
     use crate::documents::mmap_from_objects;
     use crate::index::tests::TempIndex;
     use crate::index::IndexEmbeddingConfig;
+    use crate::progress::Progress;
     use crate::search::TermsMatchingStrategy;
     use crate::update::new::indexer;
     use crate::update::Setting;
@@ -1964,7 +1965,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2148,7 +2149,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2163,7 +2164,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2210,7 +2211,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2225,7 +2226,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2263,7 +2264,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2278,7 +2279,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2315,7 +2316,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2330,7 +2331,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2369,7 +2370,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2384,7 +2385,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2428,7 +2429,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2443,7 +2444,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2480,7 +2481,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2495,7 +2496,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2532,7 +2533,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2547,7 +2548,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2726,7 +2727,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2741,7 +2742,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2785,7 +2786,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2800,7 +2801,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();
@@ -2841,7 +2842,7 @@ mod tests {
             None,
             &mut new_fields_ids_map,
             &|| false,
-            &|_progress| (),
+            Progress::default(),
         )
         .unwrap();
@@ -2856,7 +2857,7 @@ mod tests {
             &document_changes,
             embedders,
             &|| false,
-            &|_| (),
+            &Progress::default(),
         )
         .unwrap();
         wtxn.commit().unwrap();


@@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd;
 use crate::update::new::channel::FieldIdDocidFacetSender;
 use crate::update::new::extract::perm_json_p;
 use crate::update::new::indexer::document_changes::{
-    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
+    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
 };
 use crate::update::new::ref_cell_ext::RefCellExt as _;
-use crate::update::new::steps::Step;
+use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;
@@ -373,26 +373,16 @@ fn truncate_str(s: &str) -> &str {
 impl FacetedDocidsExtractor {
     #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
-    pub fn run_extraction<
-        'pl,
-        'fid,
-        'indexer,
-        'index,
-        'extractor,
-        DC: DocumentChanges<'pl>,
-        MSP,
-        SP,
-    >(
+    pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
         grenad_parameters: GrenadParameters,
         document_changes: &DC,
-        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
         sender: &FieldIdDocidFacetSender,
-        step: Step,
+        step: IndexingStep,
     ) -> Result<Vec<BalancedCaches<'extractor>>>
     where
         MSP: Fn() -> bool + Sync,
-        SP: Fn(Progress) + Sync,
     {
         let index = indexing_context.index;
         let rtxn = index.read_txn()?;


@@ -15,23 +15,22 @@ pub use geo::*;
 pub use searchable::*;
 pub use vectors::EmbeddingExtractor;
-use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress};
-use super::steps::Step;
+use super::indexer::document_changes::{DocumentChanges, IndexingContext};
+use super::steps::IndexingStep;
 use super::thread_local::{FullySend, ThreadLocal};
 use crate::update::GrenadParameters;
 use crate::Result;
 pub trait DocidsExtractor {
-    fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
+    fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
         grenad_parameters: GrenadParameters,
         document_changes: &DC,
-        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-        step: Step,
+        step: IndexingStep,
     ) -> Result<Vec<BalancedCaches<'extractor>>>
     where
-        MSP: Fn() -> bool + Sync,
-        SP: Fn(Progress) + Sync;
+        MSP: Fn() -> bool + Sync;
 }
 /// TODO move in permissive json pointer


@@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use crate::update::new::extract::cache::BalancedCaches;
 use crate::update::new::extract::perm_json_p::contained_in;
 use crate::update::new::indexer::document_changes::{
-    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
+    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
 };
 use crate::update::new::ref_cell_ext::RefCellExt as _;
-use crate::update::new::steps::Step;
+use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;
@@ -239,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
 pub struct WordDocidsExtractors;
 impl WordDocidsExtractors {
-    pub fn run_extraction<
-        'pl,
-        'fid,
-        'indexer,
-        'index,
-        'extractor,
-        DC: DocumentChanges<'pl>,
-        MSP,
-        SP,
-    >(
+    pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
         grenad_parameters: GrenadParameters,
        document_changes: &DC,
-        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-        step: Step,
+        step: IndexingStep,
     ) -> Result<WordDocidsCaches<'extractor>>
     where
         MSP: Fn() -> bool + Sync,
-        SP: Fn(Progress) + Sync,
     {
         let index = indexing_context.index;
         let rtxn = index.read_txn()?;


@@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
 use super::cache::BalancedCaches;
 use super::DocidsExtractor;
 use crate::update::new::indexer::document_changes::{
-    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
+    extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
 };
-use crate::update::new::steps::Step;
+use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, ThreadLocal};
 use crate::update::new::DocumentChange;
 use crate::update::GrenadParameters;
@@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
 }
 pub trait SearchableExtractor: Sized + Sync {
-    fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
+    fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
         grenad_parameters: GrenadParameters,
         document_changes: &DC,
-        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-        step: Step,
+        step: IndexingStep,
     ) -> Result<Vec<BalancedCaches<'extractor>>>
     where
         MSP: Fn() -> bool + Sync,
-        SP: Fn(Progress) + Sync,
     {
         let rtxn = indexing_context.index.read_txn()?;
         let stop_words = indexing_context.index.stop_words(&rtxn)?;
@@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync {
 }
 impl<T: SearchableExtractor> DocidsExtractor for T {
-    fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
+    fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
         grenad_parameters: GrenadParameters,
         document_changes: &DC,
-        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+        indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
         extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
-        step: Step,
+        step: IndexingStep,
     ) -> Result<Vec<BalancedCaches<'extractor>>>
     where
         MSP: Fn() -> bool + Sync,
-        SP: Fn(Progress) + Sync,
     {
         Self::run_extraction(
             grenad_parameters,


@@ -1,4 +1,5 @@
 use std::cell::{Cell, RefCell};
+use std::sync::atomic::Ordering;
 use std::sync::{Arc, RwLock};
 use bumpalo::Bump;
@@ -7,8 +8,9 @@ use rayon::iter::IndexedParallelIterator;
 use super::super::document_change::DocumentChange;
 use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
+use crate::progress::{AtomicDocumentStep, Progress};
 use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
-use crate::update::new::steps::Step;
+use crate::update::new::steps::IndexingStep;
 use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
 use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result};
@@ -133,10 +135,8 @@ pub struct IndexingContext<
     'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
     'index,   // covariant lifetime of the index
     MSP,
-    SP,
 > where
     MSP: Fn() -> bool + Sync,
-    SP: Fn(Progress) + Sync,
 {
     pub index: &'index Index,
     pub db_fields_ids_map: &'indexer FieldsIdsMap,
@@ -144,7 +144,7 @@ pub struct IndexingContext<
     pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
     pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
     pub must_stop_processing: &'indexer MSP,
-    pub send_progress: &'indexer SP,
+    pub progress: &'indexer Progress,
 }
 impl<
@@ -152,18 +152,15 @@ impl<
         'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
         'index,   // covariant lifetime of the index
         MSP,
-        SP,
     > Copy
     for IndexingContext<
        'fid,     // invariant lifetime of fields ids map
        'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
        'index,   // covariant lifetime of the index
        MSP,
-        SP,
    >
 where
     MSP: Fn() -> bool + Sync,
-    SP: Fn(Progress) + Sync,
 {
 }
@@ -172,18 +169,15 @@ impl<
         'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
         'index,   // covariant lifetime of the index
         MSP,
-        SP,
     > Clone
     for IndexingContext<
        'fid,     // invariant lifetime of fields ids map
        'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
        'index,   // covariant lifetime of the index
        MSP,
-        SP,
    >
 where
     MSP: Fn() -> bool + Sync,
-    SP: Fn(Progress) + Sync,
 {
     fn clone(&self) -> Self {
         *self
@@ -202,7 +196,6 @@ pub fn extract<
     EX,
     DC: DocumentChanges<'pl>,
     MSP,
-    SP,
 >(
     document_changes: &DC,
     extractor: &EX,
@@ -213,18 +206,18 @@ pub fn extract<
         doc_allocs,
         fields_ids_map_store,
         must_stop_processing,
-        send_progress,
-    }: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
+        progress,
+    }: IndexingContext<'fid, 'indexer, 'index, MSP>,
     extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
     datastore: &'data ThreadLocal<EX::Data>,
-    step: Step,
+    step: IndexingStep,
 ) -> Result<()>
 where
     EX: Extractor<'extractor>,
     MSP: Fn() -> bool + Sync,
-    SP: Fn(Progress) + Sync,
 {
     tracing::trace!("We are resetting the extractor allocators");
+    progress.update_progress(step);
     // Clean up and reuse the extractor allocs
     for extractor_alloc in extractor_allocs.iter_mut() {
         tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());
@@ -232,9 +225,11 @@ where
     }
     let total_documents = document_changes.len() as u32;
+    let (step, progress_step) = AtomicDocumentStep::new(total_documents);
+    progress.update_progress(progress_step);
     let pi = document_changes.iter(CHUNK_SIZE);
-    pi.enumerate().try_arc_for_each_try_init(
+    pi.try_arc_for_each_try_init(
         || {
             DocumentChangeContext::new(
                 index,
@@ -247,13 +242,10 @@ where
                 move |index_alloc| extractor.init_data(index_alloc),
             )
         },
-        |context, (finished_documents, items)| {
+        |context, items| {
             if (must_stop_processing)() {
                 return Err(Arc::new(InternalError::AbortedIndexation.into()));
             }
-            let finished_documents = (finished_documents * CHUNK_SIZE) as u32;
-            (send_progress)(Progress::from_step_substep(step, finished_documents, total_documents));
             // Clean up and reuse the document-specific allocator
             context.doc_alloc.reset();
@@ -264,6 +256,7 @@ where
             });
             let res = extractor.process(changes, context).map_err(Arc::new);
+            step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
             // send back the doc_alloc in the pool
             context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));
@@ -271,32 +264,7 @@ where
             res
         },
     )?;
+    step.store(total_documents, Ordering::Relaxed);
-    (send_progress)(Progress::from_step_substep(step, total_documents, total_documents));
     Ok(())
 }
-pub struct Progress {
-    pub finished_steps: u16,
-    pub total_steps: u16,
-    pub step_name: &'static str,
-    pub finished_total_substep: Option<(u32, u32)>,
-}
-
-impl Progress {
-    pub fn from_step(step: Step) -> Self {
-        Self {
-            finished_steps: step.finished_steps(),
-            total_steps: Step::total_steps(),
-            step_name: step.name(),
-            finished_total_substep: None,
-        }
-    }
-    pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self {
-        Self {
-            finished_total_substep: Some((finished_substep, total_substep)),
-            ..Progress::from_step(step)
-        }
-    }
-}


@@ -92,11 +92,12 @@ mod test {
     use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
     use crate::index::tests::TempIndex;
+    use crate::progress::Progress;
     use crate::update::new::indexer::document_changes::{
         extract, DocumentChangeContext, Extractor, IndexingContext,
     };
     use crate::update::new::indexer::DocumentDeletion;
-    use crate::update::new::steps::Step;
+    use crate::update::new::steps::IndexingStep;
     use crate::update::new::thread_local::{MostlySend, ThreadLocal};
     use crate::update::new::DocumentChange;
     use crate::DocumentId;
@@ -164,7 +165,7 @@ mod test {
             doc_allocs: &doc_allocs,
             fields_ids_map_store: &fields_ids_map_store,
             must_stop_processing: &(|| false),
-            send_progress: &(|_progress| {}),
+            progress: &Progress::default(),
         };
         for _ in 0..3 {
@@ -176,7 +177,7 @@ mod test {
             context,
             &mut extractor_allocs,
             &datastore,
-            Step::ExtractingDocuments,
+            IndexingStep::ExtractingDocuments,
         )
         .unwrap();


@ -1,3 +1,5 @@
use std::sync::atomic::Ordering;
use bumpalo::collections::CollectIn; use bumpalo::collections::CollectIn;
use bumpalo::Bump; use bumpalo::Bump;
use bumparaw_collections::RawMap; use bumparaw_collections::RawMap;
@ -10,11 +12,12 @@ use serde_json::value::RawValue;
use serde_json::Deserializer; use serde_json::Deserializer;
use super::super::document_change::DocumentChange; use super::super::document_change::DocumentChange;
use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress}; use super::document_changes::{DocumentChangeContext, DocumentChanges};
use super::retrieve_or_guess_primary_key; use super::retrieve_or_guess_primary_key;
use crate::documents::PrimaryKey; use crate::documents::PrimaryKey;
use crate::progress::{AtomicPayloadStep, Progress};
use crate::update::new::document::Versions; use crate::update::new::document::Versions;
use crate::update::new::steps::Step; use crate::update::new::steps::IndexingStep;
use crate::update::new::thread_local::MostlySend; use crate::update::new::thread_local::MostlySend;
use crate::update::new::{Deletion, Insertion, Update}; use crate::update::new::{Deletion, Insertion, Update};
use crate::update::{AvailableIds, IndexDocumentsMethod}; use crate::update::{AvailableIds, IndexDocumentsMethod};
@ -45,7 +48,7 @@ impl<'pl> DocumentOperation<'pl> {
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")] #[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")]
pub fn into_changes<MSP, SP>( pub fn into_changes<MSP>(
self, self,
indexer: &'pl Bump, indexer: &'pl Bump,
index: &Index, index: &Index,
@ -53,12 +56,12 @@ impl<'pl> DocumentOperation<'pl> {
primary_key_from_op: Option<&'pl str>, primary_key_from_op: Option<&'pl str>,
new_fields_ids_map: &mut FieldsIdsMap, new_fields_ids_map: &mut FieldsIdsMap,
must_stop_processing: &MSP, must_stop_processing: &MSP,
send_progress: &SP, progress: Progress,
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)> ) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
where where
MSP: Fn() -> bool, MSP: Fn() -> bool,
SP: Fn(Progress),
{ {
progress.update_progress(IndexingStep::PreparingPayloads);
let Self { operations, method } = self; let Self { operations, method } = self;
let documents_ids = index.documents_ids(rtxn)?; let documents_ids = index.documents_ids(rtxn)?;
@ -68,16 +71,14 @@ impl<'pl> DocumentOperation<'pl> {
let mut primary_key = None; let mut primary_key = None;
let payload_count = operations.len(); let payload_count = operations.len();
let (step, progress_step) = AtomicPayloadStep::new(payload_count as u32);
progress.update_progress(progress_step);
for (payload_index, operation) in operations.into_iter().enumerate() { for (payload_index, operation) in operations.into_iter().enumerate() {
if must_stop_processing() { if must_stop_processing() {
return Err(InternalError::AbortedIndexation.into()); return Err(InternalError::AbortedIndexation.into());
} }
send_progress(Progress::from_step_substep( step.store(payload_index as u32, Ordering::Relaxed);
Step::PreparingPayloads,
payload_index as u32,
payload_count as u32,
));
let mut bytes = 0; let mut bytes = 0;
let result = match operation { let result = match operation {
@ -118,12 +119,7 @@ impl<'pl> DocumentOperation<'pl> {
}; };
operations_stats.push(PayloadStats { document_count, bytes, error }); operations_stats.push(PayloadStats { document_count, bytes, error });
} }
step.store(payload_count as u32, Ordering::Relaxed);
send_progress(Progress::from_step_substep(
Step::PreparingPayloads,
payload_count as u32,
payload_count as u32,
));
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone // TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> = let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> =
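`into_changes` now takes an owned `Progress`, announces `IndexingStep::PreparingPayloads` up front, and shares an `AtomicPayloadStep` counter. Unlike the chunked document loop, payloads are handled sequentially, so a plain `store` of the running index replaces `fetch_add`. A sketch with a bare `AtomicU32` standing in for the counter half of `AtomicPayloadStep::new` (whose internals this diff does not show):

```rust
use std::sync::atomic::{AtomicU32, Ordering};

fn prepare_payloads(payloads: Vec<Vec<u8>>, step: &AtomicU32) {
    let payload_count = payloads.len();
    for (payload_index, _payload) in payloads.into_iter().enumerate() {
        // Stored *before* the payload is parsed, so the counter always reads
        // as "payloads fully finished", never "payloads started".
        step.store(payload_index as u32, Ordering::Relaxed);
        // ... read, validate, and stage the payload ...
    }
    // Close the sub-step once every payload has been handled.
    step.store(payload_count as u32, Ordering::Relaxed);
}
```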


@ -5,7 +5,7 @@ use std::thread::{self, Builder};
use big_s::S; use big_s::S;
use bumparaw_collections::RawMap; use bumparaw_collections::RawMap;
use document_changes::{extract, DocumentChanges, IndexingContext, Progress}; use document_changes::{extract, DocumentChanges, IndexingContext};
pub use document_deletion::DocumentDeletion; pub use document_deletion::DocumentDeletion;
pub use document_operation::{DocumentOperation, PayloadStats}; pub use document_operation::{DocumentOperation, PayloadStats};
use hashbrown::HashMap; use hashbrown::HashMap;
@ -22,7 +22,7 @@ use super::channel::*;
use super::extract::*; use super::extract::*;
use super::facet_search_builder::FacetSearchBuilder; use super::facet_search_builder::FacetSearchBuilder;
use super::merger::FacetFieldIdsDelta; use super::merger::FacetFieldIdsDelta;
use super::steps::Step; use super::steps::IndexingStep;
use super::thread_local::ThreadLocal; use super::thread_local::ThreadLocal;
use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder}; use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
use super::words_prefix_docids::{ use super::words_prefix_docids::{
@ -33,6 +33,7 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder}; use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY}; use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
use crate::progress::Progress;
use crate::proximity::ProximityPrecision; use crate::proximity::ProximityPrecision;
use crate::update::del_add::DelAdd; use crate::update::del_add::DelAdd;
use crate::update::new::extract::EmbeddingExtractor; use crate::update::new::extract::EmbeddingExtractor;
@ -60,7 +61,7 @@ mod update_by_function;
/// ///
/// TODO return stats /// TODO return stats
#[allow(clippy::too_many_arguments)] // clippy: 😝 #[allow(clippy::too_many_arguments)] // clippy: 😝
pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>( pub fn index<'pl, 'indexer, 'index, DC, MSP>(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
index: &'index Index, index: &'index Index,
pool: &ThreadPoolNoAbort, pool: &ThreadPoolNoAbort,
@ -71,12 +72,11 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
document_changes: &DC, document_changes: &DC,
embedders: EmbeddingConfigs, embedders: EmbeddingConfigs,
must_stop_processing: &'indexer MSP, must_stop_processing: &'indexer MSP,
send_progress: &'indexer SP, progress: &'indexer Progress,
) -> Result<()> ) -> Result<()>
where where
DC: DocumentChanges<'pl>, DC: DocumentChanges<'pl>,
MSP: Fn() -> bool + Sync, MSP: Fn() -> bool + Sync,
SP: Fn(Progress) + Sync,
{ {
let mut bbbuffers = Vec::new(); let mut bbbuffers = Vec::new();
let finished_extraction = AtomicBool::new(false); let finished_extraction = AtomicBool::new(false);
@ -125,7 +125,7 @@ where
doc_allocs: &doc_allocs, doc_allocs: &doc_allocs,
fields_ids_map_store: &fields_ids_map_store, fields_ids_map_store: &fields_ids_map_store,
must_stop_processing, must_stop_processing,
send_progress, progress,
}; };
let mut index_embeddings = index.embedding_configs(wtxn)?; let mut index_embeddings = index.embedding_configs(wtxn)?;
@ -159,7 +159,7 @@ where
indexing_context, indexing_context,
&mut extractor_allocs, &mut extractor_allocs,
&datastore, &datastore,
Step::ExtractingDocuments, IndexingStep::ExtractingDocuments,
)?; )?;
} }
{ {
@ -191,7 +191,7 @@ where
indexing_context, indexing_context,
&mut extractor_allocs, &mut extractor_allocs,
&extractor_sender.field_id_docid_facet_sender(), &extractor_sender.field_id_docid_facet_sender(),
Step::ExtractingFacets IndexingStep::ExtractingFacets
)? )?
}; };
@ -224,7 +224,7 @@ where
document_changes, document_changes,
indexing_context, indexing_context,
&mut extractor_allocs, &mut extractor_allocs,
Step::ExtractingWords IndexingStep::ExtractingWords
)? )?
}; };
@ -302,7 +302,7 @@ where
document_changes, document_changes,
indexing_context, indexing_context,
&mut extractor_allocs, &mut extractor_allocs,
Step::ExtractingWordProximity, IndexingStep::ExtractingWordProximity,
)? )?
}; };
@ -338,7 +338,7 @@ where
indexing_context, indexing_context,
&mut extractor_allocs, &mut extractor_allocs,
&datastore, &datastore,
Step::ExtractingEmbeddings, IndexingStep::ExtractingEmbeddings,
)?; )?;
} }
{ {
@ -371,7 +371,7 @@ where
indexing_context, indexing_context,
&mut extractor_allocs, &mut extractor_allocs,
&datastore, &datastore,
Step::WritingGeoPoints IndexingStep::WritingGeoPoints
)?; )?;
} }
@ -383,9 +383,7 @@ where
&indexing_context.must_stop_processing, &indexing_context.must_stop_processing,
)?; )?;
} }
indexing_context.progress.update_progress(IndexingStep::WritingToDatabase);
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed); finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
Result::Ok((facet_field_ids_delta, index_embeddings)) Result::Ok((facet_field_ids_delta, index_embeddings))
@ -485,7 +483,7 @@ where
)?; )?;
} }
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors)); indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?; let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;
@ -498,10 +496,7 @@ where
break 'vectors; break 'vectors;
} }
(indexing_context.send_progress)(Progress::from_step( indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
Step::WritingEmbeddingsToDatabase,
));
let mut rng = rand::rngs::StdRng::seed_from_u64(42); let mut rng = rand::rngs::StdRng::seed_from_u64(42);
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers { for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
let dimensions = *dimensions; let dimensions = *dimensions;
@ -517,21 +512,19 @@ where
index.put_embedding_configs(wtxn, index_embeddings)?; index.put_embedding_configs(wtxn, index_embeddings)?;
} }
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets)); indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
if index.facet_search(wtxn)? { if index.facet_search(wtxn)? {
compute_facet_search_database(index, wtxn, global_fields_ids_map)?; compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
} }
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?; compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords)); indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? { if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?; compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?;
} }
(indexing_context.send_progress)(Progress::from_step(Step::Finalizing)); indexing_context.progress.update_progress(IndexingStep::Finalizing);
Ok(()) as Result<_> Ok(()) as Result<_>
})?; })?;
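Dropping the `SP` generic is the signature-level payoff here: `index` shares one `Sync` handle between the extractor thread it spawns and the writer loop, instead of threading an `SP: Fn(Progress) + Sync` callback through every bound. A self-contained mock of that shape (not milli's real `Progress`; its internals are assumed for illustration):

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Mutex;

/// Mock handle: a `Sync` value that every phase stamps in place.
#[derive(Default)]
struct Progress {
    steps: Mutex<Vec<&'static str>>, // assumed internals, for illustration
}

impl Progress {
    fn update_progress(&self, name: &'static str) {
        self.steps.lock().unwrap().push(name);
    }
}

fn main() {
    let progress = Progress::default();
    let finished_extraction = AtomicBool::new(false);
    std::thread::scope(|s| {
        // Extractor thread, as spawned inside `index`.
        s.spawn(|| {
            // ... extraction steps report here ...
            progress.update_progress("writing to database");
            finished_extraction.store(true, Ordering::Relaxed);
        });
        // The writer side keeps reporting on the same shared handle.
        progress.update_progress("waiting for extractors");
    });
    assert_eq!(progress.steps.lock().unwrap().len(), 2);
}
```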


@ -1,8 +1,12 @@
use std::borrow::Cow;
use enum_iterator::Sequence; use enum_iterator::Sequence;
use crate::progress::Step;
#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[repr(u16)] #[repr(u8)]
pub enum Step { pub enum IndexingStep {
PreparingPayloads, PreparingPayloads,
ExtractingDocuments, ExtractingDocuments,
ExtractingFacets, ExtractingFacets,
@ -18,30 +22,31 @@ pub enum Step {
Finalizing, Finalizing,
} }
impl Step { impl Step for IndexingStep {
pub fn name(&self) -> &'static str { fn name(&self) -> Cow<'static, str> {
match self { match self {
Step::PreparingPayloads => "preparing update file", IndexingStep::PreparingPayloads => "preparing update file",
Step::ExtractingDocuments => "extracting documents", IndexingStep::ExtractingDocuments => "extracting documents",
Step::ExtractingFacets => "extracting facets", IndexingStep::ExtractingFacets => "extracting facets",
Step::ExtractingWords => "extracting words", IndexingStep::ExtractingWords => "extracting words",
Step::ExtractingWordProximity => "extracting word proximity", IndexingStep::ExtractingWordProximity => "extracting word proximity",
Step::ExtractingEmbeddings => "extracting embeddings", IndexingStep::ExtractingEmbeddings => "extracting embeddings",
Step::WritingGeoPoints => "writing geo points", IndexingStep::WritingGeoPoints => "writing geo points",
Step::WritingToDatabase => "writing to database", IndexingStep::WritingToDatabase => "writing to database",
Step::WaitingForExtractors => "waiting for extractors", IndexingStep::WaitingForExtractors => "waiting for extractors",
Step::WritingEmbeddingsToDatabase => "writing embeddings to database", IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
Step::PostProcessingFacets => "post-processing facets", IndexingStep::PostProcessingFacets => "post-processing facets",
Step::PostProcessingWords => "post-processing words", IndexingStep::PostProcessingWords => "post-processing words",
Step::Finalizing => "finalizing", IndexingStep::Finalizing => "finalizing",
} }
.into()
} }
pub fn finished_steps(self) -> u16 { fn current(&self) -> u32 {
self as u16 *self as u32
} }
pub const fn total_steps() -> u16 { fn total(&self) -> u32 {
Self::CARDINALITY as u16 Self::CARDINALITY as u32
} }
} }
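This file stops being the progress vocabulary itself and instead implements a `Step` trait imported from `crate::progress`. The trait definition below is inferred from the impl in this hunk (the supertrait bounds are an assumption), and `DumpStep` is a hypothetical second implementor showing how another task kind could plug into the same `Progress`:

```rust
use std::borrow::Cow;

use enum_iterator::Sequence;

/// Inferred from the impl above; bounds are assumed, the real definition
/// lives in crates/milli/src/progress.rs.
pub trait Step: 'static + Send + Sync {
    fn name(&self) -> Cow<'static, str>;
    fn current(&self) -> u32;
    fn total(&self) -> u32;
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
#[repr(u8)]
enum DumpStep {
    DumpingTasks,
    DumpingIndexes,
}

impl Step for DumpStep {
    fn name(&self) -> Cow<'static, str> {
        match self {
            DumpStep::DumpingTasks => "dumping tasks",
            DumpStep::DumpingIndexes => "dumping indexes",
        }
        .into()
    }

    // Declaration order doubles as progress order, exactly as in
    // `IndexingStep`: the discriminant is the number of finished steps.
    fn current(&self) -> u32 {
        *self as u32
    }

    fn total(&self) -> u32 {
        Self::CARDINALITY as u32
    }
}
```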


@ -3,6 +3,7 @@ use bumpalo::Bump;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use maplit::hashset; use maplit::hashset;
use milli::documents::mmap_from_objects; use milli::documents::mmap_from_objects;
use milli::progress::Progress;
use milli::update::new::indexer; use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs; use milli::vector::EmbeddingConfigs;
@ -57,7 +58,7 @@ fn test_facet_distribution_with_no_facet_values() {
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&|| false, &|| false,
&|_progress| (), Progress::default(),
) )
.unwrap(); .unwrap();
@ -72,7 +73,7 @@ fn test_facet_distribution_with_no_facet_values() {
&document_changes, &document_changes,
embedders, embedders,
&|| false, &|| false,
&|_| (), &Progress::default(),
) )
.unwrap(); .unwrap();
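This and the three test diffs that follow are the same mechanical update: the `&|_progress| ()` and `&|_| ()` closures become `Progress` handles, passed by value to `into_changes` and by reference to `index`. The tests opt out of both signals (`&|| false`, a fresh default handle); a hypothetical helper sketching how a real caller wires them to live state:

```rust
use std::sync::{
    atomic::{AtomicBool, Ordering},
    Arc,
};

use milli::progress::Progress;

// Hypothetical: returns the cancellation check and progress handle that the
// `into_changes`/`index` call sites above expect.
fn caller_signals(stop: Arc<AtomicBool>) -> (impl Fn() -> bool, Progress) {
    (move || stop.load(Ordering::Relaxed), Progress::default())
}
```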


@ -7,6 +7,7 @@ use bumpalo::Bump;
use either::{Either, Left, Right}; use either::{Either, Left, Right};
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use maplit::{btreemap, hashset}; use maplit::{btreemap, hashset};
use milli::progress::Progress;
use milli::update::new::indexer; use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs; use milli::vector::EmbeddingConfigs;
@ -90,7 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&|| false, &|| false,
&|_progress| (), Progress::default(),
) )
.unwrap(); .unwrap();
@ -109,7 +110,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
&document_changes, &document_changes,
embedders, embedders,
&|| false, &|| false,
&|_| (), &Progress::default(),
) )
.unwrap(); .unwrap();


@ -5,6 +5,7 @@ use bumpalo::Bump;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use itertools::Itertools; use itertools::Itertools;
use maplit::hashset; use maplit::hashset;
use milli::progress::Progress;
use milli::update::new::indexer; use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs; use milli::vector::EmbeddingConfigs;
@ -326,7 +327,7 @@ fn criteria_ascdesc() {
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&|| false, &|| false,
&|_progress| (), Progress::default(),
) )
.unwrap(); .unwrap();
@ -341,7 +342,7 @@ fn criteria_ascdesc() {
&document_changes, &document_changes,
embedders, embedders,
&|| false, &|| false,
&|_| (), &Progress::default(),
) )
.unwrap(); .unwrap();


@ -3,6 +3,7 @@ use std::collections::BTreeSet;
use bumpalo::Bump; use bumpalo::Bump;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use milli::documents::mmap_from_objects; use milli::documents::mmap_from_objects;
use milli::progress::Progress;
use milli::update::new::indexer; use milli::update::new::indexer;
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings}; use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
use milli::vector::EmbeddingConfigs; use milli::vector::EmbeddingConfigs;
@ -135,7 +136,7 @@ fn test_typo_disabled_on_word() {
None, None,
&mut new_fields_ids_map, &mut new_fields_ids_map,
&|| false, &|| false,
&|_progress| (), Progress::default(),
) )
.unwrap(); .unwrap();
@ -150,7 +151,7 @@ fn test_typo_disabled_on_word() {
&document_changes, &document_changes,
embedders, embedders,
&|| false, &|| false,
&|_| (), &Progress::default(),
) )
.unwrap(); .unwrap();