mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 14:27:28 +01:00
Merge #5147
5147: Batch progress r=dureuill a=irevoire # Pull Request ## Related issue Fixes https://github.com/meilisearch/meilisearch/issues/5068 ## What does this PR do? - ... ## PR checklist Please check if your PR fulfills the following requirements: - [ ] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)? - [ ] Have you read the contributing guidelines? - [ ] Have you made sure that the title is accurate and descriptive of the changes? Thank you so much for contributing to Meilisearch! Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
1fc90fbacb
1
Cargo.lock
generated
1
Cargo.lock
generated
@ -2632,6 +2632,7 @@ dependencies = [
|
|||||||
"bincode",
|
"bincode",
|
||||||
"bumpalo",
|
"bumpalo",
|
||||||
"bumparaw-collections",
|
"bumparaw-collections",
|
||||||
|
"convert_case 0.6.0",
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"csv",
|
"csv",
|
||||||
"derive_builder 0.20.0",
|
"derive_builder 0.20.0",
|
||||||
|
@ -8,6 +8,7 @@ use bumpalo::Bump;
|
|||||||
use criterion::{criterion_group, criterion_main, Criterion};
|
use criterion::{criterion_group, criterion_main, Criterion};
|
||||||
use milli::documents::PrimaryKey;
|
use milli::documents::PrimaryKey;
|
||||||
use milli::heed::{EnvOpenOptions, RwTxn};
|
use milli::heed::{EnvOpenOptions, RwTxn};
|
||||||
|
use milli::progress::Progress;
|
||||||
use milli::update::new::indexer;
|
use milli::update::new::indexer;
|
||||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||||
use milli::vector::EmbeddingConfigs;
|
use milli::vector::EmbeddingConfigs;
|
||||||
@ -151,7 +152,7 @@ fn indexing_songs_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -166,7 +167,7 @@ fn indexing_songs_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -218,7 +219,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -233,7 +234,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -263,7 +264,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -278,7 +279,7 @@ fn reindexing_songs_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -332,7 +333,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -347,7 +348,7 @@ fn deleting_songs_in_batches_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -409,7 +410,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -424,7 +425,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -454,7 +455,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -469,7 +470,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -495,7 +496,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -510,7 +511,7 @@ fn indexing_songs_in_three_batches_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -563,7 +564,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -578,7 +579,7 @@ fn indexing_songs_without_faceted_numbers(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -630,7 +631,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -645,7 +646,7 @@ fn indexing_songs_without_faceted_fields(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -697,7 +698,7 @@ fn indexing_wiki(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -712,7 +713,7 @@ fn indexing_wiki(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -763,7 +764,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -778,7 +779,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -808,7 +809,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -823,7 +824,7 @@ fn reindexing_wiki(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -876,7 +877,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -891,7 +892,7 @@ fn deleting_wiki_in_batches_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -953,7 +954,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -968,7 +969,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -999,7 +1000,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1014,7 +1015,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1041,7 +1042,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1056,7 +1057,7 @@ fn indexing_wiki_in_three_batches(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1108,7 +1109,7 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1123,7 +1124,7 @@ fn indexing_movies_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1174,7 +1175,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1189,7 +1190,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1219,7 +1220,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1234,7 +1235,7 @@ fn reindexing_movies_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1287,7 +1288,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1302,7 +1303,7 @@ fn deleting_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1350,7 +1351,7 @@ fn delete_documents_from_ids(index: Index, document_ids_to_delete: Vec<RoaringBi
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1400,7 +1401,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1415,7 +1416,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1445,7 +1446,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1460,7 +1461,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1486,7 +1487,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1501,7 +1502,7 @@ fn indexing_movies_in_three_batches(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1576,7 +1577,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1591,7 +1592,7 @@ fn indexing_nested_movies_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1667,7 +1668,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1682,7 +1683,7 @@ fn deleting_nested_movies_in_batches_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1750,7 +1751,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1765,7 +1766,7 @@ fn indexing_nested_movies_without_faceted_fields(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1817,7 +1818,7 @@ fn indexing_geo(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1832,7 +1833,7 @@ fn indexing_geo(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1883,7 +1884,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1898,7 +1899,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1928,7 +1929,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1943,7 +1944,7 @@ fn reindexing_geo(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -1996,7 +1997,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2011,7 +2012,7 @@ fn deleting_geo_in_batches_default(c: &mut Criterion) {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -10,6 +10,7 @@ use bumpalo::Bump;
|
|||||||
use criterion::BenchmarkId;
|
use criterion::BenchmarkId;
|
||||||
use memmap2::Mmap;
|
use memmap2::Mmap;
|
||||||
use milli::heed::EnvOpenOptions;
|
use milli::heed::EnvOpenOptions;
|
||||||
|
use milli::progress::Progress;
|
||||||
use milli::update::new::indexer;
|
use milli::update::new::indexer;
|
||||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||||
use milli::vector::EmbeddingConfigs;
|
use milli::vector::EmbeddingConfigs;
|
||||||
@ -110,7 +111,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -125,7 +126,7 @@ pub fn base_setup(conf: &Conf) -> Index {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
EmbeddingConfigs::default(),
|
EmbeddingConfigs::default(),
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -10,6 +10,7 @@ use either::Either;
|
|||||||
use fuzzers::Operation;
|
use fuzzers::Operation;
|
||||||
use milli::documents::mmap_from_objects;
|
use milli::documents::mmap_from_objects;
|
||||||
use milli::heed::EnvOpenOptions;
|
use milli::heed::EnvOpenOptions;
|
||||||
|
use milli::progress::Progress;
|
||||||
use milli::update::new::indexer;
|
use milli::update::new::indexer;
|
||||||
use milli::update::{IndexDocumentsMethod, IndexerConfig};
|
use milli::update::{IndexDocumentsMethod, IndexerConfig};
|
||||||
use milli::vector::EmbeddingConfigs;
|
use milli::vector::EmbeddingConfigs;
|
||||||
@ -128,7 +129,7 @@ fn main() {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -143,7 +144,7 @@ fn main() {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -15,6 +15,7 @@ anyhow = "1.0.86"
|
|||||||
bincode = "1.3.3"
|
bincode = "1.3.3"
|
||||||
bumpalo = "3.16.0"
|
bumpalo = "3.16.0"
|
||||||
bumparaw-collections = "0.1.2"
|
bumparaw-collections = "0.1.2"
|
||||||
|
convert_case = "0.6.0"
|
||||||
csv = "1.3.0"
|
csv = "1.3.0"
|
||||||
derive_builder = "0.20.0"
|
derive_builder = "0.20.0"
|
||||||
dump = { path = "../dump" }
|
dump = { path = "../dump" }
|
||||||
|
@ -22,8 +22,7 @@ use std::ffi::OsStr;
|
|||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::fs::{self, File};
|
use std::fs::{self, File};
|
||||||
use std::io::BufWriter;
|
use std::io::BufWriter;
|
||||||
use std::sync::atomic::{self, AtomicU64};
|
use std::sync::atomic::Ordering;
|
||||||
use std::time::Duration;
|
|
||||||
|
|
||||||
use bumpalo::collections::CollectIn;
|
use bumpalo::collections::CollectIn;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
@ -32,6 +31,7 @@ use meilisearch_types::batches::BatchId;
|
|||||||
use meilisearch_types::heed::{RoTxn, RwTxn};
|
use meilisearch_types::heed::{RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
|
use meilisearch_types::milli::documents::{obkv_to_object, DocumentsBatchReader, PrimaryKey};
|
||||||
use meilisearch_types::milli::heed::CompactionOption;
|
use meilisearch_types::milli::heed::CompactionOption;
|
||||||
|
use meilisearch_types::milli::progress::Progress;
|
||||||
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
|
use meilisearch_types::milli::update::new::indexer::{self, UpdateByFunction};
|
||||||
use meilisearch_types::milli::update::{
|
use meilisearch_types::milli::update::{
|
||||||
DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings,
|
DocumentAdditionResult, IndexDocumentsMethod, Settings as MilliSettings,
|
||||||
@ -41,9 +41,7 @@ use meilisearch_types::milli::vector::parsed_vectors::{
|
|||||||
};
|
};
|
||||||
use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
|
use meilisearch_types::milli::{self, Filter, ThreadPoolNoAbortBuilder};
|
||||||
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
use meilisearch_types::settings::{apply_settings_to_builder, Settings, Unchecked};
|
||||||
use meilisearch_types::tasks::{
|
use meilisearch_types::tasks::{Details, IndexSwap, Kind, KindWithContent, Status, Task};
|
||||||
Details, IndexSwap, Kind, KindWithContent, Status, Task, TaskProgress,
|
|
||||||
};
|
|
||||||
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
use meilisearch_types::{compression, Index, VERSION_FILE_NAME};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use time::macros::format_description;
|
use time::macros::format_description;
|
||||||
@ -51,6 +49,13 @@ use time::OffsetDateTime;
|
|||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use crate::autobatcher::{self, BatchKind};
|
use crate::autobatcher::{self, BatchKind};
|
||||||
|
use crate::processing::{
|
||||||
|
AtomicBatchStep, AtomicDocumentStep, AtomicTaskStep, AtomicUpdateFileStep, CreateIndexProgress,
|
||||||
|
DeleteIndexProgress, DocumentDeletionProgress, DocumentEditionProgress,
|
||||||
|
DocumentOperationProgress, DumpCreationProgress, InnerSwappingTwoIndexes, SettingsProgress,
|
||||||
|
SnapshotCreationProgress, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress,
|
||||||
|
UpdateIndexProgress, VariableNameStep,
|
||||||
|
};
|
||||||
use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
|
use crate::utils::{self, swap_index_uid_in_task, ProcessingBatch};
|
||||||
use crate::{Error, IndexScheduler, Result, TaskId};
|
use crate::{Error, IndexScheduler, Result, TaskId};
|
||||||
|
|
||||||
@ -561,11 +566,12 @@ impl IndexScheduler {
|
|||||||
/// The list of tasks that were processed. The metadata of each task in the returned
|
/// The list of tasks that were processed. The metadata of each task in the returned
|
||||||
/// list is updated accordingly, with the exception of its date fields
|
/// list is updated accordingly, with the exception of its date fields
|
||||||
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
|
/// [`finished_at`](meilisearch_types::tasks::Task::finished_at) and [`started_at`](meilisearch_types::tasks::Task::started_at).
|
||||||
#[tracing::instrument(level = "trace", skip(self, batch), target = "indexing::scheduler", fields(batch=batch.to_string()))]
|
#[tracing::instrument(level = "trace", skip(self, batch, progress), target = "indexing::scheduler", fields(batch=batch.to_string()))]
|
||||||
pub(crate) fn process_batch(
|
pub(crate) fn process_batch(
|
||||||
&self,
|
&self,
|
||||||
batch: Batch,
|
batch: Batch,
|
||||||
current_batch: &mut ProcessingBatch,
|
current_batch: &mut ProcessingBatch,
|
||||||
|
progress: Progress,
|
||||||
) -> Result<Vec<Task>> {
|
) -> Result<Vec<Task>> {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
{
|
{
|
||||||
@ -585,8 +591,13 @@ impl IndexScheduler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let rtxn = self.env.read_txn()?;
|
let rtxn = self.env.read_txn()?;
|
||||||
let mut canceled_tasks =
|
let mut canceled_tasks = self.cancel_matched_tasks(
|
||||||
self.cancel_matched_tasks(&rtxn, task.uid, current_batch, matched_tasks)?;
|
&rtxn,
|
||||||
|
task.uid,
|
||||||
|
current_batch,
|
||||||
|
matched_tasks,
|
||||||
|
&progress,
|
||||||
|
)?;
|
||||||
|
|
||||||
task.status = Status::Succeeded;
|
task.status = Status::Succeeded;
|
||||||
match &mut task.details {
|
match &mut task.details {
|
||||||
@ -617,7 +628,8 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let mut wtxn = self.env.write_txn()?;
|
let mut wtxn = self.env.write_txn()?;
|
||||||
let mut deleted_tasks = self.delete_matched_tasks(&mut wtxn, &matched_tasks)?;
|
let mut deleted_tasks =
|
||||||
|
self.delete_matched_tasks(&mut wtxn, &matched_tasks, &progress)?;
|
||||||
wtxn.commit()?;
|
wtxn.commit()?;
|
||||||
|
|
||||||
for task in tasks.iter_mut() {
|
for task in tasks.iter_mut() {
|
||||||
@ -643,6 +655,8 @@ impl IndexScheduler {
|
|||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
}
|
}
|
||||||
Batch::SnapshotCreation(mut tasks) => {
|
Batch::SnapshotCreation(mut tasks) => {
|
||||||
|
progress.update_progress(SnapshotCreationProgress::StartTheSnapshotCreation);
|
||||||
|
|
||||||
fs::create_dir_all(&self.snapshots_path)?;
|
fs::create_dir_all(&self.snapshots_path)?;
|
||||||
let temp_snapshot_dir = tempfile::tempdir()?;
|
let temp_snapshot_dir = tempfile::tempdir()?;
|
||||||
|
|
||||||
@ -663,6 +677,7 @@ impl IndexScheduler {
|
|||||||
// two read operations as the task processing is synchronous.
|
// two read operations as the task processing is synchronous.
|
||||||
|
|
||||||
// 2.1 First copy the LMDB env of the index-scheduler
|
// 2.1 First copy the LMDB env of the index-scheduler
|
||||||
|
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexScheduler);
|
||||||
let dst = temp_snapshot_dir.path().join("tasks");
|
let dst = temp_snapshot_dir.path().join("tasks");
|
||||||
fs::create_dir_all(&dst)?;
|
fs::create_dir_all(&dst)?;
|
||||||
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
self.env.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||||
@ -675,18 +690,29 @@ impl IndexScheduler {
|
|||||||
fs::create_dir_all(&update_files_dir)?;
|
fs::create_dir_all(&update_files_dir)?;
|
||||||
|
|
||||||
// 2.4 Only copy the update files of the enqueued tasks
|
// 2.4 Only copy the update files of the enqueued tasks
|
||||||
for task_id in self.get_status(&rtxn, Status::Enqueued)? {
|
progress.update_progress(SnapshotCreationProgress::SnapshotTheUpdateFiles);
|
||||||
|
let enqueued = self.get_status(&rtxn, Status::Enqueued)?;
|
||||||
|
let (atomic, update_file_progress) =
|
||||||
|
AtomicUpdateFileStep::new(enqueued.len() as u32);
|
||||||
|
progress.update_progress(update_file_progress);
|
||||||
|
for task_id in enqueued {
|
||||||
let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
let task = self.get_task(&rtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||||
if let Some(content_uuid) = task.content_uuid() {
|
if let Some(content_uuid) = task.content_uuid() {
|
||||||
let src = self.file_store.get_update_path(content_uuid);
|
let src = self.file_store.get_update_path(content_uuid);
|
||||||
let dst = update_files_dir.join(content_uuid.to_string());
|
let dst = update_files_dir.join(content_uuid.to_string());
|
||||||
fs::copy(src, dst)?;
|
fs::copy(src, dst)?;
|
||||||
}
|
}
|
||||||
|
atomic.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. Snapshot every indexes
|
// 3. Snapshot every indexes
|
||||||
for result in self.index_mapper.index_mapping.iter(&rtxn)? {
|
progress.update_progress(SnapshotCreationProgress::SnapshotTheIndexes);
|
||||||
|
let index_mapping = self.index_mapper.index_mapping;
|
||||||
|
let nb_indexes = index_mapping.len(&rtxn)? as u32;
|
||||||
|
|
||||||
|
for (i, result) in index_mapping.iter(&rtxn)?.enumerate() {
|
||||||
let (name, uuid) = result?;
|
let (name, uuid) = result?;
|
||||||
|
progress.update_progress(VariableNameStep::new(name, i as u32, nb_indexes));
|
||||||
let index = self.index_mapper.index(&rtxn, name)?;
|
let index = self.index_mapper.index(&rtxn, name)?;
|
||||||
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
|
let dst = temp_snapshot_dir.path().join("indexes").join(uuid.to_string());
|
||||||
fs::create_dir_all(&dst)?;
|
fs::create_dir_all(&dst)?;
|
||||||
@ -698,6 +724,7 @@ impl IndexScheduler {
|
|||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
|
|
||||||
// 4. Snapshot the auth LMDB env
|
// 4. Snapshot the auth LMDB env
|
||||||
|
progress.update_progress(SnapshotCreationProgress::SnapshotTheApiKeys);
|
||||||
let dst = temp_snapshot_dir.path().join("auth");
|
let dst = temp_snapshot_dir.path().join("auth");
|
||||||
fs::create_dir_all(&dst)?;
|
fs::create_dir_all(&dst)?;
|
||||||
// TODO We can't use the open_auth_store_env function here but we should
|
// TODO We can't use the open_auth_store_env function here but we should
|
||||||
@ -710,6 +737,7 @@ impl IndexScheduler {
|
|||||||
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
auth.copy_to_file(dst.join("data.mdb"), CompactionOption::Enabled)?;
|
||||||
|
|
||||||
// 5. Copy and tarball the flat snapshot
|
// 5. Copy and tarball the flat snapshot
|
||||||
|
progress.update_progress(SnapshotCreationProgress::CreateTheTarball);
|
||||||
// 5.1 Find the original name of the database
|
// 5.1 Find the original name of the database
|
||||||
// TODO find a better way to get this path
|
// TODO find a better way to get this path
|
||||||
let mut base_path = self.env.path().to_owned();
|
let mut base_path = self.env.path().to_owned();
|
||||||
@ -742,6 +770,7 @@ impl IndexScheduler {
|
|||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
}
|
}
|
||||||
Batch::Dump(mut task) => {
|
Batch::Dump(mut task) => {
|
||||||
|
progress.update_progress(DumpCreationProgress::StartTheDumpCreation);
|
||||||
let started_at = OffsetDateTime::now_utc();
|
let started_at = OffsetDateTime::now_utc();
|
||||||
let (keys, instance_uid) =
|
let (keys, instance_uid) =
|
||||||
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
|
if let KindWithContent::DumpCreation { keys, instance_uid } = &task.kind {
|
||||||
@ -752,6 +781,7 @@ impl IndexScheduler {
|
|||||||
let dump = dump::DumpWriter::new(*instance_uid)?;
|
let dump = dump::DumpWriter::new(*instance_uid)?;
|
||||||
|
|
||||||
// 1. dump the keys
|
// 1. dump the keys
|
||||||
|
progress.update_progress(DumpCreationProgress::DumpTheApiKeys);
|
||||||
let mut dump_keys = dump.create_keys()?;
|
let mut dump_keys = dump.create_keys()?;
|
||||||
for key in keys {
|
for key in keys {
|
||||||
dump_keys.push_key(key)?;
|
dump_keys.push_key(key)?;
|
||||||
@ -761,7 +791,13 @@ impl IndexScheduler {
|
|||||||
let rtxn = self.env.read_txn()?;
|
let rtxn = self.env.read_txn()?;
|
||||||
|
|
||||||
// 2. dump the tasks
|
// 2. dump the tasks
|
||||||
|
progress.update_progress(DumpCreationProgress::DumpTheTasks);
|
||||||
let mut dump_tasks = dump.create_tasks_queue()?;
|
let mut dump_tasks = dump.create_tasks_queue()?;
|
||||||
|
|
||||||
|
let (atomic, update_task_progress) =
|
||||||
|
AtomicTaskStep::new(self.all_tasks.len(&rtxn)? as u32);
|
||||||
|
progress.update_progress(update_task_progress);
|
||||||
|
|
||||||
for ret in self.all_tasks.iter(&rtxn)? {
|
for ret in self.all_tasks.iter(&rtxn)? {
|
||||||
if self.must_stop_processing.get() {
|
if self.must_stop_processing.get() {
|
||||||
return Err(Error::AbortedTask);
|
return Err(Error::AbortedTask);
|
||||||
@ -811,11 +847,22 @@ impl IndexScheduler {
|
|||||||
dump_content_file.flush()?;
|
dump_content_file.flush()?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
atomic.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
dump_tasks.flush()?;
|
dump_tasks.flush()?;
|
||||||
|
|
||||||
// 3. Dump the indexes
|
// 3. Dump the indexes
|
||||||
|
progress.update_progress(DumpCreationProgress::DumpTheIndexes);
|
||||||
|
let nb_indexes = self.index_mapper.index_mapping.len(&rtxn)? as u32;
|
||||||
|
let mut count = 0;
|
||||||
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
|
self.index_mapper.try_for_each_index(&rtxn, |uid, index| -> Result<()> {
|
||||||
|
progress.update_progress(VariableNameStep::new(
|
||||||
|
uid.to_string(),
|
||||||
|
count,
|
||||||
|
nb_indexes,
|
||||||
|
));
|
||||||
|
count += 1;
|
||||||
|
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
let metadata = IndexMetadata {
|
let metadata = IndexMetadata {
|
||||||
uid: uid.to_owned(),
|
uid: uid.to_owned(),
|
||||||
@ -835,6 +882,12 @@ impl IndexScheduler {
|
|||||||
.embedding_configs(&rtxn)
|
.embedding_configs(&rtxn)
|
||||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||||
|
|
||||||
|
let nb_documents = index
|
||||||
|
.number_of_documents(&rtxn)
|
||||||
|
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?
|
||||||
|
as u32;
|
||||||
|
let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
|
||||||
|
progress.update_progress(update_document_progress);
|
||||||
let documents = index
|
let documents = index
|
||||||
.all_documents(&rtxn)
|
.all_documents(&rtxn)
|
||||||
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
|
||||||
@ -904,6 +957,7 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
index_dumper.push_document(&document)?;
|
index_dumper.push_document(&document)?;
|
||||||
|
atomic.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3.2. Dump the settings
|
// 3.2. Dump the settings
|
||||||
@ -918,6 +972,7 @@ impl IndexScheduler {
|
|||||||
})?;
|
})?;
|
||||||
|
|
||||||
// 4. Dump experimental feature settings
|
// 4. Dump experimental feature settings
|
||||||
|
progress.update_progress(DumpCreationProgress::DumpTheExperimentalFeatures);
|
||||||
let features = self.features().runtime_features();
|
let features = self.features().runtime_features();
|
||||||
dump.create_experimental_features(features)?;
|
dump.create_experimental_features(features)?;
|
||||||
|
|
||||||
@ -928,6 +983,7 @@ impl IndexScheduler {
|
|||||||
if self.must_stop_processing.get() {
|
if self.must_stop_processing.get() {
|
||||||
return Err(Error::AbortedTask);
|
return Err(Error::AbortedTask);
|
||||||
}
|
}
|
||||||
|
progress.update_progress(DumpCreationProgress::CompressTheDump);
|
||||||
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
|
let path = self.dumps_path.join(format!("{}.dump", dump_uid));
|
||||||
let file = File::create(path)?;
|
let file = File::create(path)?;
|
||||||
dump.persist_to(BufWriter::new(file))?;
|
dump.persist_to(BufWriter::new(file))?;
|
||||||
@ -953,7 +1009,7 @@ impl IndexScheduler {
|
|||||||
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
.set_currently_updating_index(Some((index_uid.clone(), index.clone())));
|
||||||
|
|
||||||
let mut index_wtxn = index.write_txn()?;
|
let mut index_wtxn = index.write_txn()?;
|
||||||
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op)?;
|
let tasks = self.apply_index_operation(&mut index_wtxn, &index, op, progress)?;
|
||||||
|
|
||||||
{
|
{
|
||||||
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
|
let span = tracing::trace_span!(target: "indexing::scheduler", "commit");
|
||||||
@ -987,6 +1043,8 @@ impl IndexScheduler {
|
|||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
}
|
}
|
||||||
Batch::IndexCreation { index_uid, primary_key, task } => {
|
Batch::IndexCreation { index_uid, primary_key, task } => {
|
||||||
|
progress.update_progress(CreateIndexProgress::CreatingTheIndex);
|
||||||
|
|
||||||
let wtxn = self.env.write_txn()?;
|
let wtxn = self.env.write_txn()?;
|
||||||
if self.index_mapper.exists(&wtxn, &index_uid)? {
|
if self.index_mapper.exists(&wtxn, &index_uid)? {
|
||||||
return Err(Error::IndexAlreadyExists(index_uid));
|
return Err(Error::IndexAlreadyExists(index_uid));
|
||||||
@ -996,9 +1054,11 @@ impl IndexScheduler {
|
|||||||
self.process_batch(
|
self.process_batch(
|
||||||
Batch::IndexUpdate { index_uid, primary_key, task },
|
Batch::IndexUpdate { index_uid, primary_key, task },
|
||||||
current_batch,
|
current_batch,
|
||||||
|
progress,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
Batch::IndexUpdate { index_uid, primary_key, mut task } => {
|
Batch::IndexUpdate { index_uid, primary_key, mut task } => {
|
||||||
|
progress.update_progress(UpdateIndexProgress::UpdatingTheIndex);
|
||||||
let rtxn = self.env.read_txn()?;
|
let rtxn = self.env.read_txn()?;
|
||||||
let index = self.index_mapper.index(&rtxn, &index_uid)?;
|
let index = self.index_mapper.index(&rtxn, &index_uid)?;
|
||||||
|
|
||||||
@ -1051,6 +1111,7 @@ impl IndexScheduler {
|
|||||||
Ok(vec![task])
|
Ok(vec![task])
|
||||||
}
|
}
|
||||||
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
|
Batch::IndexDeletion { index_uid, index_has_been_created, mut tasks } => {
|
||||||
|
progress.update_progress(DeleteIndexProgress::DeletingTheIndex);
|
||||||
let wtxn = self.env.write_txn()?;
|
let wtxn = self.env.write_txn()?;
|
||||||
|
|
||||||
// it's possible that the index doesn't exist
|
// it's possible that the index doesn't exist
|
||||||
@ -1084,6 +1145,8 @@ impl IndexScheduler {
|
|||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
}
|
}
|
||||||
Batch::IndexSwap { mut task } => {
|
Batch::IndexSwap { mut task } => {
|
||||||
|
progress.update_progress(SwappingTheIndexes::EnsuringCorrectnessOfTheSwap);
|
||||||
|
|
||||||
let mut wtxn = self.env.write_txn()?;
|
let mut wtxn = self.env.write_txn()?;
|
||||||
let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
|
let swaps = if let KindWithContent::IndexSwap { swaps } = &task.kind {
|
||||||
swaps
|
swaps
|
||||||
@ -1110,8 +1173,20 @@ impl IndexScheduler {
|
|||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for swap in swaps {
|
progress.update_progress(SwappingTheIndexes::SwappingTheIndexes);
|
||||||
self.apply_index_swap(&mut wtxn, task.uid, &swap.indexes.0, &swap.indexes.1)?;
|
for (step, swap) in swaps.iter().enumerate() {
|
||||||
|
progress.update_progress(VariableNameStep::new(
|
||||||
|
format!("swapping index {} and {}", swap.indexes.0, swap.indexes.1),
|
||||||
|
step as u32,
|
||||||
|
swaps.len() as u32,
|
||||||
|
));
|
||||||
|
self.apply_index_swap(
|
||||||
|
&mut wtxn,
|
||||||
|
&progress,
|
||||||
|
task.uid,
|
||||||
|
&swap.indexes.0,
|
||||||
|
&swap.indexes.1,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
wtxn.commit()?;
|
wtxn.commit()?;
|
||||||
task.status = Status::Succeeded;
|
task.status = Status::Succeeded;
|
||||||
@ -1121,7 +1196,15 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Swap the index `lhs` with the index `rhs`.
|
/// Swap the index `lhs` with the index `rhs`.
|
||||||
fn apply_index_swap(&self, wtxn: &mut RwTxn, task_id: u32, lhs: &str, rhs: &str) -> Result<()> {
|
fn apply_index_swap(
|
||||||
|
&self,
|
||||||
|
wtxn: &mut RwTxn,
|
||||||
|
progress: &Progress,
|
||||||
|
task_id: u32,
|
||||||
|
lhs: &str,
|
||||||
|
rhs: &str,
|
||||||
|
) -> Result<()> {
|
||||||
|
progress.update_progress(InnerSwappingTwoIndexes::RetrieveTheTasks);
|
||||||
// 1. Verify that both lhs and rhs are existing indexes
|
// 1. Verify that both lhs and rhs are existing indexes
|
||||||
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
|
let index_lhs_exists = self.index_mapper.index_exists(wtxn, lhs)?;
|
||||||
if !index_lhs_exists {
|
if !index_lhs_exists {
|
||||||
@ -1139,14 +1222,21 @@ impl IndexScheduler {
|
|||||||
index_rhs_task_ids.remove_range(task_id..);
|
index_rhs_task_ids.remove_range(task_id..);
|
||||||
|
|
||||||
// 3. before_name -> new_name in the task's KindWithContent
|
// 3. before_name -> new_name in the task's KindWithContent
|
||||||
for task_id in &index_lhs_task_ids | &index_rhs_task_ids {
|
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheTasks);
|
||||||
|
let tasks_to_update = &index_lhs_task_ids | &index_rhs_task_ids;
|
||||||
|
let (atomic, task_progress) = AtomicTaskStep::new(tasks_to_update.len() as u32);
|
||||||
|
progress.update_progress(task_progress);
|
||||||
|
|
||||||
|
for task_id in tasks_to_update {
|
||||||
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
let mut task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||||
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
swap_index_uid_in_task(&mut task, (lhs, rhs));
|
||||||
self.all_tasks.put(wtxn, &task_id, &task)?;
|
self.all_tasks.put(wtxn, &task_id, &task)?;
|
||||||
|
atomic.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 4. remove the task from indexuid = before_name
|
// 4. remove the task from indexuid = before_name
|
||||||
// 5. add the task to indexuid = after_name
|
// 5. add the task to indexuid = after_name
|
||||||
|
progress.update_progress(InnerSwappingTwoIndexes::UpdateTheIndexesMetadata);
|
||||||
self.update_index(wtxn, lhs, |lhs_tasks| {
|
self.update_index(wtxn, lhs, |lhs_tasks| {
|
||||||
*lhs_tasks -= &index_lhs_task_ids;
|
*lhs_tasks -= &index_lhs_task_ids;
|
||||||
*lhs_tasks |= &index_rhs_task_ids;
|
*lhs_tasks |= &index_rhs_task_ids;
|
||||||
@ -1168,7 +1258,7 @@ impl IndexScheduler {
|
|||||||
/// The list of processed tasks.
|
/// The list of processed tasks.
|
||||||
#[tracing::instrument(
|
#[tracing::instrument(
|
||||||
level = "trace",
|
level = "trace",
|
||||||
skip(self, index_wtxn, index),
|
skip(self, index_wtxn, index, progress),
|
||||||
target = "indexing::scheduler"
|
target = "indexing::scheduler"
|
||||||
)]
|
)]
|
||||||
fn apply_index_operation<'i>(
|
fn apply_index_operation<'i>(
|
||||||
@ -1176,44 +1266,12 @@ impl IndexScheduler {
|
|||||||
index_wtxn: &mut RwTxn<'i>,
|
index_wtxn: &mut RwTxn<'i>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
operation: IndexOperation,
|
operation: IndexOperation,
|
||||||
|
progress: Progress,
|
||||||
) -> Result<Vec<Task>> {
|
) -> Result<Vec<Task>> {
|
||||||
let indexer_alloc = Bump::new();
|
let indexer_alloc = Bump::new();
|
||||||
|
|
||||||
let started_processing_at = std::time::Instant::now();
|
let started_processing_at = std::time::Instant::now();
|
||||||
let secs_since_started_processing_at = AtomicU64::new(0);
|
|
||||||
const PRINT_SECS_DELTA: u64 = 5;
|
|
||||||
|
|
||||||
let processing_tasks = self.processing_tasks.clone();
|
|
||||||
let must_stop_processing = self.must_stop_processing.clone();
|
let must_stop_processing = self.must_stop_processing.clone();
|
||||||
let send_progress = |progress| {
|
|
||||||
let now = std::time::Instant::now();
|
|
||||||
let elapsed = secs_since_started_processing_at.load(atomic::Ordering::Relaxed);
|
|
||||||
let previous = started_processing_at + Duration::from_secs(elapsed);
|
|
||||||
let elapsed = now - previous;
|
|
||||||
|
|
||||||
if elapsed.as_secs() < PRINT_SECS_DELTA {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
secs_since_started_processing_at
|
|
||||||
.store((now - started_processing_at).as_secs(), atomic::Ordering::Relaxed);
|
|
||||||
|
|
||||||
let TaskProgress {
|
|
||||||
current_step,
|
|
||||||
finished_steps,
|
|
||||||
total_steps,
|
|
||||||
finished_substeps,
|
|
||||||
total_substeps,
|
|
||||||
} = processing_tasks.write().unwrap().update_progress(progress);
|
|
||||||
|
|
||||||
tracing::info!(
|
|
||||||
current_step,
|
|
||||||
finished_steps,
|
|
||||||
total_steps,
|
|
||||||
finished_substeps,
|
|
||||||
total_substeps
|
|
||||||
);
|
|
||||||
};
|
|
||||||
|
|
||||||
match operation {
|
match operation {
|
||||||
IndexOperation::DocumentClear { index_uid, mut tasks } => {
|
IndexOperation::DocumentClear { index_uid, mut tasks } => {
|
||||||
@ -1245,6 +1303,7 @@ impl IndexScheduler {
|
|||||||
operations,
|
operations,
|
||||||
mut tasks,
|
mut tasks,
|
||||||
} => {
|
} => {
|
||||||
|
progress.update_progress(DocumentOperationProgress::RetrievingConfig);
|
||||||
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
|
// TODO: at some point, for better efficiency we might want to reuse the bumpalo for successive batches.
|
||||||
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
|
// this is made difficult by the fact we're doing private clones of the index scheduler and sending it
|
||||||
// to a fresh thread.
|
// to a fresh thread.
|
||||||
@ -1300,6 +1359,7 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
progress.update_progress(DocumentOperationProgress::ComputingDocumentChanges);
|
||||||
let (document_changes, operation_stats, primary_key) = indexer
|
let (document_changes, operation_stats, primary_key) = indexer
|
||||||
.into_changes(
|
.into_changes(
|
||||||
&indexer_alloc,
|
&indexer_alloc,
|
||||||
@ -1308,7 +1368,7 @@ impl IndexScheduler {
|
|||||||
primary_key.as_deref(),
|
primary_key.as_deref(),
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| must_stop_processing.get(),
|
&|| must_stop_processing.get(),
|
||||||
&send_progress,
|
progress.clone(),
|
||||||
)
|
)
|
||||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||||
|
|
||||||
@ -1344,6 +1404,7 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
progress.update_progress(DocumentOperationProgress::Indexing);
|
||||||
if tasks.iter().any(|res| res.error.is_none()) {
|
if tasks.iter().any(|res| res.error.is_none()) {
|
||||||
indexer::index(
|
indexer::index(
|
||||||
index_wtxn,
|
index_wtxn,
|
||||||
@ -1356,7 +1417,7 @@ impl IndexScheduler {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| must_stop_processing.get(),
|
&|| must_stop_processing.get(),
|
||||||
&send_progress,
|
&progress,
|
||||||
)
|
)
|
||||||
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
.map_err(|e| Error::from_milli(e, Some(index_uid.clone())))?;
|
||||||
|
|
||||||
@ -1373,6 +1434,8 @@ impl IndexScheduler {
|
|||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
}
|
}
|
||||||
IndexOperation::DocumentEdition { index_uid, mut task } => {
|
IndexOperation::DocumentEdition { index_uid, mut task } => {
|
||||||
|
progress.update_progress(DocumentEditionProgress::RetrievingConfig);
|
||||||
|
|
||||||
let (filter, code) = if let KindWithContent::DocumentEdition {
|
let (filter, code) = if let KindWithContent::DocumentEdition {
|
||||||
filter_expr,
|
filter_expr,
|
||||||
context: _,
|
context: _,
|
||||||
@ -1446,6 +1509,7 @@ impl IndexScheduler {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let candidates_count = candidates.len();
|
let candidates_count = candidates.len();
|
||||||
|
progress.update_progress(DocumentEditionProgress::ComputingDocumentChanges);
|
||||||
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
|
let indexer = UpdateByFunction::new(candidates, context.clone(), code.clone());
|
||||||
let document_changes = pool
|
let document_changes = pool
|
||||||
.install(|| {
|
.install(|| {
|
||||||
@ -1459,6 +1523,7 @@ impl IndexScheduler {
|
|||||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||||
|
|
||||||
|
progress.update_progress(DocumentEditionProgress::Indexing);
|
||||||
indexer::index(
|
indexer::index(
|
||||||
index_wtxn,
|
index_wtxn,
|
||||||
index,
|
index,
|
||||||
@ -1470,7 +1535,7 @@ impl IndexScheduler {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| must_stop_processing.get(),
|
&|| must_stop_processing.get(),
|
||||||
&send_progress,
|
&progress,
|
||||||
)
|
)
|
||||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||||
|
|
||||||
@ -1511,6 +1576,8 @@ impl IndexScheduler {
|
|||||||
Ok(vec![task])
|
Ok(vec![task])
|
||||||
}
|
}
|
||||||
IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
|
IndexOperation::DocumentDeletion { mut tasks, index_uid } => {
|
||||||
|
progress.update_progress(DocumentDeletionProgress::RetrievingConfig);
|
||||||
|
|
||||||
let mut to_delete = RoaringBitmap::new();
|
let mut to_delete = RoaringBitmap::new();
|
||||||
let external_documents_ids = index.external_documents_ids();
|
let external_documents_ids = index.external_documents_ids();
|
||||||
|
|
||||||
@ -1601,6 +1668,7 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
progress.update_progress(DocumentDeletionProgress::DeleteDocuments);
|
||||||
let mut indexer = indexer::DocumentDeletion::new();
|
let mut indexer = indexer::DocumentDeletion::new();
|
||||||
let candidates_count = to_delete.len();
|
let candidates_count = to_delete.len();
|
||||||
indexer.delete_documents_by_docids(to_delete);
|
indexer.delete_documents_by_docids(to_delete);
|
||||||
@ -1610,6 +1678,7 @@ impl IndexScheduler {
|
|||||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||||
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
let embedders = self.embedders(index_uid.clone(), embedders)?;
|
||||||
|
|
||||||
|
progress.update_progress(DocumentDeletionProgress::Indexing);
|
||||||
indexer::index(
|
indexer::index(
|
||||||
index_wtxn,
|
index_wtxn,
|
||||||
index,
|
index,
|
||||||
@ -1621,7 +1690,7 @@ impl IndexScheduler {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| must_stop_processing.get(),
|
&|| must_stop_processing.get(),
|
||||||
&send_progress,
|
&progress,
|
||||||
)
|
)
|
||||||
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
.map_err(|err| Error::from_milli(err, Some(index_uid.clone())))?;
|
||||||
|
|
||||||
@ -1638,6 +1707,7 @@ impl IndexScheduler {
|
|||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
}
|
}
|
||||||
IndexOperation::Settings { index_uid, settings, mut tasks } => {
|
IndexOperation::Settings { index_uid, settings, mut tasks } => {
|
||||||
|
progress.update_progress(SettingsProgress::RetrievingAndMergingTheSettings);
|
||||||
let indexer_config = self.index_mapper.indexer_config();
|
let indexer_config = self.index_mapper.indexer_config();
|
||||||
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
|
let mut builder = milli::update::Settings::new(index_wtxn, index, indexer_config);
|
||||||
|
|
||||||
@ -1651,6 +1721,7 @@ impl IndexScheduler {
|
|||||||
task.status = Status::Succeeded;
|
task.status = Status::Succeeded;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
progress.update_progress(SettingsProgress::ApplyTheSettings);
|
||||||
builder
|
builder
|
||||||
.execute(
|
.execute(
|
||||||
|indexing_step| tracing::debug!(update = ?indexing_step),
|
|indexing_step| tracing::debug!(update = ?indexing_step),
|
||||||
@ -1673,12 +1744,14 @@ impl IndexScheduler {
|
|||||||
index_uid: index_uid.clone(),
|
index_uid: index_uid.clone(),
|
||||||
tasks: cleared_tasks,
|
tasks: cleared_tasks,
|
||||||
},
|
},
|
||||||
|
progress.clone(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let settings_tasks = self.apply_index_operation(
|
let settings_tasks = self.apply_index_operation(
|
||||||
index_wtxn,
|
index_wtxn,
|
||||||
index,
|
index,
|
||||||
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
|
IndexOperation::Settings { index_uid, settings, tasks: settings_tasks },
|
||||||
|
progress,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let mut tasks = settings_tasks;
|
let mut tasks = settings_tasks;
|
||||||
@ -1695,15 +1768,18 @@ impl IndexScheduler {
|
|||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
matched_tasks: &RoaringBitmap,
|
matched_tasks: &RoaringBitmap,
|
||||||
|
progress: &Progress,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
|
progress.update_progress(TaskDeletionProgress::DeletingTasksDateTime);
|
||||||
|
|
||||||
// 1. Remove from this list the tasks that we are not allowed to delete
|
// 1. Remove from this list the tasks that we are not allowed to delete
|
||||||
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
|
let enqueued_tasks = self.get_status(wtxn, Status::Enqueued)?;
|
||||||
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
|
let processing_tasks = &self.processing_tasks.read().unwrap().processing.clone();
|
||||||
|
|
||||||
let all_task_ids = self.all_task_ids(wtxn)?;
|
let all_task_ids = self.all_task_ids(wtxn)?;
|
||||||
let mut to_delete_tasks = all_task_ids & matched_tasks;
|
let mut to_delete_tasks = all_task_ids & matched_tasks;
|
||||||
to_delete_tasks -= processing_tasks;
|
to_delete_tasks -= &**processing_tasks;
|
||||||
to_delete_tasks -= enqueued_tasks;
|
to_delete_tasks -= &enqueued_tasks;
|
||||||
|
|
||||||
// 2. We now have a list of tasks to delete, delete them
|
// 2. We now have a list of tasks to delete, delete them
|
||||||
|
|
||||||
@ -1714,6 +1790,8 @@ impl IndexScheduler {
|
|||||||
// The tasks that have been removed *per batches*.
|
// The tasks that have been removed *per batches*.
|
||||||
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
|
let mut affected_batches: HashMap<BatchId, RoaringBitmap> = HashMap::new();
|
||||||
|
|
||||||
|
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||||
|
progress.update_progress(task_progress);
|
||||||
for task_id in to_delete_tasks.iter() {
|
for task_id in to_delete_tasks.iter() {
|
||||||
let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
let task = self.get_task(wtxn, task_id)?.ok_or(Error::CorruptedTaskQueue)?;
|
||||||
|
|
||||||
@ -1737,22 +1815,35 @@ impl IndexScheduler {
|
|||||||
if let Some(batch_uid) = task.batch_uid {
|
if let Some(batch_uid) = task.batch_uid {
|
||||||
affected_batches.entry(batch_uid).or_default().insert(task_id);
|
affected_batches.entry(batch_uid).or_default().insert(task_id);
|
||||||
}
|
}
|
||||||
|
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
progress.update_progress(TaskDeletionProgress::DeletingTasksMetadata);
|
||||||
|
let (atomic_progress, task_progress) = AtomicTaskStep::new(
|
||||||
|
(affected_indexes.len() + affected_statuses.len() + affected_kinds.len()) as u32,
|
||||||
|
);
|
||||||
|
progress.update_progress(task_progress);
|
||||||
for index in affected_indexes.iter() {
|
for index in affected_indexes.iter() {
|
||||||
self.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
|
self.update_index(wtxn, index, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||||
|
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
for status in affected_statuses.iter() {
|
for status in affected_statuses.iter() {
|
||||||
self.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
|
self.update_status(wtxn, *status, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||||
|
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
for kind in affected_kinds.iter() {
|
for kind in affected_kinds.iter() {
|
||||||
self.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
|
self.update_kind(wtxn, *kind, |bitmap| *bitmap -= &to_delete_tasks)?;
|
||||||
|
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
progress.update_progress(TaskDeletionProgress::DeletingTasks);
|
||||||
|
let (atomic_progress, task_progress) = AtomicTaskStep::new(to_delete_tasks.len() as u32);
|
||||||
|
progress.update_progress(task_progress);
|
||||||
for task in to_delete_tasks.iter() {
|
for task in to_delete_tasks.iter() {
|
||||||
self.all_tasks.delete(wtxn, &task)?;
|
self.all_tasks.delete(wtxn, &task)?;
|
||||||
|
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
for canceled_by in affected_canceled_by {
|
for canceled_by in affected_canceled_by {
|
||||||
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
|
if let Some(mut tasks) = self.canceled_by.get(wtxn, &canceled_by)? {
|
||||||
@ -1764,6 +1855,9 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
progress.update_progress(TaskDeletionProgress::DeletingBatches);
|
||||||
|
let (atomic_progress, batch_progress) = AtomicBatchStep::new(affected_batches.len() as u32);
|
||||||
|
progress.update_progress(batch_progress);
|
||||||
for (batch_id, to_delete_tasks) in affected_batches {
|
for (batch_id, to_delete_tasks) in affected_batches {
|
||||||
if let Some(mut tasks) = self.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
|
if let Some(mut tasks) = self.batch_to_tasks_mapping.get(wtxn, &batch_id)? {
|
||||||
tasks -= &to_delete_tasks;
|
tasks -= &to_delete_tasks;
|
||||||
@ -1805,6 +1899,7 @@ impl IndexScheduler {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
atomic_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(to_delete_tasks)
|
Ok(to_delete_tasks)
|
||||||
@ -1819,21 +1914,36 @@ impl IndexScheduler {
|
|||||||
cancel_task_id: TaskId,
|
cancel_task_id: TaskId,
|
||||||
current_batch: &mut ProcessingBatch,
|
current_batch: &mut ProcessingBatch,
|
||||||
matched_tasks: &RoaringBitmap,
|
matched_tasks: &RoaringBitmap,
|
||||||
|
progress: &Progress,
|
||||||
) -> Result<Vec<Task>> {
|
) -> Result<Vec<Task>> {
|
||||||
|
progress.update_progress(TaskCancelationProgress::RetrievingTasks);
|
||||||
|
|
||||||
// 1. Remove from this list the tasks that we are not allowed to cancel
|
// 1. Remove from this list the tasks that we are not allowed to cancel
|
||||||
// Notice that only the _enqueued_ ones are cancelable and we should
|
// Notice that only the _enqueued_ ones are cancelable and we should
|
||||||
// have already aborted the indexation of the _processing_ ones
|
// have already aborted the indexation of the _processing_ ones
|
||||||
let cancelable_tasks = self.get_status(rtxn, Status::Enqueued)?;
|
let cancelable_tasks = self.get_status(rtxn, Status::Enqueued)?;
|
||||||
let tasks_to_cancel = cancelable_tasks & matched_tasks;
|
let tasks_to_cancel = cancelable_tasks & matched_tasks;
|
||||||
|
|
||||||
// 2. We now have a list of tasks to cancel, cancel them
|
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||||
let mut tasks = self.get_existing_tasks(rtxn, tasks_to_cancel.iter())?;
|
progress.update_progress(progress_obj);
|
||||||
|
|
||||||
|
// 2. We now have a list of tasks to cancel, cancel them
|
||||||
|
let mut tasks = self.get_existing_tasks(
|
||||||
|
rtxn,
|
||||||
|
tasks_to_cancel.iter().inspect(|_| {
|
||||||
|
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
|
}),
|
||||||
|
)?;
|
||||||
|
|
||||||
|
progress.update_progress(TaskCancelationProgress::UpdatingTasks);
|
||||||
|
let (task_progress, progress_obj) = AtomicTaskStep::new(tasks_to_cancel.len() as u32);
|
||||||
|
progress.update_progress(progress_obj);
|
||||||
for task in tasks.iter_mut() {
|
for task in tasks.iter_mut() {
|
||||||
task.status = Status::Canceled;
|
task.status = Status::Canceled;
|
||||||
task.canceled_by = Some(cancel_task_id);
|
task.canceled_by = Some(cancel_task_id);
|
||||||
task.details = task.details.as_ref().map(|d| d.to_failed());
|
task.details = task.details.as_ref().map(|d| d.to_failed());
|
||||||
current_batch.processing(Some(task));
|
current_batch.processing(Some(task));
|
||||||
|
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(tasks)
|
Ok(tasks)
|
||||||
|
@ -3,10 +3,6 @@ use std::sync::{Arc, RwLock};
|
|||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
use std::{fs, thread};
|
use std::{fs, thread};
|
||||||
|
|
||||||
use self::index_map::IndexMap;
|
|
||||||
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
|
|
||||||
use crate::uuid_codec::UuidCodec;
|
|
||||||
use crate::{Error, Result};
|
|
||||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli;
|
use meilisearch_types::milli;
|
||||||
@ -17,6 +13,11 @@ use time::OffsetDateTime;
|
|||||||
use tracing::error;
|
use tracing::error;
|
||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
|
use self::index_map::IndexMap;
|
||||||
|
use self::IndexStatus::{Available, BeingDeleted, Closing, Missing};
|
||||||
|
use crate::uuid_codec::UuidCodec;
|
||||||
|
use crate::{Error, Result};
|
||||||
|
|
||||||
mod index_map;
|
mod index_map;
|
||||||
|
|
||||||
const INDEX_MAPPING: &str = "index-mapping";
|
const INDEX_MAPPING: &str = "index-mapping";
|
||||||
|
@ -353,7 +353,7 @@ pub fn snapshot_canceled_by(rtxn: &RoTxn, db: Database<BEU32, RoaringBitmapCodec
|
|||||||
|
|
||||||
pub fn snapshot_batch(batch: &Batch) -> String {
|
pub fn snapshot_batch(batch: &Batch) -> String {
|
||||||
let mut snap = String::new();
|
let mut snap = String::new();
|
||||||
let Batch { uid, details, stats, started_at, finished_at } = batch;
|
let Batch { uid, details, stats, started_at, finished_at, progress: _ } = batch;
|
||||||
if let Some(finished_at) = finished_at {
|
if let Some(finished_at) = finished_at {
|
||||||
assert!(finished_at > started_at);
|
assert!(finished_at > started_at);
|
||||||
}
|
}
|
||||||
|
@ -26,6 +26,7 @@ mod index_mapper;
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod insta_snapshot;
|
mod insta_snapshot;
|
||||||
mod lru;
|
mod lru;
|
||||||
|
mod processing;
|
||||||
mod utils;
|
mod utils;
|
||||||
pub mod uuid_codec;
|
pub mod uuid_codec;
|
||||||
|
|
||||||
@ -56,12 +57,12 @@ use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
|
|||||||
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
|
||||||
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
use meilisearch_types::milli::index::IndexEmbeddingConfig;
|
||||||
use meilisearch_types::milli::update::new::indexer::document_changes::Progress;
|
|
||||||
use meilisearch_types::milli::update::IndexerConfig;
|
use meilisearch_types::milli::update::IndexerConfig;
|
||||||
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
use meilisearch_types::milli::vector::{Embedder, EmbedderOptions, EmbeddingConfigs};
|
||||||
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmapCodec, BEU32};
|
||||||
use meilisearch_types::task_view::TaskView;
|
use meilisearch_types::task_view::TaskView;
|
||||||
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task, TaskProgress};
|
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
|
||||||
|
use processing::ProcessingTasks;
|
||||||
use rayon::current_num_threads;
|
use rayon::current_num_threads;
|
||||||
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
|
use rayon::prelude::{IntoParallelIterator, ParallelIterator};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -72,7 +73,8 @@ use utils::{filter_out_references_to_newer_tasks, keep_ids_within_datetimes, map
|
|||||||
use uuid::Uuid;
|
use uuid::Uuid;
|
||||||
|
|
||||||
use crate::index_mapper::IndexMapper;
|
use crate::index_mapper::IndexMapper;
|
||||||
use crate::utils::{check_index_swap_validity, clamp_to_page_size, ProcessingBatch};
|
use crate::processing::{AtomicTaskStep, BatchProgress};
|
||||||
|
use crate::utils::{check_index_swap_validity, clamp_to_page_size};
|
||||||
|
|
||||||
pub(crate) type BEI128 = I128<BE>;
|
pub(crate) type BEI128 = I128<BE>;
|
||||||
|
|
||||||
@ -163,48 +165,6 @@ impl Query {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct ProcessingTasks {
|
|
||||||
batch: Option<ProcessingBatch>,
|
|
||||||
/// The list of tasks ids that are currently running.
|
|
||||||
processing: RoaringBitmap,
|
|
||||||
/// The progress on processing tasks
|
|
||||||
progress: Option<TaskProgress>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl ProcessingTasks {
|
|
||||||
/// Creates an empty `ProcessingAt` struct.
|
|
||||||
fn new() -> ProcessingTasks {
|
|
||||||
ProcessingTasks { batch: None, processing: RoaringBitmap::new(), progress: None }
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Stores the currently processing tasks, and the date time at which it started.
|
|
||||||
fn start_processing(&mut self, processing_batch: ProcessingBatch, processing: RoaringBitmap) {
|
|
||||||
self.batch = Some(processing_batch);
|
|
||||||
self.processing = processing;
|
|
||||||
}
|
|
||||||
|
|
||||||
fn update_progress(&mut self, progress: Progress) -> TaskProgress {
|
|
||||||
self.progress.get_or_insert_with(TaskProgress::default).update(progress)
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set the processing tasks to an empty list
|
|
||||||
fn stop_processing(&mut self) -> Self {
|
|
||||||
self.progress = None;
|
|
||||||
|
|
||||||
Self {
|
|
||||||
batch: std::mem::take(&mut self.batch),
|
|
||||||
processing: std::mem::take(&mut self.processing),
|
|
||||||
progress: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
|
|
||||||
fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
|
|
||||||
!self.processing.is_disjoint(canceled_tasks)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Default, Clone, Debug)]
|
#[derive(Default, Clone, Debug)]
|
||||||
struct MustStopProcessing(Arc<AtomicBool>);
|
struct MustStopProcessing(Arc<AtomicBool>);
|
||||||
|
|
||||||
@ -813,7 +773,7 @@ impl IndexScheduler {
|
|||||||
let mut batch_tasks = RoaringBitmap::new();
|
let mut batch_tasks = RoaringBitmap::new();
|
||||||
for batch_uid in batch_uids {
|
for batch_uid in batch_uids {
|
||||||
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
|
if processing_batch.as_ref().map_or(false, |batch| batch.uid == *batch_uid) {
|
||||||
batch_tasks |= &processing_tasks;
|
batch_tasks |= &*processing_tasks;
|
||||||
} else {
|
} else {
|
||||||
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
|
batch_tasks |= self.tasks_in_batch(rtxn, *batch_uid)?;
|
||||||
}
|
}
|
||||||
@ -827,13 +787,13 @@ impl IndexScheduler {
|
|||||||
match status {
|
match status {
|
||||||
// special case for Processing tasks
|
// special case for Processing tasks
|
||||||
Status::Processing => {
|
Status::Processing => {
|
||||||
status_tasks |= &processing_tasks;
|
status_tasks |= &*processing_tasks;
|
||||||
}
|
}
|
||||||
status => status_tasks |= &self.get_status(rtxn, *status)?,
|
status => status_tasks |= &self.get_status(rtxn, *status)?,
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
if !status.contains(&Status::Processing) {
|
if !status.contains(&Status::Processing) {
|
||||||
tasks -= &processing_tasks;
|
tasks -= &*processing_tasks;
|
||||||
}
|
}
|
||||||
tasks &= status_tasks;
|
tasks &= status_tasks;
|
||||||
}
|
}
|
||||||
@ -882,7 +842,7 @@ impl IndexScheduler {
|
|||||||
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
|
// Once we have filtered the two subsets, we put them back together and assign it back to `tasks`.
|
||||||
tasks = {
|
tasks = {
|
||||||
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
|
let (mut filtered_non_processing_tasks, mut filtered_processing_tasks) =
|
||||||
(&tasks - &processing_tasks, &tasks & &processing_tasks);
|
(&tasks - &*processing_tasks, &tasks & &*processing_tasks);
|
||||||
|
|
||||||
// special case for Processing tasks
|
// special case for Processing tasks
|
||||||
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
|
// A closure that clears the filtered_processing_tasks if their started_at date falls outside the given bounds
|
||||||
@ -1090,7 +1050,7 @@ impl IndexScheduler {
|
|||||||
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
|
// Once we have filtered the two subsets, we put them back together and assign it back to `batches`.
|
||||||
batches = {
|
batches = {
|
||||||
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
|
let (mut filtered_non_processing_batches, mut filtered_processing_batches) =
|
||||||
(&batches - &processing.processing, &batches & &processing.processing);
|
(&batches - &*processing.processing, &batches & &*processing.processing);
|
||||||
|
|
||||||
// special case for Processing batches
|
// special case for Processing batches
|
||||||
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
|
// A closure that clears the filtered_processing_batches if their started_at date falls outside the given bounds
|
||||||
@ -1606,7 +1566,8 @@ impl IndexScheduler {
|
|||||||
|
|
||||||
// We reset the must_stop flag to be sure that we don't stop processing tasks
|
// We reset the must_stop flag to be sure that we don't stop processing tasks
|
||||||
self.must_stop_processing.reset();
|
self.must_stop_processing.reset();
|
||||||
self.processing_tasks
|
let progress = self
|
||||||
|
.processing_tasks
|
||||||
.write()
|
.write()
|
||||||
.unwrap()
|
.unwrap()
|
||||||
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
|
// We can clone the processing batch here because we don't want its modification to affect the view of the processing batches
|
||||||
@ -1619,11 +1580,12 @@ impl IndexScheduler {
|
|||||||
let res = {
|
let res = {
|
||||||
let cloned_index_scheduler = self.private_clone();
|
let cloned_index_scheduler = self.private_clone();
|
||||||
let processing_batch = &mut processing_batch;
|
let processing_batch = &mut processing_batch;
|
||||||
|
let progress = progress.clone();
|
||||||
std::thread::scope(|s| {
|
std::thread::scope(|s| {
|
||||||
let handle = std::thread::Builder::new()
|
let handle = std::thread::Builder::new()
|
||||||
.name(String::from("batch-operation"))
|
.name(String::from("batch-operation"))
|
||||||
.spawn_scoped(s, move || {
|
.spawn_scoped(s, move || {
|
||||||
cloned_index_scheduler.process_batch(batch, processing_batch)
|
cloned_index_scheduler.process_batch(batch, processing_batch, progress)
|
||||||
})
|
})
|
||||||
.unwrap();
|
.unwrap();
|
||||||
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
|
handle.join().unwrap_or(Err(Error::ProcessBatchPanicked))
|
||||||
@ -1636,6 +1598,7 @@ impl IndexScheduler {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
|
self.maybe_fail(tests::FailureLocation::AcquiringWtxn)?;
|
||||||
|
|
||||||
|
progress.update_progress(BatchProgress::WritingTasksToDisk);
|
||||||
processing_batch.finished();
|
processing_batch.finished();
|
||||||
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
let mut wtxn = self.env.write_txn().map_err(Error::HeedTransaction)?;
|
||||||
let mut canceled = RoaringBitmap::new();
|
let mut canceled = RoaringBitmap::new();
|
||||||
@ -1645,12 +1608,15 @@ impl IndexScheduler {
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.breakpoint(Breakpoint::ProcessBatchSucceeded);
|
self.breakpoint(Breakpoint::ProcessBatchSucceeded);
|
||||||
|
|
||||||
|
let (task_progress, task_progress_obj) = AtomicTaskStep::new(tasks.len() as u32);
|
||||||
|
progress.update_progress(task_progress_obj);
|
||||||
let mut success = 0;
|
let mut success = 0;
|
||||||
let mut failure = 0;
|
let mut failure = 0;
|
||||||
let mut canceled_by = None;
|
let mut canceled_by = None;
|
||||||
|
|
||||||
#[allow(unused_variables)]
|
#[allow(unused_variables)]
|
||||||
for (i, mut task) in tasks.into_iter().enumerate() {
|
for (i, mut task) in tasks.into_iter().enumerate() {
|
||||||
|
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
processing_batch.update(&mut task);
|
processing_batch.update(&mut task);
|
||||||
if task.status == Status::Canceled {
|
if task.status == Status::Canceled {
|
||||||
canceled.insert(task.uid);
|
canceled.insert(task.uid);
|
||||||
@ -1718,8 +1684,12 @@ impl IndexScheduler {
|
|||||||
Err(err) => {
|
Err(err) => {
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
self.breakpoint(Breakpoint::ProcessBatchFailed);
|
self.breakpoint(Breakpoint::ProcessBatchFailed);
|
||||||
|
let (task_progress, task_progress_obj) = AtomicTaskStep::new(ids.len() as u32);
|
||||||
|
progress.update_progress(task_progress_obj);
|
||||||
|
|
||||||
let error: ResponseError = err.into();
|
let error: ResponseError = err.into();
|
||||||
for id in ids.iter() {
|
for id in ids.iter() {
|
||||||
|
task_progress.fetch_add(1, Ordering::Relaxed);
|
||||||
let mut task = self
|
let mut task = self
|
||||||
.get_task(&wtxn, id)
|
.get_task(&wtxn, id)
|
||||||
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?
|
.map_err(|e| Error::TaskDatabaseUpdate(Box::new(e)))?
|
||||||
|
316
crates/index-scheduler/src/processing.rs
Normal file
316
crates/index-scheduler/src/processing.rs
Normal file
@ -0,0 +1,316 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
use std::sync::Arc;
|
||||||
|
|
||||||
|
use enum_iterator::Sequence;
|
||||||
|
use meilisearch_types::milli::progress::{AtomicSubStep, NamedStep, Progress, ProgressView, Step};
|
||||||
|
use meilisearch_types::milli::{make_atomic_progress, make_enum_progress};
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use crate::utils::ProcessingBatch;
|
||||||
|
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct ProcessingTasks {
|
||||||
|
pub batch: Option<Arc<ProcessingBatch>>,
|
||||||
|
/// The list of tasks ids that are currently running.
|
||||||
|
pub processing: Arc<RoaringBitmap>,
|
||||||
|
/// The progress on processing tasks
|
||||||
|
pub progress: Option<Progress>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl ProcessingTasks {
|
||||||
|
/// Creates an empty `ProcessingAt` struct.
|
||||||
|
pub fn new() -> ProcessingTasks {
|
||||||
|
ProcessingTasks { batch: None, processing: Arc::new(RoaringBitmap::new()), progress: None }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn get_progress_view(&self) -> Option<ProgressView> {
|
||||||
|
Some(self.progress.as_ref()?.as_progress_view())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Stores the currently processing tasks, and the date time at which it started.
|
||||||
|
pub fn start_processing(
|
||||||
|
&mut self,
|
||||||
|
processing_batch: ProcessingBatch,
|
||||||
|
processing: RoaringBitmap,
|
||||||
|
) -> Progress {
|
||||||
|
self.batch = Some(Arc::new(processing_batch));
|
||||||
|
self.processing = Arc::new(processing);
|
||||||
|
let progress = Progress::default();
|
||||||
|
progress.update_progress(BatchProgress::ProcessingTasks);
|
||||||
|
self.progress = Some(progress.clone());
|
||||||
|
|
||||||
|
progress
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Set the processing tasks to an empty list
|
||||||
|
pub fn stop_processing(&mut self) -> Self {
|
||||||
|
self.progress = None;
|
||||||
|
|
||||||
|
Self {
|
||||||
|
batch: std::mem::take(&mut self.batch),
|
||||||
|
processing: std::mem::take(&mut self.processing),
|
||||||
|
progress: None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns `true` if there, at least, is one task that is currently processing that we must stop.
|
||||||
|
pub fn must_cancel_processing_tasks(&self, canceled_tasks: &RoaringBitmap) -> bool {
|
||||||
|
!self.processing.is_disjoint(canceled_tasks)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum BatchProgress {
|
||||||
|
ProcessingTasks,
|
||||||
|
WritingTasksToDisk,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum TaskCancelationProgress {
|
||||||
|
RetrievingTasks,
|
||||||
|
UpdatingTasks,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum TaskDeletionProgress {
|
||||||
|
DeletingTasksDateTime,
|
||||||
|
DeletingTasksMetadata,
|
||||||
|
DeletingTasks,
|
||||||
|
DeletingBatches,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum SnapshotCreationProgress {
|
||||||
|
StartTheSnapshotCreation,
|
||||||
|
SnapshotTheIndexScheduler,
|
||||||
|
SnapshotTheUpdateFiles,
|
||||||
|
SnapshotTheIndexes,
|
||||||
|
SnapshotTheApiKeys,
|
||||||
|
CreateTheTarball,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum DumpCreationProgress {
|
||||||
|
StartTheDumpCreation,
|
||||||
|
DumpTheApiKeys,
|
||||||
|
DumpTheTasks,
|
||||||
|
DumpTheIndexes,
|
||||||
|
DumpTheExperimentalFeatures,
|
||||||
|
CompressTheDump,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum CreateIndexProgress {
|
||||||
|
CreatingTheIndex,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum UpdateIndexProgress {
|
||||||
|
UpdatingTheIndex,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum DeleteIndexProgress {
|
||||||
|
DeletingTheIndex,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum SwappingTheIndexes {
|
||||||
|
EnsuringCorrectnessOfTheSwap,
|
||||||
|
SwappingTheIndexes,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum InnerSwappingTwoIndexes {
|
||||||
|
RetrieveTheTasks,
|
||||||
|
UpdateTheTasks,
|
||||||
|
UpdateTheIndexesMetadata,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum DocumentOperationProgress {
|
||||||
|
RetrievingConfig,
|
||||||
|
ComputingDocumentChanges,
|
||||||
|
Indexing,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum DocumentEditionProgress {
|
||||||
|
RetrievingConfig,
|
||||||
|
ComputingDocumentChanges,
|
||||||
|
Indexing,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum DocumentDeletionProgress {
|
||||||
|
RetrievingConfig,
|
||||||
|
DeleteDocuments,
|
||||||
|
Indexing,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_enum_progress! {
|
||||||
|
pub enum SettingsProgress {
|
||||||
|
RetrievingAndMergingTheSettings,
|
||||||
|
ApplyTheSettings,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
make_atomic_progress!(Task alias AtomicTaskStep => "task" );
|
||||||
|
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
|
||||||
|
make_atomic_progress!(Batch alias AtomicBatchStep => "batch" );
|
||||||
|
make_atomic_progress!(UpdateFile alias AtomicUpdateFileStep => "update file" );
|
||||||
|
|
||||||
|
pub struct VariableNameStep {
|
||||||
|
name: String,
|
||||||
|
current: u32,
|
||||||
|
total: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl VariableNameStep {
|
||||||
|
pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
|
||||||
|
Self { name: name.into(), current, total }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Step for VariableNameStep {
|
||||||
|
fn name(&self) -> Cow<'static, str> {
|
||||||
|
self.name.clone().into()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn current(&self) -> u32 {
|
||||||
|
self.current
|
||||||
|
}
|
||||||
|
|
||||||
|
fn total(&self) -> u32 {
|
||||||
|
self.total
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Snapshot tests for the progress view returned by `ProcessingTasks::get_progress_view`.
#[cfg(test)]
|
||||||
|
mod test {
|
||||||
|
use std::sync::atomic::Ordering;
|
||||||
|
|
||||||
|
use meili_snap::{json_string, snapshot};
|
||||||
|
|
||||||
|
use super::*;
|
||||||
|
|
||||||
|
// A freshly started batch reports a single `BatchProgress` step at 0%;
// moving to the second of its two steps is snapshotted as 50%.
#[test]
|
||||||
|
fn one_level() {
|
||||||
|
let mut processing = ProcessingTasks::new();
|
||||||
|
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
|
||||||
|
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||||
|
{
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"currentStep": "processing tasks",
|
||||||
|
"finished": 0,
|
||||||
|
"total": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"percentage": 0.0
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
|
||||||
|
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||||
|
{
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"currentStep": "writing tasks to disk",
|
||||||
|
"finished": 1,
|
||||||
|
"total": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"percentage": 50.0
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Registering an `AtomicTaskStep` adds a nested "task" step under the current
// batch-level step; its counter is reflected in the overall percentage.
#[test]
|
||||||
|
fn task_progress() {
|
||||||
|
let mut processing = ProcessingTasks::new();
|
||||||
|
processing.start_processing(ProcessingBatch::new(0), RoaringBitmap::new());
|
||||||
|
let (atomic, tasks) = AtomicTaskStep::new(10);
|
||||||
|
processing.progress.as_ref().unwrap().update_progress(tasks);
|
||||||
|
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||||
|
{
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"currentStep": "processing tasks",
|
||||||
|
"finished": 0,
|
||||||
|
"total": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"currentStep": "task",
|
||||||
|
"finished": 0,
|
||||||
|
"total": 10
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"percentage": 0.0
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
// 6 of 10 tasks done under the first of two steps: snapshotted below as 30.000002%.
atomic.fetch_add(6, Ordering::Relaxed);
|
||||||
|
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||||
|
{
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"currentStep": "processing tasks",
|
||||||
|
"finished": 0,
|
||||||
|
"total": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"currentStep": "task",
|
||||||
|
"finished": 6,
|
||||||
|
"total": 10
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"percentage": 30.000002
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
// Switching the batch-level step: the nested "task" step no longer appears in the next snapshot.
processing.progress.as_ref().unwrap().update_progress(BatchProgress::WritingTasksToDisk);
|
||||||
|
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||||
|
{
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"currentStep": "writing tasks to disk",
|
||||||
|
"finished": 1,
|
||||||
|
"total": 2
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"percentage": 50.0
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
// 4 of 5 tasks done under the second of two steps: snapshotted below as 90%.
let (atomic, tasks) = AtomicTaskStep::new(5);
|
||||||
|
processing.progress.as_ref().unwrap().update_progress(tasks);
|
||||||
|
atomic.fetch_add(4, Ordering::Relaxed);
|
||||||
|
snapshot!(json_string!(processing.get_progress_view()), @r#"
|
||||||
|
{
|
||||||
|
"steps": [
|
||||||
|
{
|
||||||
|
"currentStep": "writing tasks to disk",
|
||||||
|
"finished": 1,
|
||||||
|
"total": 2
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"currentStep": "task",
|
||||||
|
"finished": 4,
|
||||||
|
"total": 5
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"percentage": 90.0
|
||||||
|
}
|
||||||
|
"#);
|
||||||
|
}
|
||||||
|
}
|
@ -134,6 +134,7 @@ impl ProcessingBatch {
|
|||||||
pub fn to_batch(&self) -> Batch {
|
pub fn to_batch(&self) -> Batch {
|
||||||
Batch {
|
Batch {
|
||||||
uid: self.uid,
|
uid: self.uid,
|
||||||
|
progress: None,
|
||||||
details: self.details.clone(),
|
details: self.details.clone(),
|
||||||
stats: self.stats.clone(),
|
stats: self.stats.clone(),
|
||||||
started_at: self.started_at,
|
started_at: self.started_at,
|
||||||
@ -187,6 +188,7 @@ impl IndexScheduler {
|
|||||||
&batch.uid,
|
&batch.uid,
|
||||||
&Batch {
|
&Batch {
|
||||||
uid: batch.uid,
|
uid: batch.uid,
|
||||||
|
progress: None,
|
||||||
details: batch.details,
|
details: batch.details,
|
||||||
stats: batch.stats,
|
stats: batch.stats,
|
||||||
started_at: batch.started_at,
|
started_at: batch.started_at,
|
||||||
@ -273,7 +275,9 @@ impl IndexScheduler {
|
|||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|batch_id| {
|
.map(|batch_id| {
|
||||||
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
|
if Some(batch_id) == processing.batch.as_ref().map(|batch| batch.uid) {
|
||||||
Ok(processing.batch.as_ref().unwrap().to_batch())
|
let mut batch = processing.batch.as_ref().unwrap().to_batch();
|
||||||
|
batch.progress = processing.get_progress_view();
|
||||||
|
Ok(batch)
|
||||||
} else {
|
} else {
|
||||||
self.get_batch(rtxn, batch_id)
|
self.get_batch(rtxn, batch_id)
|
||||||
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
|
.and_then(|task| task.ok_or(Error::CorruptedTaskQueue))
|
||||||
|
@ -1,16 +1,16 @@
|
|||||||
|
use milli::progress::ProgressView;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use time::{Duration, OffsetDateTime};
|
use time::{Duration, OffsetDateTime};
|
||||||
|
|
||||||
use crate::{
|
use crate::batches::{Batch, BatchId, BatchStats};
|
||||||
batches::{Batch, BatchId, BatchStats},
|
use crate::task_view::DetailsView;
|
||||||
task_view::DetailsView,
|
use crate::tasks::serialize_duration;
|
||||||
tasks::serialize_duration,
|
|
||||||
};
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Serialize)]
|
#[derive(Debug, Clone, Serialize)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
pub struct BatchView {
|
pub struct BatchView {
|
||||||
pub uid: BatchId,
|
pub uid: BatchId,
|
||||||
|
pub progress: Option<ProgressView>,
|
||||||
pub details: DetailsView,
|
pub details: DetailsView,
|
||||||
pub stats: BatchStats,
|
pub stats: BatchStats,
|
||||||
#[serde(serialize_with = "serialize_duration", default)]
|
#[serde(serialize_with = "serialize_duration", default)]
|
||||||
@ -25,6 +25,7 @@ impl BatchView {
|
|||||||
pub fn from_batch(batch: &Batch) -> Self {
|
pub fn from_batch(batch: &Batch) -> Self {
|
||||||
Self {
|
Self {
|
||||||
uid: batch.uid,
|
uid: batch.uid,
|
||||||
|
progress: batch.progress.clone(),
|
||||||
details: batch.details.clone(),
|
details: batch.details.clone(),
|
||||||
stats: batch.stats.clone(),
|
stats: batch.stats.clone(),
|
||||||
duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),
|
duration: batch.finished_at.map(|finished_at| finished_at - batch.started_at),
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use milli::progress::ProgressView;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use crate::{
|
use crate::task_view::DetailsView;
|
||||||
task_view::DetailsView,
|
use crate::tasks::{Kind, Status};
|
||||||
tasks::{Kind, Status},
|
|
||||||
};
|
|
||||||
|
|
||||||
pub type BatchId = u32;
|
pub type BatchId = u32;
|
||||||
|
|
||||||
@ -15,6 +14,8 @@ pub type BatchId = u32;
|
|||||||
pub struct Batch {
|
pub struct Batch {
|
||||||
pub uid: BatchId,
|
pub uid: BatchId,
|
||||||
|
|
||||||
|
#[serde(skip)]
|
||||||
|
pub progress: Option<ProgressView>,
|
||||||
pub details: DetailsView,
|
pub details: DetailsView,
|
||||||
pub stats: BatchStats,
|
pub stats: BatchStats,
|
||||||
|
|
||||||
|
@ -4,7 +4,6 @@ use std::fmt::{Display, Write};
|
|||||||
use std::str::FromStr;
|
use std::str::FromStr;
|
||||||
|
|
||||||
use enum_iterator::Sequence;
|
use enum_iterator::Sequence;
|
||||||
use milli::update::new::indexer::document_changes::Progress;
|
|
||||||
use milli::update::IndexDocumentsMethod;
|
use milli::update::IndexDocumentsMethod;
|
||||||
use milli::Object;
|
use milli::Object;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -41,62 +40,6 @@ pub struct Task {
|
|||||||
pub kind: KindWithContent,
|
pub kind: KindWithContent,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)]
|
|
||||||
#[serde(rename_all = "camelCase")]
|
|
||||||
pub struct TaskProgress {
|
|
||||||
pub current_step: &'static str,
|
|
||||||
pub finished_steps: u16,
|
|
||||||
pub total_steps: u16,
|
|
||||||
pub finished_substeps: Option<u32>,
|
|
||||||
pub total_substeps: Option<u32>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for TaskProgress {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self::new()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl TaskProgress {
|
|
||||||
pub fn new() -> Self {
|
|
||||||
Self {
|
|
||||||
current_step: "start",
|
|
||||||
finished_steps: 0,
|
|
||||||
total_steps: 1,
|
|
||||||
finished_substeps: None,
|
|
||||||
total_substeps: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn update(&mut self, progress: Progress) -> TaskProgress {
|
|
||||||
if self.finished_steps > progress.finished_steps {
|
|
||||||
return *self;
|
|
||||||
}
|
|
||||||
|
|
||||||
if self.current_step != progress.step_name {
|
|
||||||
self.current_step = progress.step_name
|
|
||||||
}
|
|
||||||
|
|
||||||
self.total_steps = progress.total_steps;
|
|
||||||
|
|
||||||
if self.finished_steps < progress.finished_steps {
|
|
||||||
self.finished_substeps = None;
|
|
||||||
self.total_substeps = None;
|
|
||||||
}
|
|
||||||
self.finished_steps = progress.finished_steps;
|
|
||||||
if let Some((finished_substeps, total_substeps)) = progress.finished_total_substep {
|
|
||||||
if let Some(task_finished_substeps) = self.finished_substeps {
|
|
||||||
if task_finished_substeps > finished_substeps {
|
|
||||||
return *self;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
self.finished_substeps = Some(finished_substeps);
|
|
||||||
self.total_substeps = Some(total_substeps);
|
|
||||||
}
|
|
||||||
*self
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Task {
|
impl Task {
|
||||||
pub fn index_uid(&self) -> Option<&str> {
|
pub fn index_uid(&self) -> Option<&str> {
|
||||||
use KindWithContent::*;
|
use KindWithContent::*;
|
||||||
|
@ -1,18 +1,18 @@
|
|||||||
use actix_web::{
|
use actix_web::web::{self, Data};
|
||||||
web::{self, Data},
|
use actix_web::HttpResponse;
|
||||||
HttpResponse,
|
|
||||||
};
|
|
||||||
use deserr::actix_web::AwebQueryParameter;
|
use deserr::actix_web::AwebQueryParameter;
|
||||||
use index_scheduler::{IndexScheduler, Query};
|
use index_scheduler::{IndexScheduler, Query};
|
||||||
use meilisearch_types::{
|
use meilisearch_types::batch_view::BatchView;
|
||||||
batch_view::BatchView, batches::BatchId, deserr::DeserrQueryParamError, error::ResponseError,
|
use meilisearch_types::batches::BatchId;
|
||||||
keys::actions,
|
use meilisearch_types::deserr::DeserrQueryParamError;
|
||||||
};
|
use meilisearch_types::error::ResponseError;
|
||||||
|
use meilisearch_types::keys::actions;
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
|
|
||||||
use crate::extractors::{authentication::GuardedData, sequential_extractor::SeqHandler};
|
use super::tasks::TasksFilterQuery;
|
||||||
|
use super::ActionPolicy;
|
||||||
use super::{tasks::TasksFilterQuery, ActionPolicy};
|
use crate::extractors::authentication::GuardedData;
|
||||||
|
use crate::extractors::sequential_extractor::SeqHandler;
|
||||||
|
|
||||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||||
cfg.service(web::resource("").route(web::get().to(SeqHandler(get_batches))))
|
cfg.service(web::resource("").route(web::get().to(SeqHandler(get_batches))))
|
||||||
|
@ -284,6 +284,7 @@ async fn test_summarized_document_addition_or_update() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"receivedDocuments": 1,
|
"receivedDocuments": 1,
|
||||||
"indexedDocuments": 1
|
"indexedDocuments": 1
|
||||||
@ -314,6 +315,7 @@ async fn test_summarized_document_addition_or_update() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 1,
|
"uid": 1,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"receivedDocuments": 1,
|
"receivedDocuments": 1,
|
||||||
"indexedDocuments": 1
|
"indexedDocuments": 1
|
||||||
@ -349,6 +351,7 @@ async fn test_summarized_delete_documents_by_batch() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"providedIds": 3,
|
"providedIds": 3,
|
||||||
"deletedDocuments": 0
|
"deletedDocuments": 0
|
||||||
@ -380,6 +383,7 @@ async fn test_summarized_delete_documents_by_batch() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 2,
|
"uid": 2,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"providedIds": 1,
|
"providedIds": 1,
|
||||||
"deletedDocuments": 0
|
"deletedDocuments": 0
|
||||||
@ -416,6 +420,7 @@ async fn test_summarized_delete_documents_by_filter() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"providedIds": 0,
|
"providedIds": 0,
|
||||||
"deletedDocuments": 0,
|
"deletedDocuments": 0,
|
||||||
@ -448,6 +453,7 @@ async fn test_summarized_delete_documents_by_filter() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 2,
|
"uid": 2,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"providedIds": 0,
|
"providedIds": 0,
|
||||||
"deletedDocuments": 0,
|
"deletedDocuments": 0,
|
||||||
@ -480,6 +486,7 @@ async fn test_summarized_delete_documents_by_filter() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 4,
|
"uid": 4,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"providedIds": 0,
|
"providedIds": 0,
|
||||||
"deletedDocuments": 0,
|
"deletedDocuments": 0,
|
||||||
@ -516,6 +523,7 @@ async fn test_summarized_delete_document_by_id() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"providedIds": 1,
|
"providedIds": 1,
|
||||||
"deletedDocuments": 0
|
"deletedDocuments": 0
|
||||||
@ -547,6 +555,7 @@ async fn test_summarized_delete_document_by_id() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 2,
|
"uid": 2,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"providedIds": 1,
|
"providedIds": 1,
|
||||||
"deletedDocuments": 0
|
"deletedDocuments": 0
|
||||||
@ -594,6 +603,7 @@ async fn test_summarized_settings_update() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"displayedAttributes": [
|
"displayedAttributes": [
|
||||||
"doggos",
|
"doggos",
|
||||||
@ -638,6 +648,7 @@ async fn test_summarized_index_creation() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
|
"progress": null,
|
||||||
"details": {},
|
"details": {},
|
||||||
"stats": {
|
"stats": {
|
||||||
"totalNbTasks": 1,
|
"totalNbTasks": 1,
|
||||||
@ -665,6 +676,7 @@ async fn test_summarized_index_creation() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 1,
|
"uid": 1,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"primaryKey": "doggos"
|
"primaryKey": "doggos"
|
||||||
},
|
},
|
||||||
@ -809,6 +821,7 @@ async fn test_summarized_index_update() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
|
"progress": null,
|
||||||
"details": {},
|
"details": {},
|
||||||
"stats": {
|
"stats": {
|
||||||
"totalNbTasks": 1,
|
"totalNbTasks": 1,
|
||||||
@ -836,6 +849,7 @@ async fn test_summarized_index_update() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 1,
|
"uid": 1,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"primaryKey": "bones"
|
"primaryKey": "bones"
|
||||||
},
|
},
|
||||||
@ -868,6 +882,7 @@ async fn test_summarized_index_update() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 3,
|
"uid": 3,
|
||||||
|
"progress": null,
|
||||||
"details": {},
|
"details": {},
|
||||||
"stats": {
|
"stats": {
|
||||||
"totalNbTasks": 1,
|
"totalNbTasks": 1,
|
||||||
@ -895,6 +910,7 @@ async fn test_summarized_index_update() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 4,
|
"uid": 4,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"primaryKey": "bones"
|
"primaryKey": "bones"
|
||||||
},
|
},
|
||||||
@ -932,6 +948,7 @@ async fn test_summarized_index_swap() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"swaps": [
|
"swaps": [
|
||||||
{
|
{
|
||||||
@ -972,6 +989,7 @@ async fn test_summarized_index_swap() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 3,
|
"uid": 3,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"swaps": [
|
"swaps": [
|
||||||
{
|
{
|
||||||
@ -1014,6 +1032,7 @@ async fn test_summarized_batch_cancelation() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 1,
|
"uid": 1,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"matchedTasks": 1,
|
"matchedTasks": 1,
|
||||||
"canceledTasks": 0,
|
"canceledTasks": 0,
|
||||||
@ -1051,6 +1070,7 @@ async fn test_summarized_batch_deletion() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 1,
|
"uid": 1,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"matchedTasks": 1,
|
"matchedTasks": 1,
|
||||||
"deletedTasks": 1,
|
"deletedTasks": 1,
|
||||||
@ -1084,6 +1104,7 @@ async fn test_summarized_dump_creation() {
|
|||||||
@r#"
|
@r#"
|
||||||
{
|
{
|
||||||
"uid": 0,
|
"uid": 0,
|
||||||
|
"progress": null,
|
||||||
"details": {
|
"details": {
|
||||||
"dumpUid": "[dumpUid]"
|
"dumpUid": "[dumpUid]"
|
||||||
},
|
},
|
||||||
|
@ -7,7 +7,6 @@ use std::path::{Path, PathBuf};
|
|||||||
|
|
||||||
use anyhow::{bail, Context};
|
use anyhow::{bail, Context};
|
||||||
use meilisearch_types::versioning::create_version_file;
|
use meilisearch_types::versioning::create_version_file;
|
||||||
|
|
||||||
use v1_10::v1_9_to_v1_10;
|
use v1_10::v1_9_to_v1_10;
|
||||||
use v1_12::v1_11_to_v1_12;
|
use v1_12::v1_11_to_v1_12;
|
||||||
|
|
||||||
|
@ -1,18 +1,13 @@
|
|||||||
use anyhow::bail;
|
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::{bail, Context};
|
||||||
use meilisearch_types::{
|
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||||
heed::{
|
use meilisearch_types::heed::{Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified};
|
||||||
types::{SerdeJson, Str},
|
use meilisearch_types::milli::index::{db_name, main_key};
|
||||||
Database, Env, EnvOpenOptions, RoTxn, RwTxn, Unspecified,
|
|
||||||
},
|
|
||||||
milli::index::{db_name, main_key},
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
|
|
||||||
|
|
||||||
use super::v1_9;
|
use super::v1_9;
|
||||||
|
use crate::uuid_codec::UuidCodec;
|
||||||
|
use crate::{try_opening_database, try_opening_poly_database};
|
||||||
|
|
||||||
pub type FieldDistribution = std::collections::BTreeMap<String, u64>;
|
pub type FieldDistribution = std::collections::BTreeMap<String, u64>;
|
||||||
|
|
||||||
|
@ -7,12 +7,12 @@
|
|||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use meilisearch_types::{
|
use meilisearch_types::heed::types::Str;
|
||||||
heed::{types::Str, Database, EnvOpenOptions},
|
use meilisearch_types::heed::{Database, EnvOpenOptions};
|
||||||
milli::index::db_name,
|
use meilisearch_types::milli::index::db_name;
|
||||||
};
|
|
||||||
|
|
||||||
use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
|
use crate::uuid_codec::UuidCodec;
|
||||||
|
use crate::{try_opening_database, try_opening_poly_database};
|
||||||
|
|
||||||
pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> {
|
pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> {
|
||||||
println!("Upgrading from v1.10.0 to v1.11.0");
|
println!("Upgrading from v1.10.0 to v1.11.0");
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
//! The breaking changes that happened between the v1.11 and the v1.12 are:
|
//! The breaking changes that happened between the v1.11 and the v1.12 are:
|
||||||
//! - The new indexer changed the update files format from OBKV to ndjson. https://github.com/meilisearch/meilisearch/pull/4900
|
//! - The new indexer changed the update files format from OBKV to ndjson. https://github.com/meilisearch/meilisearch/pull/4900
|
||||||
|
|
||||||
use std::{io::BufWriter, path::Path};
|
use std::io::BufWriter;
|
||||||
|
use std::path::Path;
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
use file_store::FileStore;
|
use file_store::FileStore;
|
||||||
|
@ -1734,6 +1734,7 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
use crate::error::{Error, InternalError};
|
use crate::error::{Error, InternalError};
|
||||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||||
|
use crate::progress::Progress;
|
||||||
use crate::update::new::indexer;
|
use crate::update::new::indexer;
|
||||||
use crate::update::settings::InnerIndexSettings;
|
use crate::update::settings::InnerIndexSettings;
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
@ -1810,7 +1811,7 @@ pub(crate) mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
|
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
|
||||||
@ -1829,7 +1830,7 @@ pub(crate) mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap()?;
|
.unwrap()?;
|
||||||
@ -1901,7 +1902,7 @@ pub(crate) mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
|
if let Some(error) = operation_stats.into_iter().find_map(|stat| stat.error) {
|
||||||
@ -1920,7 +1921,7 @@ pub(crate) mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap()?;
|
.unwrap()?;
|
||||||
@ -1982,7 +1983,7 @@ pub(crate) mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2001,7 +2002,7 @@ pub(crate) mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| should_abort.load(Relaxed),
|
&|| should_abort.load(Relaxed),
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.unwrap()
|
.unwrap()
|
||||||
|
@ -31,6 +31,7 @@ pub mod vector;
|
|||||||
#[macro_use]
|
#[macro_use]
|
||||||
pub mod snapshot_tests;
|
pub mod snapshot_tests;
|
||||||
mod fieldids_weights_map;
|
mod fieldids_weights_map;
|
||||||
|
pub mod progress;
|
||||||
|
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
|
152
crates/milli/src/progress.rs
Normal file
152
crates/milli/src/progress.rs
Normal file
@ -0,0 +1,152 @@
|
|||||||
|
use std::any::TypeId;
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::sync::atomic::{AtomicU32, Ordering};
|
||||||
|
use std::sync::{Arc, RwLock};
|
||||||
|
|
||||||
|
use serde::Serialize;
|
||||||
|
|
||||||
|
/// A unit of work that can be registered in a [`Progress`] and reported in its view.
pub trait Step: Send + Sync + 'static {
    /// Name of the step, reported as `current_step` in the progress view.
    fn name(&self) -> Cow<'static, str>;

    /// Amount of work already done, reported as `finished` in the progress view.
    fn current(&self) -> u32;

    /// Total amount of work of this step, reported as `total` in the progress view.
    fn total(&self) -> u32;
}
|
||||||
|
|
||||||
|
#[derive(Clone, Default)]
|
||||||
|
pub struct Progress {
|
||||||
|
steps: Arc<RwLock<Vec<(TypeId, Box<dyn Step>)>>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Progress {
|
||||||
|
pub fn update_progress<P: Step>(&self, sub_progress: P) {
|
||||||
|
let mut steps = self.steps.write().unwrap();
|
||||||
|
let step_type = TypeId::of::<P>();
|
||||||
|
if let Some(idx) = steps.iter().position(|(id, _)| *id == step_type) {
|
||||||
|
steps.truncate(idx);
|
||||||
|
}
|
||||||
|
steps.push((step_type, Box::new(sub_progress)));
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: This code should be in meilisearch_types but cannot because milli can't depend on meilisearch_types
|
||||||
|
pub fn as_progress_view(&self) -> ProgressView {
|
||||||
|
let steps = self.steps.read().unwrap();
|
||||||
|
|
||||||
|
let mut percentage = 0.0;
|
||||||
|
let mut prev_factors = 1.0;
|
||||||
|
|
||||||
|
let mut step_view = Vec::with_capacity(steps.len());
|
||||||
|
for (_, step) in steps.iter() {
|
||||||
|
prev_factors *= step.total() as f32;
|
||||||
|
percentage += step.current() as f32 / prev_factors;
|
||||||
|
|
||||||
|
step_view.push(ProgressStepView {
|
||||||
|
current_step: step.name(),
|
||||||
|
finished: step.current(),
|
||||||
|
total: step.total(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
ProgressView { steps: step_view, percentage: percentage * 100.0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// This trait lets you use the `AtomicSubStep` defined right below.
/// The name must be a constant that never changes, but that can't be enforced by the type system because it would make the trait non object-safe.
/// By requiring the `Default` trait and a `&'static str` we make it harder to misuse the trait.
pub trait NamedStep: Default + Send + Sync + 'static {
    /// Name of the unit counted by the step.
    fn name(&self) -> &'static str;
}
|
||||||
|
|
||||||
|
/// Structure to quickly define steps that need very quick, lockless updating of their current step.
|
||||||
|
/// You can use this struct if:
|
||||||
|
/// - The name of the step doesn't change
|
||||||
|
/// - The total number of steps doesn't change
|
||||||
|
pub struct AtomicSubStep<Name: NamedStep> {
|
||||||
|
unit_name: Name,
|
||||||
|
current: Arc<AtomicU32>,
|
||||||
|
total: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Name: NamedStep> AtomicSubStep<Name> {
|
||||||
|
pub fn new(total: u32) -> (Arc<AtomicU32>, Self) {
|
||||||
|
let current = Arc::new(AtomicU32::new(0));
|
||||||
|
(current.clone(), Self { current, total, unit_name: Name::default() })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<Name: NamedStep> Step for AtomicSubStep<Name> {
|
||||||
|
fn name(&self) -> Cow<'static, str> {
|
||||||
|
self.unit_name.name().into()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn current(&self) -> u32 {
|
||||||
|
self.current.load(Ordering::Relaxed)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn total(&self) -> u32 {
|
||||||
|
self.total
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Declares a field-less enum and implements `Step` for it.
///
/// - `name()` is the variant identifier converted with `convert_case` from camel case to
///   lower case.
/// - `current()` is the discriminant of the variant.
/// - `total()` is `Self::CARDINALITY`.
///
/// The expansion refers to `Step`, `Cow`, `Sequence` and the `convert_case` crate by their
/// bare names, so they must be in scope where the macro is invoked.
/// NOTE(review): `CARDINALITY` is presumably provided by the `Sequence` derive; confirm.
#[macro_export]
macro_rules! make_enum_progress {
    ($visibility:vis enum $name:ident { $($variant:ident,)+ }) => {
        #[repr(u8)]
        #[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
        #[allow(clippy::enum_variant_names)]
        $visibility enum $name {
            $($variant),+
        }

        impl Step for $name {
            fn name(&self) -> Cow<'static, str> {
                use convert_case::Casing;

                // Pick the identifier first, then run the case conversion in one place.
                let variant_name = match self {
                    $(
                        $name::$variant => stringify!($variant)
                    ),+
                };

                variant_name
                    .from_case(convert_case::Case::Camel)
                    .to_case(convert_case::Case::Lower)
                    .into()
            }

            fn current(&self) -> u32 {
                *self as u32
            }

            fn total(&self) -> u32 {
                Self::CARDINALITY as u32
            }
        }
    };
}
|
||||||
|
|
||||||
|
#[macro_export]
|
||||||
|
macro_rules! make_atomic_progress {
|
||||||
|
($struct_name:ident alias $atomic_struct_name:ident => $step_name:literal) => {
|
||||||
|
#[derive(Default, Debug, Clone, Copy)]
|
||||||
|
pub struct $struct_name {}
|
||||||
|
impl NamedStep for $struct_name {
|
||||||
|
fn name(&self) -> &'static str {
|
||||||
|
$step_name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub type $atomic_struct_name = AtomicSubStep<$struct_name>;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
make_atomic_progress!(Document alias AtomicDocumentStep => "document" );
|
||||||
|
make_atomic_progress!(Payload alias AtomicPayloadStep => "payload" );
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Clone)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ProgressView {
|
||||||
|
pub steps: Vec<ProgressStepView>,
|
||||||
|
pub percentage: f32,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Serialize, Clone)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct ProgressStepView {
|
||||||
|
pub current_step: Cow<'static, str>,
|
||||||
|
pub finished: u32,
|
||||||
|
pub total: u32,
|
||||||
|
}
|
@ -5,6 +5,7 @@ use bumpalo::Bump;
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::{btreemap, hashset};
|
use maplit::{btreemap, hashset};
|
||||||
|
|
||||||
|
use crate::progress::Progress;
|
||||||
use crate::update::new::indexer;
|
use crate::update::new::indexer;
|
||||||
use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
use crate::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||||
use crate::vector::EmbeddingConfigs;
|
use crate::vector::EmbeddingConfigs;
|
||||||
@ -72,7 +73,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -91,7 +92,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -766,6 +766,7 @@ mod tests {
|
|||||||
use crate::documents::mmap_from_objects;
|
use crate::documents::mmap_from_objects;
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
use crate::index::IndexEmbeddingConfig;
|
use crate::index::IndexEmbeddingConfig;
|
||||||
|
use crate::progress::Progress;
|
||||||
use crate::search::TermsMatchingStrategy;
|
use crate::search::TermsMatchingStrategy;
|
||||||
use crate::update::new::indexer;
|
use crate::update::new::indexer;
|
||||||
use crate::update::Setting;
|
use crate::update::Setting;
|
||||||
@ -1964,7 +1965,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2148,7 +2149,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2163,7 +2164,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2210,7 +2211,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2225,7 +2226,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2263,7 +2264,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2278,7 +2279,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2315,7 +2316,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2330,7 +2331,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2369,7 +2370,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2384,7 +2385,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2428,7 +2429,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2443,7 +2444,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2480,7 +2481,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2495,7 +2496,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2532,7 +2533,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2547,7 +2548,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2726,7 +2727,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2741,7 +2742,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2785,7 +2786,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2800,7 +2801,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
@ -2841,7 +2842,7 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -2856,7 +2857,7 @@ mod tests {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
wtxn.commit().unwrap();
|
wtxn.commit().unwrap();
|
||||||
|
@ -16,10 +16,10 @@ use crate::update::del_add::DelAdd;
|
|||||||
use crate::update::new::channel::FieldIdDocidFacetSender;
|
use crate::update::new::channel::FieldIdDocidFacetSender;
|
||||||
use crate::update::new::extract::perm_json_p;
|
use crate::update::new::extract::perm_json_p;
|
||||||
use crate::update::new::indexer::document_changes::{
|
use crate::update::new::indexer::document_changes::{
|
||||||
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
|
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
|
||||||
};
|
};
|
||||||
use crate::update::new::ref_cell_ext::RefCellExt as _;
|
use crate::update::new::ref_cell_ext::RefCellExt as _;
|
||||||
use crate::update::new::steps::Step;
|
use crate::update::new::steps::IndexingStep;
|
||||||
use crate::update::new::thread_local::{FullySend, ThreadLocal};
|
use crate::update::new::thread_local::{FullySend, ThreadLocal};
|
||||||
use crate::update::new::DocumentChange;
|
use crate::update::new::DocumentChange;
|
||||||
use crate::update::GrenadParameters;
|
use crate::update::GrenadParameters;
|
||||||
@ -373,26 +373,16 @@ fn truncate_str(s: &str) -> &str {
|
|||||||
|
|
||||||
impl FacetedDocidsExtractor {
|
impl FacetedDocidsExtractor {
|
||||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
|
#[tracing::instrument(level = "trace", skip_all, target = "indexing::extract::faceted")]
|
||||||
pub fn run_extraction<
|
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
|
||||||
'pl,
|
|
||||||
'fid,
|
|
||||||
'indexer,
|
|
||||||
'index,
|
|
||||||
'extractor,
|
|
||||||
DC: DocumentChanges<'pl>,
|
|
||||||
MSP,
|
|
||||||
SP,
|
|
||||||
>(
|
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
||||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
sender: &FieldIdDocidFacetSender,
|
sender: &FieldIdDocidFacetSender,
|
||||||
step: Step,
|
step: IndexingStep,
|
||||||
) -> Result<Vec<BalancedCaches<'extractor>>>
|
) -> Result<Vec<BalancedCaches<'extractor>>>
|
||||||
where
|
where
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync,
|
||||||
SP: Fn(Progress) + Sync,
|
|
||||||
{
|
{
|
||||||
let index = indexing_context.index;
|
let index = indexing_context.index;
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
|
@ -15,23 +15,22 @@ pub use geo::*;
|
|||||||
pub use searchable::*;
|
pub use searchable::*;
|
||||||
pub use vectors::EmbeddingExtractor;
|
pub use vectors::EmbeddingExtractor;
|
||||||
|
|
||||||
use super::indexer::document_changes::{DocumentChanges, IndexingContext, Progress};
|
use super::indexer::document_changes::{DocumentChanges, IndexingContext};
|
||||||
use super::steps::Step;
|
use super::steps::IndexingStep;
|
||||||
use super::thread_local::{FullySend, ThreadLocal};
|
use super::thread_local::{FullySend, ThreadLocal};
|
||||||
use crate::update::GrenadParameters;
|
use crate::update::GrenadParameters;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub trait DocidsExtractor {
|
pub trait DocidsExtractor {
|
||||||
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
|
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
||||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
step: Step,
|
step: IndexingStep,
|
||||||
) -> Result<Vec<BalancedCaches<'extractor>>>
|
) -> Result<Vec<BalancedCaches<'extractor>>>
|
||||||
where
|
where
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync;
|
||||||
SP: Fn(Progress) + Sync;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// TODO move in permissive json pointer
|
/// TODO move in permissive json pointer
|
||||||
|
@ -11,10 +11,10 @@ use super::tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
|||||||
use crate::update::new::extract::cache::BalancedCaches;
|
use crate::update::new::extract::cache::BalancedCaches;
|
||||||
use crate::update::new::extract::perm_json_p::contained_in;
|
use crate::update::new::extract::perm_json_p::contained_in;
|
||||||
use crate::update::new::indexer::document_changes::{
|
use crate::update::new::indexer::document_changes::{
|
||||||
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
|
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
|
||||||
};
|
};
|
||||||
use crate::update::new::ref_cell_ext::RefCellExt as _;
|
use crate::update::new::ref_cell_ext::RefCellExt as _;
|
||||||
use crate::update::new::steps::Step;
|
use crate::update::new::steps::IndexingStep;
|
||||||
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
|
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
|
||||||
use crate::update::new::DocumentChange;
|
use crate::update::new::DocumentChange;
|
||||||
use crate::update::GrenadParameters;
|
use crate::update::GrenadParameters;
|
||||||
@ -239,25 +239,15 @@ impl<'a, 'extractor> Extractor<'extractor> for WordDocidsExtractorData<'a> {
|
|||||||
pub struct WordDocidsExtractors;
|
pub struct WordDocidsExtractors;
|
||||||
|
|
||||||
impl WordDocidsExtractors {
|
impl WordDocidsExtractors {
|
||||||
pub fn run_extraction<
|
pub fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
|
||||||
'pl,
|
|
||||||
'fid,
|
|
||||||
'indexer,
|
|
||||||
'index,
|
|
||||||
'extractor,
|
|
||||||
DC: DocumentChanges<'pl>,
|
|
||||||
MSP,
|
|
||||||
SP,
|
|
||||||
>(
|
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
||||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
step: Step,
|
step: IndexingStep,
|
||||||
) -> Result<WordDocidsCaches<'extractor>>
|
) -> Result<WordDocidsCaches<'extractor>>
|
||||||
where
|
where
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync,
|
||||||
SP: Fn(Progress) + Sync,
|
|
||||||
{
|
{
|
||||||
let index = indexing_context.index;
|
let index = indexing_context.index;
|
||||||
let rtxn = index.read_txn()?;
|
let rtxn = index.read_txn()?;
|
||||||
|
@ -14,9 +14,9 @@ use tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
|||||||
use super::cache::BalancedCaches;
|
use super::cache::BalancedCaches;
|
||||||
use super::DocidsExtractor;
|
use super::DocidsExtractor;
|
||||||
use crate::update::new::indexer::document_changes::{
|
use crate::update::new::indexer::document_changes::{
|
||||||
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext, Progress,
|
extract, DocumentChangeContext, DocumentChanges, Extractor, IndexingContext,
|
||||||
};
|
};
|
||||||
use crate::update::new::steps::Step;
|
use crate::update::new::steps::IndexingStep;
|
||||||
use crate::update::new::thread_local::{FullySend, ThreadLocal};
|
use crate::update::new::thread_local::{FullySend, ThreadLocal};
|
||||||
use crate::update::new::DocumentChange;
|
use crate::update::new::DocumentChange;
|
||||||
use crate::update::GrenadParameters;
|
use crate::update::GrenadParameters;
|
||||||
@ -56,16 +56,15 @@ impl<'a, 'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub trait SearchableExtractor: Sized + Sync {
|
pub trait SearchableExtractor: Sized + Sync {
|
||||||
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
|
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
||||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
step: Step,
|
step: IndexingStep,
|
||||||
) -> Result<Vec<BalancedCaches<'extractor>>>
|
) -> Result<Vec<BalancedCaches<'extractor>>>
|
||||||
where
|
where
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync,
|
||||||
SP: Fn(Progress) + Sync,
|
|
||||||
{
|
{
|
||||||
let rtxn = indexing_context.index.read_txn()?;
|
let rtxn = indexing_context.index.read_txn()?;
|
||||||
let stop_words = indexing_context.index.stop_words(&rtxn)?;
|
let stop_words = indexing_context.index.stop_words(&rtxn)?;
|
||||||
@ -134,16 +133,15 @@ pub trait SearchableExtractor: Sized + Sync {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<T: SearchableExtractor> DocidsExtractor for T {
|
impl<T: SearchableExtractor> DocidsExtractor for T {
|
||||||
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP, SP>(
|
fn run_extraction<'pl, 'fid, 'indexer, 'index, 'extractor, DC: DocumentChanges<'pl>, MSP>(
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
|
indexing_context: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
||||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
step: Step,
|
step: IndexingStep,
|
||||||
) -> Result<Vec<BalancedCaches<'extractor>>>
|
) -> Result<Vec<BalancedCaches<'extractor>>>
|
||||||
where
|
where
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync,
|
||||||
SP: Fn(Progress) + Sync,
|
|
||||||
{
|
{
|
||||||
Self::run_extraction(
|
Self::run_extraction(
|
||||||
grenad_parameters,
|
grenad_parameters,
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use std::cell::{Cell, RefCell};
|
use std::cell::{Cell, RefCell};
|
||||||
|
use std::sync::atomic::Ordering;
|
||||||
use std::sync::{Arc, RwLock};
|
use std::sync::{Arc, RwLock};
|
||||||
|
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
@ -7,8 +8,9 @@ use rayon::iter::IndexedParallelIterator;
|
|||||||
|
|
||||||
use super::super::document_change::DocumentChange;
|
use super::super::document_change::DocumentChange;
|
||||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||||
|
use crate::progress::{AtomicDocumentStep, Progress};
|
||||||
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
|
use crate::update::new::parallel_iterator_ext::ParallelIteratorExt as _;
|
||||||
use crate::update::new::steps::Step;
|
use crate::update::new::steps::IndexingStep;
|
||||||
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
|
use crate::update::new::thread_local::{FullySend, MostlySend, ThreadLocal};
|
||||||
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result};
|
use crate::{FieldsIdsMap, GlobalFieldsIdsMap, Index, InternalError, Result};
|
||||||
|
|
||||||
@ -133,10 +135,8 @@ pub struct IndexingContext<
|
|||||||
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
||||||
'index, // covariant lifetime of the index
|
'index, // covariant lifetime of the index
|
||||||
MSP,
|
MSP,
|
||||||
SP,
|
|
||||||
> where
|
> where
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync,
|
||||||
SP: Fn(Progress) + Sync,
|
|
||||||
{
|
{
|
||||||
pub index: &'index Index,
|
pub index: &'index Index,
|
||||||
pub db_fields_ids_map: &'indexer FieldsIdsMap,
|
pub db_fields_ids_map: &'indexer FieldsIdsMap,
|
||||||
@ -144,7 +144,7 @@ pub struct IndexingContext<
|
|||||||
pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
|
pub doc_allocs: &'indexer ThreadLocal<FullySend<Cell<Bump>>>,
|
||||||
pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
|
pub fields_ids_map_store: &'indexer ThreadLocal<FullySend<RefCell<GlobalFieldsIdsMap<'fid>>>>,
|
||||||
pub must_stop_processing: &'indexer MSP,
|
pub must_stop_processing: &'indexer MSP,
|
||||||
pub send_progress: &'indexer SP,
|
pub progress: &'indexer Progress,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<
|
impl<
|
||||||
@ -152,18 +152,15 @@ impl<
|
|||||||
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
||||||
'index, // covariant lifetime of the index
|
'index, // covariant lifetime of the index
|
||||||
MSP,
|
MSP,
|
||||||
SP,
|
|
||||||
> Copy
|
> Copy
|
||||||
for IndexingContext<
|
for IndexingContext<
|
||||||
'fid, // invariant lifetime of fields ids map
|
'fid, // invariant lifetime of fields ids map
|
||||||
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
||||||
'index, // covariant lifetime of the index
|
'index, // covariant lifetime of the index
|
||||||
MSP,
|
MSP,
|
||||||
SP,
|
|
||||||
>
|
>
|
||||||
where
|
where
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync,
|
||||||
SP: Fn(Progress) + Sync,
|
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -172,18 +169,15 @@ impl<
|
|||||||
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
||||||
'index, // covariant lifetime of the index
|
'index, // covariant lifetime of the index
|
||||||
MSP,
|
MSP,
|
||||||
SP,
|
|
||||||
> Clone
|
> Clone
|
||||||
for IndexingContext<
|
for IndexingContext<
|
||||||
'fid, // invariant lifetime of fields ids map
|
'fid, // invariant lifetime of fields ids map
|
||||||
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
'indexer, // covariant lifetime of objects that are borrowed during the entire indexing operation
|
||||||
'index, // covariant lifetime of the index
|
'index, // covariant lifetime of the index
|
||||||
MSP,
|
MSP,
|
||||||
SP,
|
|
||||||
>
|
>
|
||||||
where
|
where
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync,
|
||||||
SP: Fn(Progress) + Sync,
|
|
||||||
{
|
{
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
*self
|
*self
|
||||||
@ -202,7 +196,6 @@ pub fn extract<
|
|||||||
EX,
|
EX,
|
||||||
DC: DocumentChanges<'pl>,
|
DC: DocumentChanges<'pl>,
|
||||||
MSP,
|
MSP,
|
||||||
SP,
|
|
||||||
>(
|
>(
|
||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
extractor: &EX,
|
extractor: &EX,
|
||||||
@ -213,18 +206,18 @@ pub fn extract<
|
|||||||
doc_allocs,
|
doc_allocs,
|
||||||
fields_ids_map_store,
|
fields_ids_map_store,
|
||||||
must_stop_processing,
|
must_stop_processing,
|
||||||
send_progress,
|
progress,
|
||||||
}: IndexingContext<'fid, 'indexer, 'index, MSP, SP>,
|
}: IndexingContext<'fid, 'indexer, 'index, MSP>,
|
||||||
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
extractor_allocs: &'extractor mut ThreadLocal<FullySend<Bump>>,
|
||||||
datastore: &'data ThreadLocal<EX::Data>,
|
datastore: &'data ThreadLocal<EX::Data>,
|
||||||
step: Step,
|
step: IndexingStep,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
EX: Extractor<'extractor>,
|
EX: Extractor<'extractor>,
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync,
|
||||||
SP: Fn(Progress) + Sync,
|
|
||||||
{
|
{
|
||||||
tracing::trace!("We are resetting the extractor allocators");
|
tracing::trace!("We are resetting the extractor allocators");
|
||||||
|
progress.update_progress(step);
|
||||||
// Clean up and reuse the extractor allocs
|
// Clean up and reuse the extractor allocs
|
||||||
for extractor_alloc in extractor_allocs.iter_mut() {
|
for extractor_alloc in extractor_allocs.iter_mut() {
|
||||||
tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());
|
tracing::trace!("\tWith {} bytes reset", extractor_alloc.0.allocated_bytes());
|
||||||
@ -232,9 +225,11 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
let total_documents = document_changes.len() as u32;
|
let total_documents = document_changes.len() as u32;
|
||||||
|
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
|
||||||
|
progress.update_progress(progress_step);
|
||||||
|
|
||||||
let pi = document_changes.iter(CHUNK_SIZE);
|
let pi = document_changes.iter(CHUNK_SIZE);
|
||||||
pi.enumerate().try_arc_for_each_try_init(
|
pi.try_arc_for_each_try_init(
|
||||||
|| {
|
|| {
|
||||||
DocumentChangeContext::new(
|
DocumentChangeContext::new(
|
||||||
index,
|
index,
|
||||||
@ -247,13 +242,10 @@ where
|
|||||||
move |index_alloc| extractor.init_data(index_alloc),
|
move |index_alloc| extractor.init_data(index_alloc),
|
||||||
)
|
)
|
||||||
},
|
},
|
||||||
|context, (finished_documents, items)| {
|
|context, items| {
|
||||||
if (must_stop_processing)() {
|
if (must_stop_processing)() {
|
||||||
return Err(Arc::new(InternalError::AbortedIndexation.into()));
|
return Err(Arc::new(InternalError::AbortedIndexation.into()));
|
||||||
}
|
}
|
||||||
let finished_documents = (finished_documents * CHUNK_SIZE) as u32;
|
|
||||||
|
|
||||||
(send_progress)(Progress::from_step_substep(step, finished_documents, total_documents));
|
|
||||||
|
|
||||||
// Clean up and reuse the document-specific allocator
|
// Clean up and reuse the document-specific allocator
|
||||||
context.doc_alloc.reset();
|
context.doc_alloc.reset();
|
||||||
@ -264,6 +256,7 @@ where
|
|||||||
});
|
});
|
||||||
|
|
||||||
let res = extractor.process(changes, context).map_err(Arc::new);
|
let res = extractor.process(changes, context).map_err(Arc::new);
|
||||||
|
step.fetch_add(items.as_ref().len() as u32, Ordering::Relaxed);
|
||||||
|
|
||||||
// send back the doc_alloc in the pool
|
// send back the doc_alloc in the pool
|
||||||
context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));
|
context.doc_allocs.get_or_default().0.set(std::mem::take(&mut context.doc_alloc));
|
||||||
@ -271,32 +264,7 @@ where
|
|||||||
res
|
res
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
|
step.store(total_documents, Ordering::Relaxed);
|
||||||
(send_progress)(Progress::from_step_substep(step, total_documents, total_documents));
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Progress {
|
|
||||||
pub finished_steps: u16,
|
|
||||||
pub total_steps: u16,
|
|
||||||
pub step_name: &'static str,
|
|
||||||
pub finished_total_substep: Option<(u32, u32)>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Progress {
|
|
||||||
pub fn from_step(step: Step) -> Self {
|
|
||||||
Self {
|
|
||||||
finished_steps: step.finished_steps(),
|
|
||||||
total_steps: Step::total_steps(),
|
|
||||||
step_name: step.name(),
|
|
||||||
finished_total_substep: None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub fn from_step_substep(step: Step, finished_substep: u32, total_substep: u32) -> Self {
|
|
||||||
Self {
|
|
||||||
finished_total_substep: Some((finished_substep, total_substep)),
|
|
||||||
..Progress::from_step(step)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -92,11 +92,12 @@ mod test {
|
|||||||
|
|
||||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
|
use crate::progress::Progress;
|
||||||
use crate::update::new::indexer::document_changes::{
|
use crate::update::new::indexer::document_changes::{
|
||||||
extract, DocumentChangeContext, Extractor, IndexingContext,
|
extract, DocumentChangeContext, Extractor, IndexingContext,
|
||||||
};
|
};
|
||||||
use crate::update::new::indexer::DocumentDeletion;
|
use crate::update::new::indexer::DocumentDeletion;
|
||||||
use crate::update::new::steps::Step;
|
use crate::update::new::steps::IndexingStep;
|
||||||
use crate::update::new::thread_local::{MostlySend, ThreadLocal};
|
use crate::update::new::thread_local::{MostlySend, ThreadLocal};
|
||||||
use crate::update::new::DocumentChange;
|
use crate::update::new::DocumentChange;
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
@ -164,7 +165,7 @@ mod test {
|
|||||||
doc_allocs: &doc_allocs,
|
doc_allocs: &doc_allocs,
|
||||||
fields_ids_map_store: &fields_ids_map_store,
|
fields_ids_map_store: &fields_ids_map_store,
|
||||||
must_stop_processing: &(|| false),
|
must_stop_processing: &(|| false),
|
||||||
send_progress: &(|_progress| {}),
|
progress: &Progress::default(),
|
||||||
};
|
};
|
||||||
|
|
||||||
for _ in 0..3 {
|
for _ in 0..3 {
|
||||||
@ -176,7 +177,7 @@ mod test {
|
|||||||
context,
|
context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&datastore,
|
&datastore,
|
||||||
Step::ExtractingDocuments,
|
IndexingStep::ExtractingDocuments,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
use std::sync::atomic::Ordering;
|
||||||
|
|
||||||
use bumpalo::collections::CollectIn;
|
use bumpalo::collections::CollectIn;
|
||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use bumparaw_collections::RawMap;
|
use bumparaw_collections::RawMap;
|
||||||
@ -10,11 +12,12 @@ use serde_json::value::RawValue;
|
|||||||
use serde_json::Deserializer;
|
use serde_json::Deserializer;
|
||||||
|
|
||||||
use super::super::document_change::DocumentChange;
|
use super::super::document_change::DocumentChange;
|
||||||
use super::document_changes::{DocumentChangeContext, DocumentChanges, Progress};
|
use super::document_changes::{DocumentChangeContext, DocumentChanges};
|
||||||
use super::retrieve_or_guess_primary_key;
|
use super::retrieve_or_guess_primary_key;
|
||||||
use crate::documents::PrimaryKey;
|
use crate::documents::PrimaryKey;
|
||||||
|
use crate::progress::{AtomicPayloadStep, Progress};
|
||||||
use crate::update::new::document::Versions;
|
use crate::update::new::document::Versions;
|
||||||
use crate::update::new::steps::Step;
|
use crate::update::new::steps::IndexingStep;
|
||||||
use crate::update::new::thread_local::MostlySend;
|
use crate::update::new::thread_local::MostlySend;
|
||||||
use crate::update::new::{Deletion, Insertion, Update};
|
use crate::update::new::{Deletion, Insertion, Update};
|
||||||
use crate::update::{AvailableIds, IndexDocumentsMethod};
|
use crate::update::{AvailableIds, IndexDocumentsMethod};
|
||||||
@ -45,7 +48,7 @@ impl<'pl> DocumentOperation<'pl> {
|
|||||||
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
#[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")]
|
#[tracing::instrument(level = "trace", skip_all, target = "indexing::document_operation")]
|
||||||
pub fn into_changes<MSP, SP>(
|
pub fn into_changes<MSP>(
|
||||||
self,
|
self,
|
||||||
indexer: &'pl Bump,
|
indexer: &'pl Bump,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
@ -53,12 +56,12 @@ impl<'pl> DocumentOperation<'pl> {
|
|||||||
primary_key_from_op: Option<&'pl str>,
|
primary_key_from_op: Option<&'pl str>,
|
||||||
new_fields_ids_map: &mut FieldsIdsMap,
|
new_fields_ids_map: &mut FieldsIdsMap,
|
||||||
must_stop_processing: &MSP,
|
must_stop_processing: &MSP,
|
||||||
send_progress: &SP,
|
progress: Progress,
|
||||||
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
|
) -> Result<(DocumentOperationChanges<'pl>, Vec<PayloadStats>, Option<PrimaryKey<'pl>>)>
|
||||||
where
|
where
|
||||||
MSP: Fn() -> bool,
|
MSP: Fn() -> bool,
|
||||||
SP: Fn(Progress),
|
|
||||||
{
|
{
|
||||||
|
progress.update_progress(IndexingStep::PreparingPayloads);
|
||||||
let Self { operations, method } = self;
|
let Self { operations, method } = self;
|
||||||
|
|
||||||
let documents_ids = index.documents_ids(rtxn)?;
|
let documents_ids = index.documents_ids(rtxn)?;
|
||||||
@ -68,16 +71,14 @@ impl<'pl> DocumentOperation<'pl> {
|
|||||||
let mut primary_key = None;
|
let mut primary_key = None;
|
||||||
|
|
||||||
let payload_count = operations.len();
|
let payload_count = operations.len();
|
||||||
|
let (step, progress_step) = AtomicPayloadStep::new(payload_count as u32);
|
||||||
|
progress.update_progress(progress_step);
|
||||||
|
|
||||||
for (payload_index, operation) in operations.into_iter().enumerate() {
|
for (payload_index, operation) in operations.into_iter().enumerate() {
|
||||||
if must_stop_processing() {
|
if must_stop_processing() {
|
||||||
return Err(InternalError::AbortedIndexation.into());
|
return Err(InternalError::AbortedIndexation.into());
|
||||||
}
|
}
|
||||||
send_progress(Progress::from_step_substep(
|
step.store(payload_index as u32, Ordering::Relaxed);
|
||||||
Step::PreparingPayloads,
|
|
||||||
payload_index as u32,
|
|
||||||
payload_count as u32,
|
|
||||||
));
|
|
||||||
|
|
||||||
let mut bytes = 0;
|
let mut bytes = 0;
|
||||||
let result = match operation {
|
let result = match operation {
|
||||||
@ -118,12 +119,7 @@ impl<'pl> DocumentOperation<'pl> {
|
|||||||
};
|
};
|
||||||
operations_stats.push(PayloadStats { document_count, bytes, error });
|
operations_stats.push(PayloadStats { document_count, bytes, error });
|
||||||
}
|
}
|
||||||
|
step.store(payload_count as u32, Ordering::Relaxed);
|
||||||
send_progress(Progress::from_step_substep(
|
|
||||||
Step::PreparingPayloads,
|
|
||||||
payload_count as u32,
|
|
||||||
payload_count as u32,
|
|
||||||
));
|
|
||||||
|
|
||||||
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
|
// TODO We must drain the HashMap into a Vec because rayon::hash_map::IntoIter: !Clone
|
||||||
let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> =
|
let mut docids_version_offsets: bumpalo::collections::vec::Vec<_> =
|
||||||
|
@ -5,7 +5,7 @@ use std::thread::{self, Builder};
|
|||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
use bumparaw_collections::RawMap;
|
use bumparaw_collections::RawMap;
|
||||||
use document_changes::{extract, DocumentChanges, IndexingContext, Progress};
|
use document_changes::{extract, DocumentChanges, IndexingContext};
|
||||||
pub use document_deletion::DocumentDeletion;
|
pub use document_deletion::DocumentDeletion;
|
||||||
pub use document_operation::{DocumentOperation, PayloadStats};
|
pub use document_operation::{DocumentOperation, PayloadStats};
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
@ -22,7 +22,7 @@ use super::channel::*;
|
|||||||
use super::extract::*;
|
use super::extract::*;
|
||||||
use super::facet_search_builder::FacetSearchBuilder;
|
use super::facet_search_builder::FacetSearchBuilder;
|
||||||
use super::merger::FacetFieldIdsDelta;
|
use super::merger::FacetFieldIdsDelta;
|
||||||
use super::steps::Step;
|
use super::steps::IndexingStep;
|
||||||
use super::thread_local::ThreadLocal;
|
use super::thread_local::ThreadLocal;
|
||||||
use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
|
use super::word_fst_builder::{PrefixData, PrefixDelta, WordFstBuilder};
|
||||||
use super::words_prefix_docids::{
|
use super::words_prefix_docids::{
|
||||||
@ -33,6 +33,7 @@ use crate::documents::{PrimaryKey, DEFAULT_PRIMARY_KEY};
|
|||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
|
||||||
use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
|
use crate::index::main_key::{WORDS_FST_KEY, WORDS_PREFIXES_FST_KEY};
|
||||||
|
use crate::progress::Progress;
|
||||||
use crate::proximity::ProximityPrecision;
|
use crate::proximity::ProximityPrecision;
|
||||||
use crate::update::del_add::DelAdd;
|
use crate::update::del_add::DelAdd;
|
||||||
use crate::update::new::extract::EmbeddingExtractor;
|
use crate::update::new::extract::EmbeddingExtractor;
|
||||||
@ -60,7 +61,7 @@ mod update_by_function;
|
|||||||
///
|
///
|
||||||
/// TODO return stats
|
/// TODO return stats
|
||||||
#[allow(clippy::too_many_arguments)] // clippy: 😝
|
#[allow(clippy::too_many_arguments)] // clippy: 😝
|
||||||
pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
|
pub fn index<'pl, 'indexer, 'index, DC, MSP>(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &'index Index,
|
index: &'index Index,
|
||||||
pool: &ThreadPoolNoAbort,
|
pool: &ThreadPoolNoAbort,
|
||||||
@ -71,12 +72,11 @@ pub fn index<'pl, 'indexer, 'index, DC, MSP, SP>(
|
|||||||
document_changes: &DC,
|
document_changes: &DC,
|
||||||
embedders: EmbeddingConfigs,
|
embedders: EmbeddingConfigs,
|
||||||
must_stop_processing: &'indexer MSP,
|
must_stop_processing: &'indexer MSP,
|
||||||
send_progress: &'indexer SP,
|
progress: &'indexer Progress,
|
||||||
) -> Result<()>
|
) -> Result<()>
|
||||||
where
|
where
|
||||||
DC: DocumentChanges<'pl>,
|
DC: DocumentChanges<'pl>,
|
||||||
MSP: Fn() -> bool + Sync,
|
MSP: Fn() -> bool + Sync,
|
||||||
SP: Fn(Progress) + Sync,
|
|
||||||
{
|
{
|
||||||
let mut bbbuffers = Vec::new();
|
let mut bbbuffers = Vec::new();
|
||||||
let finished_extraction = AtomicBool::new(false);
|
let finished_extraction = AtomicBool::new(false);
|
||||||
@ -125,7 +125,7 @@ where
|
|||||||
doc_allocs: &doc_allocs,
|
doc_allocs: &doc_allocs,
|
||||||
fields_ids_map_store: &fields_ids_map_store,
|
fields_ids_map_store: &fields_ids_map_store,
|
||||||
must_stop_processing,
|
must_stop_processing,
|
||||||
send_progress,
|
progress,
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut index_embeddings = index.embedding_configs(wtxn)?;
|
let mut index_embeddings = index.embedding_configs(wtxn)?;
|
||||||
@ -159,7 +159,7 @@ where
|
|||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&datastore,
|
&datastore,
|
||||||
Step::ExtractingDocuments,
|
IndexingStep::ExtractingDocuments,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
@ -191,7 +191,7 @@ where
|
|||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&extractor_sender.field_id_docid_facet_sender(),
|
&extractor_sender.field_id_docid_facet_sender(),
|
||||||
Step::ExtractingFacets
|
IndexingStep::ExtractingFacets
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -224,7 +224,7 @@ where
|
|||||||
document_changes,
|
document_changes,
|
||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
Step::ExtractingWords
|
IndexingStep::ExtractingWords
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -302,7 +302,7 @@ where
|
|||||||
document_changes,
|
document_changes,
|
||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
Step::ExtractingWordProximity,
|
IndexingStep::ExtractingWordProximity,
|
||||||
)?
|
)?
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -338,7 +338,7 @@ where
|
|||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&datastore,
|
&datastore,
|
||||||
Step::ExtractingEmbeddings,
|
IndexingStep::ExtractingEmbeddings,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
@ -371,7 +371,7 @@ where
|
|||||||
indexing_context,
|
indexing_context,
|
||||||
&mut extractor_allocs,
|
&mut extractor_allocs,
|
||||||
&datastore,
|
&datastore,
|
||||||
Step::WritingGeoPoints
|
IndexingStep::WritingGeoPoints
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -383,9 +383,7 @@ where
|
|||||||
&indexing_context.must_stop_processing,
|
&indexing_context.must_stop_processing,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
indexing_context.progress.update_progress(IndexingStep::WritingToDatabase);
|
||||||
(indexing_context.send_progress)(Progress::from_step(Step::WritingToDatabase));
|
|
||||||
|
|
||||||
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
|
finished_extraction.store(true, std::sync::atomic::Ordering::Relaxed);
|
||||||
|
|
||||||
Result::Ok((facet_field_ids_delta, index_embeddings))
|
Result::Ok((facet_field_ids_delta, index_embeddings))
|
||||||
@ -485,7 +483,7 @@ where
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
(indexing_context.send_progress)(Progress::from_step(Step::WaitingForExtractors));
|
indexing_context.progress.update_progress(IndexingStep::WaitingForExtractors);
|
||||||
|
|
||||||
let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;
|
let (facet_field_ids_delta, index_embeddings) = extractor_handle.join().unwrap()?;
|
||||||
|
|
||||||
@ -498,10 +496,7 @@ where
|
|||||||
break 'vectors;
|
break 'vectors;
|
||||||
}
|
}
|
||||||
|
|
||||||
(indexing_context.send_progress)(Progress::from_step(
|
indexing_context.progress.update_progress(IndexingStep::WritingEmbeddingsToDatabase);
|
||||||
Step::WritingEmbeddingsToDatabase,
|
|
||||||
));
|
|
||||||
|
|
||||||
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
let mut rng = rand::rngs::StdRng::seed_from_u64(42);
|
||||||
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
|
for (_index, (_embedder_name, _embedder, writer, dimensions)) in &mut arroy_writers {
|
||||||
let dimensions = *dimensions;
|
let dimensions = *dimensions;
|
||||||
@ -517,21 +512,19 @@ where
|
|||||||
index.put_embedding_configs(wtxn, index_embeddings)?;
|
index.put_embedding_configs(wtxn, index_embeddings)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingFacets));
|
indexing_context.progress.update_progress(IndexingStep::PostProcessingFacets);
|
||||||
|
|
||||||
if index.facet_search(wtxn)? {
|
if index.facet_search(wtxn)? {
|
||||||
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
|
compute_facet_search_database(index, wtxn, global_fields_ids_map)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
|
compute_facet_level_database(index, wtxn, facet_field_ids_delta)?;
|
||||||
|
|
||||||
(indexing_context.send_progress)(Progress::from_step(Step::PostProcessingWords));
|
indexing_context.progress.update_progress(IndexingStep::PostProcessingWords);
|
||||||
|
|
||||||
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
|
if let Some(prefix_delta) = compute_word_fst(index, wtxn)? {
|
||||||
compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?;
|
compute_prefix_database(index, wtxn, prefix_delta, grenad_parameters)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
(indexing_context.send_progress)(Progress::from_step(Step::Finalizing));
|
indexing_context.progress.update_progress(IndexingStep::Finalizing);
|
||||||
|
|
||||||
Ok(()) as Result<_>
|
Ok(()) as Result<_>
|
||||||
})?;
|
})?;
|
||||||
|
@ -1,8 +1,12 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
use enum_iterator::Sequence;
|
use enum_iterator::Sequence;
|
||||||
|
|
||||||
|
use crate::progress::Step;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Sequence)]
|
||||||
#[repr(u16)]
|
#[repr(u8)]
|
||||||
pub enum Step {
|
pub enum IndexingStep {
|
||||||
PreparingPayloads,
|
PreparingPayloads,
|
||||||
ExtractingDocuments,
|
ExtractingDocuments,
|
||||||
ExtractingFacets,
|
ExtractingFacets,
|
||||||
@ -18,30 +22,31 @@ pub enum Step {
|
|||||||
Finalizing,
|
Finalizing,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Step {
|
impl Step for IndexingStep {
|
||||||
pub fn name(&self) -> &'static str {
|
fn name(&self) -> Cow<'static, str> {
|
||||||
match self {
|
match self {
|
||||||
Step::PreparingPayloads => "preparing update file",
|
IndexingStep::PreparingPayloads => "preparing update file",
|
||||||
Step::ExtractingDocuments => "extracting documents",
|
IndexingStep::ExtractingDocuments => "extracting documents",
|
||||||
Step::ExtractingFacets => "extracting facets",
|
IndexingStep::ExtractingFacets => "extracting facets",
|
||||||
Step::ExtractingWords => "extracting words",
|
IndexingStep::ExtractingWords => "extracting words",
|
||||||
Step::ExtractingWordProximity => "extracting word proximity",
|
IndexingStep::ExtractingWordProximity => "extracting word proximity",
|
||||||
Step::ExtractingEmbeddings => "extracting embeddings",
|
IndexingStep::ExtractingEmbeddings => "extracting embeddings",
|
||||||
Step::WritingGeoPoints => "writing geo points",
|
IndexingStep::WritingGeoPoints => "writing geo points",
|
||||||
Step::WritingToDatabase => "writing to database",
|
IndexingStep::WritingToDatabase => "writing to database",
|
||||||
Step::WaitingForExtractors => "waiting for extractors",
|
IndexingStep::WaitingForExtractors => "waiting for extractors",
|
||||||
Step::WritingEmbeddingsToDatabase => "writing embeddings to database",
|
IndexingStep::WritingEmbeddingsToDatabase => "writing embeddings to database",
|
||||||
Step::PostProcessingFacets => "post-processing facets",
|
IndexingStep::PostProcessingFacets => "post-processing facets",
|
||||||
Step::PostProcessingWords => "post-processing words",
|
IndexingStep::PostProcessingWords => "post-processing words",
|
||||||
Step::Finalizing => "finalizing",
|
IndexingStep::Finalizing => "finalizing",
|
||||||
}
|
}
|
||||||
|
.into()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn finished_steps(self) -> u16 {
|
fn current(&self) -> u32 {
|
||||||
self as u16
|
*self as u32
|
||||||
}
|
}
|
||||||
|
|
||||||
pub const fn total_steps() -> u16 {
|
fn total(&self) -> u32 {
|
||||||
Self::CARDINALITY as u16
|
Self::CARDINALITY as u32
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,6 +3,7 @@ use bumpalo::Bump;
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
use milli::documents::mmap_from_objects;
|
use milli::documents::mmap_from_objects;
|
||||||
|
use milli::progress::Progress;
|
||||||
use milli::update::new::indexer;
|
use milli::update::new::indexer;
|
||||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||||
use milli::vector::EmbeddingConfigs;
|
use milli::vector::EmbeddingConfigs;
|
||||||
@ -57,7 +58,7 @@ fn test_facet_distribution_with_no_facet_values() {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -72,7 +73,7 @@ fn test_facet_distribution_with_no_facet_values() {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -7,6 +7,7 @@ use bumpalo::Bump;
|
|||||||
use either::{Either, Left, Right};
|
use either::{Either, Left, Right};
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use maplit::{btreemap, hashset};
|
use maplit::{btreemap, hashset};
|
||||||
|
use milli::progress::Progress;
|
||||||
use milli::update::new::indexer;
|
use milli::update::new::indexer;
|
||||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||||
use milli::vector::EmbeddingConfigs;
|
use milli::vector::EmbeddingConfigs;
|
||||||
@ -90,7 +91,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -109,7 +110,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -5,6 +5,7 @@ use bumpalo::Bump;
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
|
use milli::progress::Progress;
|
||||||
use milli::update::new::indexer;
|
use milli::update::new::indexer;
|
||||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||||
use milli::vector::EmbeddingConfigs;
|
use milli::vector::EmbeddingConfigs;
|
||||||
@ -326,7 +327,7 @@ fn criteria_ascdesc() {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -341,7 +342,7 @@ fn criteria_ascdesc() {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
@ -3,6 +3,7 @@ use std::collections::BTreeSet;
|
|||||||
use bumpalo::Bump;
|
use bumpalo::Bump;
|
||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::documents::mmap_from_objects;
|
use milli::documents::mmap_from_objects;
|
||||||
|
use milli::progress::Progress;
|
||||||
use milli::update::new::indexer;
|
use milli::update::new::indexer;
|
||||||
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
use milli::update::{IndexDocumentsMethod, IndexerConfig, Settings};
|
||||||
use milli::vector::EmbeddingConfigs;
|
use milli::vector::EmbeddingConfigs;
|
||||||
@ -135,7 +136,7 @@ fn test_typo_disabled_on_word() {
|
|||||||
None,
|
None,
|
||||||
&mut new_fields_ids_map,
|
&mut new_fields_ids_map,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_progress| (),
|
Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -150,7 +151,7 @@ fn test_typo_disabled_on_word() {
|
|||||||
&document_changes,
|
&document_changes,
|
||||||
embedders,
|
embedders,
|
||||||
&|| false,
|
&|| false,
|
||||||
&|_| (),
|
&Progress::default(),
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user