mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-12 16:08:55 +01:00
Build mergers in parallel
This commit is contained in:
parent
39b5990f64
commit
2b317c681b
@ -426,21 +426,38 @@ impl WordDocidsMergerBuilders {
|
|||||||
current_docid: _,
|
current_docid: _,
|
||||||
} = other;
|
} = other;
|
||||||
|
|
||||||
let sorter = word_fid_docids.into_sorter()?;
|
let mut word_fid_docids_readers = Ok(vec![]);
|
||||||
let readers = sorter.into_reader_cursors()?;
|
let mut word_docids_readers = Ok(vec![]);
|
||||||
self.word_fid_docids.extend(readers);
|
let mut exact_word_docids_readers = Ok(vec![]);
|
||||||
let sorter = word_docids.into_sorter()?;
|
let mut word_position_docids_readers = Ok(vec![]);
|
||||||
let readers = sorter.into_reader_cursors()?;
|
let mut fid_word_count_docids_readers = Ok(vec![]);
|
||||||
self.word_docids.extend(readers);
|
rayon::scope(|s| {
|
||||||
let sorter = exact_word_docids.into_sorter()?;
|
s.spawn(|_| {
|
||||||
let readers = sorter.into_reader_cursors()?;
|
word_fid_docids_readers =
|
||||||
self.exact_word_docids.extend(readers);
|
word_fid_docids.into_sorter().and_then(|s| s.into_reader_cursors());
|
||||||
let sorter = word_position_docids.into_sorter()?;
|
});
|
||||||
let readers = sorter.into_reader_cursors()?;
|
s.spawn(|_| {
|
||||||
self.word_position_docids.extend(readers);
|
word_docids_readers =
|
||||||
let sorter = fid_word_count_docids.into_sorter()?;
|
word_docids.into_sorter().and_then(|s| s.into_reader_cursors());
|
||||||
let readers = sorter.into_reader_cursors()?;
|
});
|
||||||
self.fid_word_count_docids.extend(readers);
|
s.spawn(|_| {
|
||||||
|
exact_word_docids_readers =
|
||||||
|
exact_word_docids.into_sorter().and_then(|s| s.into_reader_cursors());
|
||||||
|
});
|
||||||
|
s.spawn(|_| {
|
||||||
|
word_position_docids_readers =
|
||||||
|
word_position_docids.into_sorter().and_then(|s| s.into_reader_cursors());
|
||||||
|
});
|
||||||
|
s.spawn(|_| {
|
||||||
|
fid_word_count_docids_readers =
|
||||||
|
fid_word_count_docids.into_sorter().and_then(|s| s.into_reader_cursors());
|
||||||
|
});
|
||||||
|
});
|
||||||
|
self.word_fid_docids.extend(word_fid_docids_readers?);
|
||||||
|
self.word_docids.extend(word_docids_readers?);
|
||||||
|
self.exact_word_docids.extend(exact_word_docids_readers?);
|
||||||
|
self.word_position_docids.extend(word_position_docids_readers?);
|
||||||
|
self.fid_word_count_docids.extend(fid_word_count_docids_readers?);
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -509,6 +526,10 @@ impl WordDocidsExtractors {
|
|||||||
))
|
))
|
||||||
});
|
});
|
||||||
|
|
||||||
|
{
|
||||||
|
let span =
|
||||||
|
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
|
||||||
|
let _entered = span.enter();
|
||||||
document_changes.into_par_iter().try_for_each(|document_change| {
|
document_changes.into_par_iter().try_for_each(|document_change| {
|
||||||
context_pool.with(|(rtxn, document_tokenizer, fields_ids_map, cached_sorter)| {
|
context_pool.with(|(rtxn, document_tokenizer, fields_ids_map, cached_sorter)| {
|
||||||
Self::extract_document_change(
|
Self::extract_document_change(
|
||||||
@ -521,7 +542,12 @@ impl WordDocidsExtractors {
|
|||||||
)
|
)
|
||||||
})
|
})
|
||||||
})?;
|
})?;
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
let span =
|
||||||
|
tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
|
||||||
|
let _entered = span.enter();
|
||||||
let mut builder = WordDocidsMergerBuilders::new();
|
let mut builder = WordDocidsMergerBuilders::new();
|
||||||
for (_rtxn, _tokenizer, _fields_ids_map, cache) in context_pool.into_items() {
|
for (_rtxn, _tokenizer, _fields_ids_map, cache) in context_pool.into_items() {
|
||||||
builder.add_sorters(cache)?;
|
builder.add_sorters(cache)?;
|
||||||
@ -529,6 +555,7 @@ impl WordDocidsExtractors {
|
|||||||
|
|
||||||
Ok(builder.build())
|
Ok(builder.build())
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn extract_document_change(
|
fn extract_document_change(
|
||||||
rtxn: &RoTxn,
|
rtxn: &RoTxn,
|
||||||
|
@ -107,7 +107,7 @@ impl SearchableExtractor for WordPairProximityDocidsExtractor {
|
|||||||
cached_sorter.insert_add_u32(key, docid)?;
|
cached_sorter.insert_add_u32(key, docid)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -13,7 +13,7 @@ pub use extract_word_docids::{
|
|||||||
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
|
pub use extract_word_pair_proximity_docids::WordPairProximityDocidsExtractor;
|
||||||
use grenad::Merger;
|
use grenad::Merger;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use rayon::iter::{IntoParallelIterator, ParallelIterator};
|
use rayon::iter::{IntoParallelIterator, ParallelBridge, ParallelIterator};
|
||||||
use tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
use tokenize_document::{tokenizer_builder, DocumentTokenizer};
|
||||||
|
|
||||||
use super::cache::CboCachedSorter;
|
use super::cache::CboCachedSorter;
|
||||||
@ -78,6 +78,10 @@ pub trait SearchableExtractor {
|
|||||||
))
|
))
|
||||||
});
|
});
|
||||||
|
|
||||||
|
{
|
||||||
|
let span =
|
||||||
|
tracing::trace_span!(target: "indexing::documents::extract", "docids_extraction");
|
||||||
|
let _entered = span.enter();
|
||||||
document_changes.into_par_iter().try_for_each(|document_change| {
|
document_changes.into_par_iter().try_for_each(|document_change| {
|
||||||
context_pool.with(|(rtxn, document_tokenizer, fields_ids_map, cached_sorter)| {
|
context_pool.with(|(rtxn, document_tokenizer, fields_ids_map, cached_sorter)| {
|
||||||
Self::extract_document_change(
|
Self::extract_document_change(
|
||||||
@ -90,16 +94,27 @@ pub trait SearchableExtractor {
|
|||||||
)
|
)
|
||||||
})
|
})
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
|
|
||||||
for (_rtxn, _tokenizer, _fields_ids_map, cache) in context_pool.into_items() {
|
|
||||||
let sorter = cache.into_sorter()?;
|
|
||||||
let readers = sorter.into_reader_cursors()?;
|
|
||||||
builder.extend(readers);
|
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
let mut builder = grenad::MergerBuilder::new(MergeDeladdCboRoaringBitmaps);
|
||||||
|
let span =
|
||||||
|
tracing::trace_span!(target: "indexing::documents::extract", "merger_building");
|
||||||
|
let _entered = span.enter();
|
||||||
|
|
||||||
|
let readers: Vec<_> = context_pool
|
||||||
|
.into_items()
|
||||||
|
.par_bridge()
|
||||||
|
.map(|(_rtxn, _tokenizer, _fields_ids_map, cached_sorter)| {
|
||||||
|
let sorter = cached_sorter.into_sorter()?;
|
||||||
|
sorter.into_reader_cursors()
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
for reader in readers {
|
||||||
|
builder.extend(reader?);
|
||||||
|
}
|
||||||
Ok(builder.build())
|
Ok(builder.build())
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
fn extract_document_change(
|
fn extract_document_change(
|
||||||
rtxn: &RoTxn,
|
rtxn: &RoTxn,
|
||||||
|
Loading…
Reference in New Issue
Block a user