Mirror of https://github.com/meilisearch/MeiliSearch, synced 2025-07-04 12:27:13 +02:00
Introduce the ThreadPoolNoAbort wrapper

Commit d4aeff92d0 (parent b3173d0423)
14 changed files with 129 additions and 60 deletions
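The commit replaces direct uses of rayon::ThreadPool with a ThreadPoolNoAbort wrapper: a panic that escapes a rayon worker thread can abort the whole process, whereas the wrapper records the panic and hands it back to the caller as an ordinary error. The hunks below show only the call sites, so here is a minimal sketch of what such a wrapper can look like; the `install` signature and the `PanicCatched` error type are inferred from those call sites, not copied from the commit:

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

/// Error reported when a panic was caught on the pool.
#[derive(Debug)]
pub struct PanicCatched;

/// A rayon::ThreadPool wrapper that records panics raised inside the
/// pool and reports them to the caller as a regular error.
#[derive(Debug)]
pub struct ThreadPoolNoAbort {
    thread_pool: rayon::ThreadPool,
    /// Set to true by the pool's panic handler when a panic occurs.
    pool_catched_panic: Arc<AtomicBool>,
}

impl ThreadPoolNoAbort {
    pub fn install<OP, R>(&self, op: OP) -> Result<R, PanicCatched>
    where
        OP: FnOnce() -> R + Send,
        R: Send,
    {
        let output = self.thread_pool.install(op);
        // Reset the flag; if a panic was recorded while `op` ran,
        // surface it as an error the caller can propagate with `?`.
        if self.pool_catched_panic.swap(false, Ordering::SeqCst) {
            Err(PanicCatched)
        } else {
            Ok(output)
        }
    }

    pub fn current_num_threads(&self) -> usize {
        self.thread_pool.current_num_threads()
    }
}
```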
@@ -19,7 +19,7 @@ use crate::update::del_add::{DelAdd, KvReaderDelAdd, KvWriterDelAdd};
 use crate::update::index_documents::helpers::try_split_at;
 use crate::update::settings::InnerIndexSettingsDiff;
 use crate::vector::Embedder;
-use crate::{DocumentId, InternalError, Result, VectorOrArrayOfVectors};
+use crate::{DocumentId, InternalError, Result, ThreadPoolNoAbort, VectorOrArrayOfVectors};
 
 /// The length of the elements that are always in the buffer when inserting new values.
 const TRUNCATE_SIZE: usize = size_of::<DocumentId>();
@@ -347,7 +347,7 @@ pub fn extract_embeddings<R: io::Read + io::Seek>(
     prompt_reader: grenad::Reader<R>,
     indexer: GrenadParameters,
     embedder: Arc<Embedder>,
-    request_threads: &rayon::ThreadPool,
+    request_threads: &ThreadPoolNoAbort,
 ) -> Result<grenad::Reader<BufReader<File>>> {
     puffin::profile_function!();
     let n_chunks = embedder.chunk_count_hint(); // chunk level parallelism
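At this call site only the parameter type changes, but it shows the intended usage pattern: work that used to run bare on a rayon pool now goes through `install`, which converts a caught panic into an `Err`. A rough sketch of that shape, reusing the wrapper sketched above; `embed_chunk` is a stand-in for a real embedding request, not milli API:

```rust
use rayon::prelude::*;

// Stand-in for a real embedding request; not part of milli.
fn embed_chunk(texts: &[String]) -> Vec<f32> {
    vec![0.0; texts.len()]
}

// Hypothetical call shape for extract_embeddings-style code: the parallel
// requests run inside the no-abort pool, so a panic in any of them comes
// back as Err(PanicCatched) instead of taking down the process.
fn run_requests(
    request_threads: &ThreadPoolNoAbort,
    chunks: Vec<Vec<String>>,
) -> Result<Vec<Vec<f32>>, PanicCatched> {
    request_threads.install(move || {
        chunks.par_iter().map(|chunk| embed_chunk(chunk)).collect()
    })
}
```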
@@ -31,7 +31,7 @@ use self::extract_word_position_docids::extract_word_position_docids;
 use super::helpers::{as_cloneable_grenad, CursorClonableMmap, GrenadParameters};
 use super::{helpers, TypedChunk};
 use crate::update::settings::InnerIndexSettingsDiff;
-use crate::{FieldId, Result};
+use crate::{FieldId, Result, ThreadPoolNoAbortBuilder};
 
 /// Extract data for each databases from obkv documents in parallel.
 /// Send data in grenad file over provided Sender.
@@ -229,7 +229,7 @@ fn send_original_documents_data(
     let documents_chunk_cloned = original_documents_chunk.clone();
     let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
 
-    let request_threads = rayon::ThreadPoolBuilder::new()
+    let request_threads = ThreadPoolNoAbortBuilder::new()
         .num_threads(crate::vector::REQUEST_PARALLELISM)
         .thread_name(|index| format!("embedding-request-{index}"))
         .build()?;
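ThreadPoolNoAbortBuilder evidently mirrors the small slice of rayon::ThreadPoolBuilder used here (`num_threads`, `thread_name`, `build`), which is why the call site changes by exactly one line. A sketch of such a builder, pairing with the wrapper sketched above; the `panic_handler` wiring matches what this commit removes from the indexer (see the `@@ -297` hunk below):

```rust
use std::sync::atomic::{AtomicBool, Ordering};
use std::sync::Arc;

pub struct ThreadPoolNoAbortBuilder(rayon::ThreadPoolBuilder);

impl ThreadPoolNoAbortBuilder {
    pub fn new() -> Self {
        ThreadPoolNoAbortBuilder(rayon::ThreadPoolBuilder::new())
    }

    pub fn num_threads(mut self, num_threads: usize) -> Self {
        self.0 = self.0.num_threads(num_threads);
        self
    }

    pub fn thread_name<F>(mut self, closure: F) -> Self
    where
        F: FnMut(usize) -> String + 'static,
    {
        self.0 = self.0.thread_name(closure);
        self
    }

    pub fn build(mut self) -> Result<ThreadPoolNoAbort, rayon::ThreadPoolBuildError> {
        let pool_catched_panic = Arc::new(AtomicBool::new(false));
        // Record panics instead of letting rayon abort the process; the
        // flag is shared with the ThreadPoolNoAbort returned below.
        self.0 = self.0.panic_handler({
            let catched_panic = pool_catched_panic.clone();
            move |_result| catched_panic.store(true, Ordering::SeqCst)
        });
        Ok(ThreadPoolNoAbort { thread_pool: self.0.build()?, pool_catched_panic })
    }
}
```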
@@ -8,7 +8,6 @@ use std::collections::{HashMap, HashSet};
 use std::io::{Read, Seek};
 use std::num::NonZeroU32;
 use std::result::Result as StdResult;
-use std::sync::atomic::Ordering;
 use std::sync::Arc;
 
 use crossbeam_channel::{Receiver, Sender};
@@ -34,6 +33,7 @@ use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
 pub use self::transform::{Transform, TransformOutput};
 use crate::documents::{obkv_to_object, DocumentsBatchReader};
 use crate::error::{Error, InternalError, UserError};
+use crate::thread_pool_no_abort::ThreadPoolNoAbortBuilder;
 pub use crate::update::index_documents::helpers::CursorClonableMmap;
 use crate::update::{
     IndexerConfig, UpdateIndexingStep, WordPrefixDocids, WordPrefixIntegerDocids, WordsPrefixesFst,
@@ -297,17 +297,13 @@ where
         let settings_diff = Arc::new(settings_diff);
 
         let backup_pool;
-        let pool_catched_panic = self.indexer_config.pool_panic_catched.clone();
         let pool = match self.indexer_config.thread_pool {
             Some(ref pool) => pool,
             None => {
                 // We initialize a backup pool with the default
                 // settings if none have already been set.
-                let mut pool_builder = rayon::ThreadPoolBuilder::new();
-                pool_builder = pool_builder.panic_handler({
-                    let catched_panic = pool_catched_panic.clone();
-                    move |_result| catched_panic.store(true, Ordering::SeqCst)
-                });
+                #[allow(unused_mut)]
+                let mut pool_builder = ThreadPoolNoAbortBuilder::new();
 
                 #[cfg(test)]
                 {
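Note how the hand-rolled `panic_handler` block disappears here: that wiring now lives inside the builder's `build`, so the indexer only swaps the builder type. The new `#[allow(unused_mut)]` suggests `pool_builder` is only reassigned inside the `#[cfg(test)]` block that follows, so the `mut` binding would otherwise warn in non-test builds.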
@@ -538,12 +534,7 @@ where
             }
 
             Ok(())
-        })?;
-
-        // While reseting the pool panic catcher we return an error if we catched one.
-        if pool_catched_panic.swap(false, Ordering::SeqCst) {
-            return Err(InternalError::PanicInThreadPool.into());
-        }
+        }).map_err(InternalError::from)??;
 
         // We write the field distribution into the main database
         self.index.put_field_distribution(self.wtxn, &field_distribution)?;
@@ -572,12 +563,8 @@ where
                 writer.build(wtxn, &mut rng, None)?;
             }
             Result::Ok(())
-            })?;
-
-            // While reseting the pool panic catcher we return an error if we catched one.
-            if pool_catched_panic.swap(false, Ordering::SeqCst) {
-                return Err(InternalError::PanicInThreadPool.into());
-            }
+            })
+            .map_err(InternalError::from)??;
         }
 
         self.execute_prefix_databases(
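Both hunks above replace the explicit flag check with `.map_err(InternalError::from)??`. The double `?` works because `install` now returns a nested result: the outer layer reports a caught panic, the inner one is whatever `Result` the closure itself produced. A self-contained illustration of the pattern with stand-in types (the real `InternalError` and `Error` live in milli):

```rust
#[derive(Debug)]
pub struct PanicCatched;

#[derive(Debug)]
pub enum InternalError {
    PanicInThreadPool,
}

impl From<PanicCatched> for InternalError {
    fn from(_: PanicCatched) -> Self {
        InternalError::PanicInThreadPool
    }
}

#[derive(Debug)]
pub enum Error {
    Internal(InternalError),
}

impl From<InternalError> for Error {
    fn from(e: InternalError) -> Self {
        Error::Internal(e)
    }
}

// Stand-in for ThreadPoolNoAbort::install: the outer Result reports a
// caught panic, the value inside is whatever the closure returned.
fn install<R>(op: impl FnOnce() -> R) -> Result<R, PanicCatched> {
    Ok(op())
}

fn index() -> Result<(), Error> {
    // install(..) yields Result<Result<(), Error>, PanicCatched>.
    // The first `?` propagates a caught panic (converted to InternalError,
    // then to Error via From); the second `?` propagates the closure's
    // own error.
    install(|| -> Result<(), Error> { Ok(()) }).map_err(InternalError::from)??;
    Ok(())
}

fn main() {
    index().unwrap();
}
```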
@@ -1,8 +1,6 @@
-use std::sync::atomic::AtomicBool;
-use std::sync::Arc;
-
 use grenad::CompressionType;
-use rayon::ThreadPool;
 
+use crate::thread_pool_no_abort::ThreadPoolNoAbort;
+
 #[derive(Debug)]
 pub struct IndexerConfig {
@@ -12,10 +10,7 @@ pub struct IndexerConfig {
     pub max_memory: Option<usize>,
     pub chunk_compression_type: CompressionType,
     pub chunk_compression_level: Option<u32>,
-    pub thread_pool: Option<ThreadPool>,
-    /// Set to true if the thread pool catched a panic
-    /// and we must abort the task
-    pub pool_panic_catched: Arc<AtomicBool>,
+    pub thread_pool: Option<ThreadPoolNoAbort>,
     pub max_positions_per_attributes: Option<u32>,
     pub skip_index_budget: bool,
 }
@@ -30,7 +25,6 @@ impl Default for IndexerConfig {
             chunk_compression_type: CompressionType::None,
             chunk_compression_level: None,
             thread_pool: None,
-            pool_panic_catched: Arc::default(),
             max_positions_per_attributes: None,
             skip_index_budget: false,
         }
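With the panic flag owned by the pool itself, IndexerConfig drops the `pool_panic_catched` field and simply swaps the pool type. For callers, constructing a config with an explicit pool might look like this; the thread count and thread name are illustrative, and the builder is the sketch from above:

```rust
fn make_config() -> Result<IndexerConfig, rayon::ThreadPoolBuildError> {
    // Illustrative values; assumes the ThreadPoolNoAbortBuilder sketched above.
    let pool = ThreadPoolNoAbortBuilder::new()
        .num_threads(8)
        .thread_name(|index| format!("indexing-thread-{index}"))
        .build()?;

    Ok(IndexerConfig { thread_pool: Some(pool), ..Default::default() })
}
```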