Merge pull request #5527 from nnethercott/all-cpus-in-import-dump

Use all CPUs during an import dump
This commit is contained in:
Many the fish 2025-06-02 15:24:59 +00:00 committed by GitHub
commit d5526cffff
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 79 additions and 107 deletions

View file

@ -37,7 +37,9 @@ use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use meilisearch_auth::{open_auth_store_env, AuthController};
use meilisearch_types::milli::constants::VERSION_MAJOR;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
use meilisearch_types::milli::update::{
default_thread_pool_and_threads, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig,
};
use meilisearch_types::settings::apply_settings_to_builder;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::versioning::{
@ -500,7 +502,19 @@ fn import_dump(
let network = dump_reader.network()?.cloned().unwrap_or_default();
index_scheduler.put_network(network)?;
let indexer_config = index_scheduler.indexer_config();
// 3.1 Use all cpus to process dump if `max_indexing_threads` not configured
let backup_config;
let base_config = index_scheduler.indexer_config();
let indexer_config = if base_config.max_threads.is_none() {
let (thread_pool, _) = default_thread_pool_and_threads();
let _config = IndexerConfig { thread_pool, ..*base_config };
backup_config = _config;
&backup_config
} else {
base_config
};
// /!\ The tasks must be imported AFTER importing the indexes or else the scheduler might
// try to process tasks while we're trying to import the indexes.

View file

@ -746,10 +746,12 @@ impl IndexerOpts {
max_indexing_memory.to_string(),
);
}
export_to_env_if_not_present(
MEILI_MAX_INDEXING_THREADS,
max_indexing_threads.0.to_string(),
);
if let Some(max_indexing_threads) = max_indexing_threads.0 {
export_to_env_if_not_present(
MEILI_MAX_INDEXING_THREADS,
max_indexing_threads.to_string(),
);
}
}
}
@ -757,15 +759,15 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
type Error = anyhow::Error;
fn try_from(other: &IndexerOpts) -> Result<Self, Self::Error> {
let thread_pool = ThreadPoolNoAbortBuilder::new()
.thread_name(|index| format!("indexing-thread:{index}"))
.num_threads(*other.max_indexing_threads)
let thread_pool = ThreadPoolNoAbortBuilder::new_for_indexing()
.num_threads(other.max_indexing_threads.unwrap_or_else(|| num_cpus::get() / 2))
.build()?;
Ok(Self {
thread_pool,
log_every_n: Some(DEFAULT_LOG_EVERY_N),
max_memory: other.max_indexing_memory.map(|b| b.as_u64() as usize),
thread_pool: Some(thread_pool),
max_threads: *other.max_indexing_threads,
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
..Default::default()
@ -828,31 +830,31 @@ fn total_memory_bytes() -> Option<u64> {
}
}
#[derive(Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxThreads(usize);
#[derive(Default, Debug, Clone, Copy, Deserialize, Serialize)]
pub struct MaxThreads(Option<usize>);
impl FromStr for MaxThreads {
type Err = ParseIntError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
usize::from_str(s).map(Self)
}
}
impl Default for MaxThreads {
fn default() -> Self {
MaxThreads(num_cpus::get() / 2)
fn from_str(s: &str) -> Result<MaxThreads, Self::Err> {
if s.is_empty() || s == "unlimited" {
return Ok(MaxThreads::default());
}
usize::from_str(s).map(Some).map(MaxThreads)
}
}
impl fmt::Display for MaxThreads {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
match self.0 {
Some(threads) => write!(f, "{}", threads),
None => write!(f, "unlimited"),
}
}
}
impl Deref for MaxThreads {
type Target = usize;
type Target = Option<usize>;
fn deref(&self) -> &Self::Target {
&self.0