3331: Limit the number of concurrently opened indexes r=dureuill a=dureuill

# Pull Request

## Related issue
Relevant to #1841, fixes #3382

## What does this PR do?

### User standpoint

- Limit the number of concurrently opened indexes (currently, the number of indexes that can be concurrently opened is computed at startup)
- When too many an index is opened, the least recently used one is closed and its virtual memory released.
- This allows a user to have an arbitrary number of indexes of an arbitrary size

### Implementation standpoint

- Added a LRU cache map in `index-scheduler::lru`. A more complete implementation  (eg with helper functions not used here) is available but would better fit a dedicated crate.
- Use the LRU cache map in the `IndexScheduler`. To simplify the lifecycle of indexes, they are never removed from the cache when they are in the middle of a resize or delete operation. To achieve this, an intermediate `Vec` stores the UUIDs of the indexes that are in the middle of such an operation.
- Upon creating the index scheduler object, compute the total virtual memory that is adressable by using a dichotomic search on the max size of an index. Use this as a base to compute the number of indexes that can be open with 2TiB per index. If the virtual memory address space is lower than 2TiB, then only allow for 1 index of a fraction of that size.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
bors[bot] 2023-02-23 14:20:52 +00:00 committed by GitHub
commit ca25904c26
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 977 additions and 189 deletions

View file

@ -292,7 +292,8 @@ impl From<Opt> for Infos {
ScheduleSnapshot::Enabled(interval) => Some(interval),
};
let IndexerOpts { max_indexing_memory, max_indexing_threads } = indexer_options;
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
indexer_options;
// We're going to override every sensible information.
// We consider information sensible if it contains a path, an address, or a key.

View file

@ -45,6 +45,34 @@ use option::ScheduleSnapshot;
use crate::error::MeilisearchHttpError;
/// Default number of simultaneously opened indexes.
///
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
///
/// Lower for Windows that dedicates a smaller virtual address space to processes.
///
/// The value was chosen this way:
///
/// - Windows provides a small virtual address space of about 10TiB to processes.
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
#[cfg(windows)]
const DEFAULT_INDEX_COUNT: usize = 4;
/// Default number of simultaneously opened indexes.
///
/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
///
/// The higher, the better for avoiding reopening indexes.
///
/// The value was chosen this way:
///
/// - Opening an index consumes a file descriptor.
/// - The default on many unices is about 256 file descriptors for a process.
/// - 100 is a little bit less than half this value.
/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
#[cfg(not(windows))]
const DEFAULT_INDEX_COUNT: usize = 20;
/// Check if a db is empty. It does not provide any information on the
/// validity of the data in it.
/// We consider a database as non empty when it's a non empty directory.
@ -206,9 +234,11 @@ fn open_or_create_database_unchecked(
snapshots_path: opt.snapshot_dir.clone(),
dumps_path: opt.dump_dir.clone(),
task_db_size: opt.max_task_db_size.get_bytes() as usize,
index_size: opt.max_index_size.get_bytes() as usize,
index_base_map_size: opt.max_index_size.get_bytes() as usize,
indexer_config: (&opt.indexer_options).try_into()?,
autobatching_enabled: true,
index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
index_count: DEFAULT_INDEX_COUNT,
})?)
};

View file

@ -65,11 +65,11 @@ const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
const DEFAULT_LOG_EVERY_N: usize = 100_000;
// Each environment (index and task-db) is taking space in the virtual address space.
//
// The size of the virtual address space is limited by the OS. About 100TB for Linux and about 10TB for Windows.
// This means that the number of indexes is limited to about 200 for Linux and about 20 for Windows.
pub const INDEX_SIZE: u64 = 536_870_912_000; // 500 GiB
pub const TASK_DB_SIZE: u64 = 10_737_418_240; // 10 GiB
// Ideally, indexes can occupy 2TiB each to avoid having to manually resize them.
// The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be
// opened simultaneously.
pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB
pub const TASK_DB_SIZE: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB
#[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
#[serde(rename_all = "UPPERCASE")]
@ -494,12 +494,21 @@ pub struct IndexerOpts {
#[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)]
#[serde(default)]
pub max_indexing_threads: MaxThreads,
/// Whether or not we want to determine the budget of virtual memory address space we have available dynamically
/// (the default), or statically.
///
/// Determining the budget of virtual memory address space dynamically takes some time on some systems (such as macOS)
/// and may make tests non-deterministic, so we want to skip it in tests.
#[clap(skip)]
#[serde(skip)]
pub skip_index_budget: bool,
}
impl IndexerOpts {
/// Exports the values to their corresponding env vars if they are not set.
pub fn export_to_env(self) {
let IndexerOpts { max_indexing_memory, max_indexing_threads } = self;
let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self;
if let Some(max_indexing_memory) = max_indexing_memory.0 {
export_to_env_if_not_present(
MEILI_MAX_INDEXING_MEMORY,
@ -527,6 +536,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
thread_pool: Some(thread_pool),
max_positions_per_attributes: None,
skip_index_budget: other.skip_index_budget,
..Default::default()
})
}

View file

@ -61,6 +61,8 @@ pub struct IndexView {
impl IndexView {
fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
// It is important that this function does not keep the Index handle or a clone of it, because
// `list_indexes` relies on this property to avoid opening all indexes at once.
let rtxn = index.read_txn()?;
Ok(IndexView {
uid,
@ -90,13 +92,15 @@ pub async fn list_indexes(
paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
) -> Result<HttpResponse, ResponseError> {
let filters = index_scheduler.filters();
let indexes: Vec<_> = index_scheduler.indexes()?;
let indexes = indexes
.into_iter()
.filter(|(name, _)| filters.is_index_authorized(name))
.map(|(name, index)| IndexView::new(name, &index))
.collect::<Result<Vec<_>, _>>()?;
let indexes: Vec<Option<IndexView>> =
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
if !filters.is_index_authorized(uid) {
return Ok(None);
}
Ok(Some(IndexView::new(uid.to_string(), index)?))
})?;
// Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
debug!("returns: {:?}", ret);

View file

@ -261,9 +261,9 @@ pub fn create_all_stats(
)?;
// accumulate the size of each indexes
let processing_index = processing_task.first().and_then(|task| task.index_uid());
for (name, index) in index_scheduler.indexes()? {
if !filters.is_index_authorized(&name) {
continue;
index_scheduler.try_for_each_index(|name, index| {
if !filters.is_index_authorized(name) {
return Ok(());
}
database_size += index.on_disk_size()?;
@ -278,8 +278,9 @@ pub fn create_all_stats(
let updated_at = index.updated_at(&rtxn)?;
last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at)));
indexes.insert(name, stats);
}
indexes.insert(name.to_string(), stats);
Ok(())
})?;
database_size += index_scheduler.size()?;
database_size += auth_controller.size()?;

View file

@ -205,6 +205,7 @@ pub fn default_settings(dir: impl AsRef<Path>) -> Opt {
indexer_options: IndexerOpts {
// memory has to be unlimited because several meilisearch are running in test context.
max_indexing_memory: MaxMemory::unlimited(),
skip_index_budget: true,
..Parser::parse_from(None as Option<&str>)
},
#[cfg(feature = "metrics")]