mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Merge #3331
3331: Limit the number of concurrently opened indexes r=dureuill a=dureuill

# Pull Request

## Related issue

Relevant to #1841, fixes #3382

## What does this PR do?

### User standpoint

- Limit the number of concurrently opened indexes (currently, the number of indexes that can be concurrently opened is computed at startup).
- When too many indexes are opened, the least recently used one is closed and its virtual memory released.
- This allows a user to have an arbitrary number of indexes of an arbitrary size.

### Implementation standpoint

- Added an LRU cache map in `index-scheduler::lru`. A more complete implementation (e.g. with helper functions not used here) is available but would better fit a dedicated crate.
- Use the LRU cache map in the `IndexScheduler`. To simplify the lifecycle of indexes, they are never removed from the cache while they are in the middle of a resize or delete operation; an intermediate `Vec` stores the UUIDs of the indexes that are in the middle of such an operation.
- Upon creating the index scheduler object, compute the total addressable virtual memory by running a dichotomic search on the maximum size of an index. Use this as a base to compute the number of indexes that can be opened with 2TiB per index. If the virtual memory address space is lower than 2TiB, then only allow for a single index of a fraction of that size.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
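The last implementation bullet is the interesting part of the startup path: probe how much virtual address space the process can actually map, then derive the index budget from it. The sketch below is an editor's illustration, not the PR's code: `try_reserve` and `fake_limit` stand in for a real reservation probe, the search bounds are arbitrary, and halving the budget is just one way to pick "a fraction of that size".

```rust
/// A minimal sketch of the dichotomic-search budgeting step described above.
const TEBIBYTE: u64 = 1024 * 1024 * 1024 * 1024;
const DEFAULT_MAP_SIZE: u64 = 2 * TEBIBYTE; // 2TiB per index, as in the PR

/// Largest `size` in `[lo, hi]` for which `probe(size)` succeeds, found by bisection.
/// Assumes `probe` is monotone: once it fails, it fails for every larger size.
fn max_reservable(mut lo: u64, mut hi: u64, probe: impl Fn(u64) -> bool) -> u64 {
    while lo < hi {
        let mid = lo + (hi - lo + 1) / 2;
        if probe(mid) {
            lo = mid;
        } else {
            hi = mid - 1;
        }
    }
    lo
}

fn main() {
    // Hypothetical probe: pretend the OS lets us address about 100TiB (a Linux-like limit).
    // A real probe would attempt an actual virtual-memory reservation of `size` bytes.
    let fake_limit = 100 * TEBIBYTE;
    let try_reserve = |size: u64| size <= fake_limit;

    let budget = max_reservable(0, 128 * TEBIBYTE, try_reserve);
    let (index_count, map_size) = if budget < DEFAULT_MAP_SIZE {
        // Less than 2TiB addressable: a single index gets a fraction (here, half) of the budget.
        (1, budget / 2)
    } else {
        ((budget / DEFAULT_MAP_SIZE) as usize, DEFAULT_MAP_SIZE)
    };
    println!("budget: {budget} bytes -> {index_count} indexes of {map_size} bytes each");
}
```

With the fake 100TiB limit this prints a budget of 50 indexes at 2TiB each; on a much smaller address space it would fall back to a single, smaller index.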
This commit is contained in commit ca25904c26.
14 changed files with 977 additions and 189 deletions
@@ -292,7 +292,8 @@ impl From<Opt> for Infos {
             ScheduleSnapshot::Enabled(interval) => Some(interval),
         };
 
-        let IndexerOpts { max_indexing_memory, max_indexing_threads } = indexer_options;
+        let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } =
+            indexer_options;
 
         // We're going to override every sensible information.
         // We consider information sensible if it contains a path, an address, or a key.
@@ -45,6 +45,34 @@ use option::ScheduleSnapshot;
 
 use crate::error::MeilisearchHttpError;
 
+/// Default number of simultaneously opened indexes.
+///
+/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
+///
+/// Lower for Windows that dedicates a smaller virtual address space to processes.
+///
+/// The value was chosen this way:
+///
+/// - Windows provides a small virtual address space of about 10TiB to processes.
+/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
+#[cfg(windows)]
+const DEFAULT_INDEX_COUNT: usize = 4;
+
+/// Default number of simultaneously opened indexes.
+///
+/// This value is used when dynamic computation of how many indexes can be opened at once was skipped (e.g., in tests).
+///
+/// The higher, the better for avoiding reopening indexes.
+///
+/// The value was chosen this way:
+///
+/// - Opening an index consumes a file descriptor.
+/// - The default on many unices is about 256 file descriptors for a process.
+/// - 100 is a little bit less than half this value.
+/// - The chosen value allows for indexes to use the default map size of 2TiB safely.
+#[cfg(not(windows))]
+const DEFAULT_INDEX_COUNT: usize = 20;
+
 /// Check if a db is empty. It does not provide any information on the
 /// validity of the data in it.
 /// We consider a database as non empty when it's a non empty directory.
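As a quick cross-check of the figures quoted in those doc comments (an editor's sketch, not part of the commit; the 10TiB and 256-file-descriptor numbers are the ones stated above):

```rust
fn main() {
    const TIB: u64 = 1024 * 1024 * 1024 * 1024;
    // Windows: roughly 10TiB of per-process address space divided by 2TiB maps
    // leaves room for about 5 mappings, so a small default index count stays safely under it.
    assert_eq!((10 * TIB) / (2 * TIB), 5);
    // Unix: with a typical default of 256 file descriptors per process,
    // the non-Windows default index count stays well below half of them.
    assert!(20 < 256 / 2);
    println!("defaults are consistent with the documented limits");
}
```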
@@ -206,9 +234,11 @@ fn open_or_create_database_unchecked(
         snapshots_path: opt.snapshot_dir.clone(),
         dumps_path: opt.dump_dir.clone(),
         task_db_size: opt.max_task_db_size.get_bytes() as usize,
-        index_size: opt.max_index_size.get_bytes() as usize,
+        index_base_map_size: opt.max_index_size.get_bytes() as usize,
         indexer_config: (&opt.indexer_options).try_into()?,
         autobatching_enabled: true,
+        index_growth_amount: byte_unit::Byte::from_str("10GiB").unwrap().get_bytes() as usize,
+        index_count: DEFAULT_INDEX_COUNT,
     })?)
 };
 
@@ -65,11 +65,11 @@ const MEILI_MAX_INDEXING_THREADS: &str = "MEILI_MAX_INDEXING_THREADS";
 const DEFAULT_LOG_EVERY_N: usize = 100_000;
 
 // Each environment (index and task-db) is taking space in the virtual address space.
-//
-// The size of the virtual address space is limited by the OS. About 100TB for Linux and about 10TB for Windows.
-// This means that the number of indexes is limited to about 200 for Linux and about 20 for Windows.
-pub const INDEX_SIZE: u64 = 536_870_912_000; // 500 GiB
-pub const TASK_DB_SIZE: u64 = 10_737_418_240; // 10 GiB
+// Ideally, indexes can occupy 2TiB each to avoid having to manually resize them.
+// The actual size of the virtual address space is computed at startup to determine how many 2TiB indexes can be
+// opened simultaneously.
+pub const INDEX_SIZE: u64 = 2 * 1024 * 1024 * 1024 * 1024; // 2 TiB
+pub const TASK_DB_SIZE: u64 = 10 * 1024 * 1024 * 1024; // 10 GiB
 
 #[derive(Debug, Default, Clone, Copy, Serialize, Deserialize)]
 #[serde(rename_all = "UPPERCASE")]
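The raw byte literals on both sides of this change are easy to misread; a tiny check (editor's addition, not in the commit) of what they correspond to:

```rust
fn main() {
    const GIB: u64 = 1024 * 1024 * 1024;
    assert_eq!(536_870_912_000, 500 * GIB); // old INDEX_SIZE: 500 GiB
    assert_eq!(10_737_418_240, 10 * GIB); // TASK_DB_SIZE: 10 GiB
    assert_eq!(2 * 1024 * 1024 * 1024 * 1024u64, 2 * 1024 * GIB); // new INDEX_SIZE: 2 TiB
    println!("byte values match the comments");
}
```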
@@ -494,12 +494,21 @@ pub struct IndexerOpts {
     #[clap(long, env = MEILI_MAX_INDEXING_THREADS, default_value_t)]
     #[serde(default)]
     pub max_indexing_threads: MaxThreads,
+
+    /// Whether or not we want to determine the budget of virtual memory address space we have available dynamically
+    /// (the default), or statically.
+    ///
+    /// Determining the budget of virtual memory address space dynamically takes some time on some systems (such as macOS)
+    /// and may make tests non-deterministic, so we want to skip it in tests.
+    #[clap(skip)]
+    #[serde(skip)]
+    pub skip_index_budget: bool,
 }
 
 impl IndexerOpts {
     /// Exports the values to their corresponding env vars if they are not set.
     pub fn export_to_env(self) {
-        let IndexerOpts { max_indexing_memory, max_indexing_threads } = self;
+        let IndexerOpts { max_indexing_memory, max_indexing_threads, skip_index_budget: _ } = self;
         if let Some(max_indexing_memory) = max_indexing_memory.0 {
             export_to_env_if_not_present(
                 MEILI_MAX_INDEXING_MEMORY,
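The diff only adds the flag and its documentation; how it is consumed is not shown here. The following is a guessed shape (everything except the `skip_index_budget` name and the `DEFAULT_INDEX_COUNT` value from the earlier hunk is invented for illustration) of a budget computation that bypasses the dynamic probe when the flag is set:

```rust
const TIB: u64 = 1024 * 1024 * 1024 * 1024;
const DEFAULT_INDEX_COUNT: usize = 20; // non-Windows default from the earlier hunk

struct Budget {
    index_count: usize,
    map_size: u64,
}

fn compute_budget(skip_index_budget: bool) -> Budget {
    if skip_index_budget {
        // Static path: no probing of the virtual address space, so tests stay
        // fast and deterministic.
        return Budget { index_count: DEFAULT_INDEX_COUNT, map_size: 2 * TIB };
    }
    // Placeholder for the dynamic probe (see the bisection sketch near the top of this page).
    Budget { index_count: 50, map_size: 2 * TIB }
}

fn main() {
    let budget = compute_budget(true);
    println!("{} indexes of {} bytes each", budget.index_count, budget.map_size);
}
```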
@@ -527,6 +536,7 @@ impl TryFrom<&IndexerOpts> for IndexerConfig {
             max_memory: other.max_indexing_memory.map(|b| b.get_bytes() as usize),
             thread_pool: Some(thread_pool),
             max_positions_per_attributes: None,
+            skip_index_budget: other.skip_index_budget,
             ..Default::default()
         })
     }
@@ -61,6 +61,8 @@ pub struct IndexView {
 
 impl IndexView {
     fn new(uid: String, index: &Index) -> Result<IndexView, milli::Error> {
+        // It is important that this function does not keep the Index handle or a clone of it, because
+        // `list_indexes` relies on this property to avoid opening all indexes at once.
         let rtxn = index.read_txn()?;
         Ok(IndexView {
             uid,
@@ -90,13 +92,15 @@ pub async fn list_indexes(
     paginate: AwebQueryParameter<ListIndexes, DeserrQueryParamError>,
 ) -> Result<HttpResponse, ResponseError> {
     let filters = index_scheduler.filters();
-    let indexes: Vec<_> = index_scheduler.indexes()?;
-    let indexes = indexes
-        .into_iter()
-        .filter(|(name, _)| filters.is_index_authorized(name))
-        .map(|(name, index)| IndexView::new(name, &index))
-        .collect::<Result<Vec<_>, _>>()?;
-
+    let indexes: Vec<Option<IndexView>> =
+        index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
+            if !filters.is_index_authorized(uid) {
+                return Ok(None);
+            }
+            Ok(Some(IndexView::new(uid.to_string(), index)?))
+        })?;
+    // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
+    let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
     let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
 
     debug!("returns: {:?}", ret);
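The replacement of `indexes()` with `try_for_each_index` is what makes the handler compatible with the LRU cap: indexes are visited one at a time instead of being collected as live handles. The toy below uses entirely invented types (the real `IndexScheduler` API is only what this diff shows) to illustrate why the callback style keeps at most one index open at any moment:

```rust
use std::collections::BTreeMap;

/// Stand-in for an opened index handle (in Meilisearch this maps an LMDB environment).
struct Index {
    doc_count: u64,
}

/// Stand-in for the scheduler: it knows which indexes exist without keeping them open.
struct Scheduler {
    known: BTreeMap<String, u64>,
}

impl Scheduler {
    /// Stand-in for the LRU-gated open that may first evict the least recently used index.
    fn open(&self, uid: &str) -> Result<Index, String> {
        self.known
            .get(uid)
            .map(|&doc_count| Index { doc_count })
            .ok_or_else(|| format!("unknown index `{uid}`"))
    }

    /// Visit every index through a callback: the handle is created inside the loop
    /// and dropped before the next iteration, so at most one is ever live.
    fn try_for_each_index<T>(
        &self,
        mut f: impl FnMut(&str, &Index) -> Result<T, String>,
    ) -> Result<Vec<T>, String> {
        let mut out = Vec::new();
        for uid in self.known.keys() {
            let index = self.open(uid)?; // opened here...
            out.push(f(uid.as_str(), &index)?);
        } // ...and dropped here
        Ok(out)
    }
}

fn main() -> Result<(), String> {
    let scheduler = Scheduler {
        known: BTreeMap::from([("movies".to_string(), 31_944), ("songs".to_string(), 1_000)]),
    };
    // Like the handler above, build lightweight views that do not retain the handle.
    let views = scheduler.try_for_each_index(|uid, index| Ok((uid.to_string(), index.doc_count)))?;
    println!("{views:?}");
    Ok(())
}
```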
@@ -261,9 +261,9 @@ pub fn create_all_stats(
     )?;
     // accumulate the size of each indexes
     let processing_index = processing_task.first().and_then(|task| task.index_uid());
-    for (name, index) in index_scheduler.indexes()? {
-        if !filters.is_index_authorized(&name) {
-            continue;
+    index_scheduler.try_for_each_index(|name, index| {
+        if !filters.is_index_authorized(name) {
+            return Ok(());
         }
 
         database_size += index.on_disk_size()?;
@@ -278,8 +278,9 @@ pub fn create_all_stats(
         let updated_at = index.updated_at(&rtxn)?;
         last_task = last_task.map_or(Some(updated_at), |last| Some(last.max(updated_at)));
 
-        indexes.insert(name, stats);
-    }
+        indexes.insert(name.to_string(), stats);
+        Ok(())
+    })?;
 
     database_size += index_scheduler.size()?;
     database_size += auth_controller.size()?;