optimize /indexes route

Optimize the index listing by processing only the indexes that end up in the requested page, instead of processing every index.
2024-11-22 12:54:26 +01:00 · 2024-07-12 01:20:01 +02:00 · 2024-07-12 01:20:01 +02:00 · 075fcc2c08
commit 075fcc2c08
parent 77b9347fff
3 changed files with 62 additions and 12 deletions
--- a/index-scheduler/src/index_mapper/mod.rs
+++ b/index-scheduler/src/index_mapper/mod.rs
@ -4,7 +4,7 @@ use std::time::Duration;
 use std::{fs, thread};

 use meilisearch_types::heed::types::{SerdeJson, Str};
-use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
+use meilisearch_types::heed::{Database, Env, RoIter, RoTxn, RwTxn};
 use meilisearch_types::milli::update::IndexerConfig;
 use meilisearch_types::milli::{FieldDistribution, Index};
 use serde::{Deserialize, Serialize};
@ -111,6 +111,8 @@ pub struct IndexStats {
    pub created_at: OffsetDateTime,
    /// Date of the last update of the index.
    pub updated_at: OffsetDateTime,
+    /// Primary key of the index.
+    pub primary_key: Option<String>,
 }

 impl IndexStats {
@ -127,6 +129,7 @@ impl IndexStats {
            field_distribution: index.field_distribution(rtxn)?,
            created_at: index.created_at(rtxn)?,
            updated_at: index.updated_at(rtxn)?,
+            primary_key: index.primary_key(rtxn)?.map(String::from),
        })
    }
 }
@ -416,6 +419,11 @@ impl IndexMapper {
            .collect()
    }

+    /// Return an iterator over the database entries; it can only live as long as the read transaction it borrows.
+    pub fn iter<'txn>(&self, rtxn: &'txn RoTxn) -> Result<RoIter<'txn, Str, UuidCodec>> {
+        self.index_mapping.iter(rtxn).map_err(Error::from)
+    }
+
    /// Return the name of all indexes without opening them.
    pub fn index_names(&self, rtxn: &RoTxn) -> Result<Vec<String>> {
        self.index_mapping
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@ -51,7 +51,7 @@ use meilisearch_types::error::ResponseError;
 use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
 use meilisearch_types::heed::byteorder::BE;
 use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
-use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
+use meilisearch_types::heed::{self, Database, Env, PutFlags, RoIter, RoTxn, RwTxn};
 use meilisearch_types::milli::documents::DocumentsBatchBuilder;
 use meilisearch_types::milli::index::IndexEmbeddingConfig;
 use meilisearch_types::milli::update::IndexerConfig;
@ -70,6 +70,7 @@ use uuid::Uuid;

 use crate::index_mapper::IndexMapper;
 use crate::utils::{check_index_swap_validity, clamp_to_page_size};
+use crate::uuid_codec::UuidCodec;

 pub(crate) type BEI128 = I128<BE>;

@ -415,6 +416,23 @@ impl IndexScheduler {
    }
 }

+/// Owns the read transaction that an iterator over the database entries borrows from.
+/// Call `.iter()` on it to get the actual iterator.
+pub struct IndexIterator<'txn> {
+    rtxn: RoTxn<'txn>,
+    index_mapper: &'txn IndexMapper,
+}
+
+impl<'txn> IndexIterator<'txn> {
+    pub fn new(rtxn: RoTxn<'txn>, index_mapper: &'txn IndexMapper) -> IndexIterator<'txn> {
+        Self { rtxn, index_mapper }
+    }
+
+    pub fn iter(&'txn self) -> Result<RoIter<'txn, Str, UuidCodec>> {
+        self.index_mapper.iter(&self.rtxn)
+    }
+}
+
 impl IndexScheduler {
    /// Create an index scheduler and start its run loop.
    pub fn new(
@ -691,6 +709,13 @@ impl IndexScheduler {
        self.index_mapper.try_for_each_index(&rtxn, f)
    }

+    /// Open a read transaction and return an `IndexIterator` that owns it.
+    /// Call `.iter()` on the returned value to get an iterator over the database entries.
+    pub fn iter(&self) -> Result<IndexIterator> {
+        let rtxn = self.env.read_txn()?;
+        Ok(IndexIterator::new(rtxn, &self.index_mapper))
+    }
+
    /// Return the task ids matched by the given query from the index scheduler's point of view.
    pub(crate) fn get_task_ids(&self, rtxn: &RoTxn, query: &Query) -> Result<RoaringBitmap> {
        let ProcessingTasks {
--- a/meilisearch/src/routes/indexes/mod.rs
+++ b/meilisearch/src/routes/indexes/mod.rs
@ -99,16 +99,33 @@ pub async fn list_indexes(
 ) -> Result<HttpResponse, ResponseError> {
    debug!(parameters = ?paginate, "List indexes");
    let filters = index_scheduler.filters();
-    let indexes: Vec<Option<IndexView>> =
-        index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
-            if !filters.is_index_authorized(uid) {
-                return Ok(None);
-            }
-            Ok(Some(IndexView::new(uid.to_string(), index)?))
-        })?;
-    // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
-    let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
-    let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
+    let index_iterator = index_scheduler.iter()?;
+    let database_iterator = index_iterator.iter()?;
+    let indexes = database_iterator
+        .filter(|res| {
+            res.as_ref().map(|(name, _)| filters.is_index_authorized(name)).unwrap_or(false)
+        })
+        .flat_map(|res| {
+            res.ok().and_then(|(name, _)| {
+                index_scheduler.index_stats(name).ok().map(|index| IndexView {
+                    uid: name.to_string(),
+                    created_at: index.inner_stats.created_at,
+                    updated_at: index.inner_stats.updated_at,
+                    primary_key: index.inner_stats.primary_key,
+                })
+            })
+        });
+    // The `indexes` iterator above doesn't have its `size_hint()` filled.
+    // To know how many elements there are, we create a second iterator that only keeps the
+    // entries that were read successfully and are authorized, then consume it to count them.
+    let index_iterator = index_scheduler.iter()?;
+    let database_iterator = index_iterator.iter()?;
+    let count = database_iterator
+        .filter(|res| {
+            res.as_ref().ok().map(|(name, _)| filters.is_index_authorized(name)).unwrap_or(false)
+        })
+        .count();
+    let ret = paginate.as_pagination().auto_paginate_unsized(count, indexes);

    debug!(returns = ?ret, "List indexes");
    Ok(HttpResponse::Ok().json(ret))