diff --git a/crates/index-scheduler/src/index_mapper/mod.rs b/crates/index-scheduler/src/index_mapper/mod.rs index 2f5b176ed..98272542b 100644 --- a/crates/index-scheduler/src/index_mapper/mod.rs +++ b/crates/index-scheduler/src/index_mapper/mod.rs @@ -106,6 +106,8 @@ pub struct IndexStats { /// As the DB backend does not return to the disk the pages that are not currently used by the DB, /// this value is typically smaller than `database_size`. pub used_database_size: u64, + /// The primary key of the index + pub primary_key: Option<String>, /// Association of every field name with the number of times it occurs in the documents. pub field_distribution: FieldDistribution, /// Creation date of the index. @@ -127,6 +129,7 @@ impl IndexStats { number_of_documents: index.number_of_documents(rtxn)?, database_size: index.on_disk_size()?, used_database_size: index.used_size()?, + primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()), field_distribution: index.field_distribution(rtxn)?, created_at: index.created_at(rtxn)?, updated_at: index.updated_at(rtxn)?, diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs index f5f73087d..8bceaddf6 100644 --- a/crates/index-scheduler/src/lib.rs +++ b/crates/index-scheduler/src/lib.rs @@ -30,7 +30,7 @@ mod processing; mod utils; pub mod uuid_codec; -pub type Result<T> = std::result::Result<T, Error>; +pub type Result<T, E = Error> = std::result::Result<T, E>; pub type TaskId = u32; use std::collections::{BTreeMap, HashMap}; @@ -1121,6 +1121,49 @@ impl IndexScheduler { Ok(batches) } + /// Returns the total number of indexes available for the specified filter. + /// And a `Vec` of the index_uid + its stats + pub fn get_paginated_indexes_stats( + &self, + filters: &meilisearch_auth::AuthFilter, + from: usize, + limit: usize, + ) -> Result<(usize, Vec<(String, index_mapper::IndexStats)>)> { + let rtxn = self.read_txn()?; + + let mut total = 0; + let mut iter = self + .index_mapper + .index_mapping + .iter(&rtxn)? 
+ // in case of an error we want to keep the value to return it + .filter(|ret| { + ret.as_ref().map_or(true, |(name, _uuid)| filters.is_index_authorized(name)) + }) + .inspect(|_| total += 1) + .skip(from); + let ret = iter + .by_ref() + .take(limit) + .map(|ret| ret.map_err(Error::from)) + .map(|ret| { + ret.and_then(|(name, uuid)| { + self.index_mapper.index_stats.get(&rtxn, &uuid).map_err(Error::from).and_then( + |stat| { + stat.map(|stat| (name.to_string(), stat)) + .ok_or(Error::CorruptedTaskQueue) + }, + ) + }) + }) + .collect::<Result<Vec<_>>>(); + + // We must iterate on the rest of the indexes to compute the total + iter.for_each(drop); + + ret.map(|ret| (total, ret)) + } + /// The returned structure contains: /// 1. The name of the property being observed can be `statuses`, `types`, or `indexes`. /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example. @@ -1497,6 +1540,19 @@ impl IndexScheduler { Ok(index) } + pub fn refresh_index_stats(&self, name: &str) -> Result<()> { + let mut mapper_wtxn = self.env.write_txn()?; + let index = self.index_mapper.index(&mapper_wtxn, name)?; + let index_rtxn = index.read_txn()?; + + let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn) + .map_err(|e| Error::from_milli(e, Some(name.to_string())))?; + + self.index_mapper.store_stats_of(&mut mapper_wtxn, name, &stats)?; + mapper_wtxn.commit()?; + Ok(()) + } + /// Create a file and register it in the index scheduler. 
/// /// The returned file and uuid can be used to associate diff --git a/crates/meilisearch/src/lib.rs b/crates/meilisearch/src/lib.rs index 88d3419e3..9e6e45836 100644 --- a/crates/meilisearch/src/lib.rs +++ b/crates/meilisearch/src/lib.rs @@ -435,7 +435,7 @@ fn import_dump( let reader = DocumentsBatchReader::from_reader(reader)?; let embedder_configs = index.embedding_configs(&wtxn)?; - let embedders = index_scheduler.embedders(uid, embedder_configs)?; + let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?; let builder = milli::update::IndexDocuments::new( &mut wtxn, @@ -457,6 +457,8 @@ fn import_dump( builder.execute()?; wtxn.commit()?; tracing::info!("All documents successfully imported."); + + index_scheduler.refresh_index_stats(&uid)?; } let mut index_scheduler_dump = index_scheduler.register_dumped_task()?; diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs index 1355ac6c4..26a6569e7 100644 --- a/crates/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -5,7 +5,7 @@ use actix_web::web::Data; use actix_web::{web, HttpRequest, HttpResponse}; use deserr::actix_web::{AwebJson, AwebQueryParameter}; use deserr::{DeserializeError, Deserr, ValuePointerRef}; -use index_scheduler::{Error, IndexScheduler}; +use index_scheduler::IndexScheduler; use meilisearch_types::deserr::query_params::Param; use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; @@ -104,19 +104,18 @@ pub async fn list_indexes( ) -> Result<HttpResponse, ResponseError> { debug!(parameters = ?paginate, "List indexes"); let filters = index_scheduler.filters(); - let indexes: Vec<Option<IndexView>> = - index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> { - if !filters.is_index_authorized(uid) { - return Ok(None); - } - Ok(Some( - IndexView::new(uid.to_string(), index) - .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?, - )) - 
})?; - // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened. - let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect(); - let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter()); + let (total, indexes) = + index_scheduler.get_paginated_indexes_stats(filters, *paginate.offset, *paginate.limit)?; + let indexes = indexes + .into_iter() + .map(|(name, stats)| IndexView { + uid: name, + created_at: stats.created_at, + updated_at: stats.updated_at, + primary_key: stats.primary_key, + }) + .collect::<Vec<_>>(); + let ret = paginate.as_pagination().format_with(total, indexes); debug!(returns = ?ret, "List indexes"); Ok(HttpResponse::Ok().json(ret))