fix list indexes

Tamo 2024-12-12 20:42:03 +01:00
parent c177210b1b
commit 52bd5740ee
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
4 changed files with 78 additions and 16 deletions

View File

@@ -106,6 +106,8 @@ pub struct IndexStats {
    /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
    /// this value is typically smaller than `database_size`.
    pub used_database_size: u64,
    /// The primary key of the index
    pub primary_key: Option<String>,
    /// Association of every field name with the number of times it occurs in the documents.
    pub field_distribution: FieldDistribution,
    /// Creation date of the index.
@@ -127,6 +129,7 @@ impl IndexStats {
            number_of_documents: index.number_of_documents(rtxn)?,
            database_size: index.on_disk_size()?,
            used_database_size: index.used_size()?,
            primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
            field_distribution: index.field_distribution(rtxn)?,
            created_at: index.created_at(rtxn)?,
            updated_at: index.updated_at(rtxn)?,

View File

@@ -30,7 +30,7 @@ mod processing;
mod utils;
pub mod uuid_codec;
-pub type Result<T> = std::result::Result<T, Error>;
+pub type Result<T, E = Error> = std::result::Result<T, E>;
pub type TaskId = u32;
use std::collections::{BTreeMap, HashMap};
@@ -1121,6 +1121,49 @@ impl IndexScheduler {
        Ok(batches)
    }
    /// Returns the total number of indexes available for the specified filter,
    /// along with a `Vec` of each index_uid and its stats.
    pub fn get_paginated_indexes_stats(
        &self,
        filters: &meilisearch_auth::AuthFilter,
        from: usize,
        limit: usize,
    ) -> Result<(usize, Vec<(String, index_mapper::IndexStats)>)> {
        let rtxn = self.read_txn()?;
        let mut total = 0;
        let mut iter = self
            .index_mapper
            .index_mapping
            .iter(&rtxn)?
            // In case of an error, we want to keep the value so we can return it
            .filter(|ret| {
                ret.as_ref().map_or(true, |(name, _uuid)| filters.is_index_authorized(name))
            })
            .inspect(|_| total += 1)
            .skip(from);
        let ret = iter
            .by_ref()
            .take(limit)
            .map(|ret| ret.map_err(Error::from))
            .map(|ret| {
                ret.and_then(|(name, uuid)| {
                    self.index_mapper.index_stats.get(&rtxn, &uuid).map_err(Error::from).and_then(
                        |stat| {
                            stat.map(|stat| (name.to_string(), stat))
                                .ok_or(Error::CorruptedTaskQueue)
                        },
                    )
                })
            })
            .collect::<Result<Vec<(String, index_mapper::IndexStats)>>>();
        // We must iterate over the rest of the indexes to compute the total
        iter.for_each(drop);
        ret.map(|ret| (total, ret))
    }
    /// The returned structure contains:
    /// 1. The name of the property being observed can be `statuses`, `types`, or `indexes`.
    /// 2. The name of the specific data related to the property can be `enqueued` for the `statuses`, `settingsUpdate` for the `types`, or the name of the index for the `indexes`, for example.
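The `inspect(|_| total += 1)` in `get_paginated_indexes_stats` above only counts entries that are actually pulled through the iterator chain, which is why the function drains the leftovers with `iter.for_each(drop)` after taking one page. A self-contained sketch of the same count-while-paginating pattern on plain data (`paginate_counted` and `keep` are made-up names for illustration, not scheduler API):

```rust
/// Returns the total number of items passing `keep`, plus one page of them.
/// Same shape as `get_paginated_indexes_stats`: count lazily with `inspect`,
/// take one page, then drain the rest so the counter reaches the true total.
fn paginate_counted<T: Clone>(
    items: &[T],
    keep: impl Fn(&T) -> bool,
    from: usize,
    limit: usize,
) -> (usize, Vec<T>) {
    let mut total = 0;
    let mut iter = items
        .iter()
        .filter(|it| keep(it))
        .inspect(|_| total += 1) // counts only items that actually flow through
        .skip(from);
    let page: Vec<T> = iter.by_ref().take(limit).cloned().collect();
    // Without this, `total` stops at `from + limit`; draining the iterator
    // forces `inspect` to see every remaining matching item.
    iter.for_each(drop);
    (total, page)
}

fn main() {
    let data: Vec<u32> = (0..100).collect();
    let (total, page) = paginate_counted(&data, |n| *n % 2 == 0, 10, 5);
    assert_eq!(total, 50);
    assert_eq!(page, vec![20, 22, 24, 26, 28]);
}
```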
@@ -1497,6 +1540,19 @@ impl IndexScheduler {
        Ok(index)
    }
    pub fn refresh_index_stats(&self, name: &str) -> Result<()> {
        let mut mapper_wtxn = self.env.write_txn()?;
        let index = self.index_mapper.index(&mapper_wtxn, name)?;
        let index_rtxn = index.read_txn()?;
        let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)
            .map_err(|e| Error::from_milli(e, Some(name.to_string())))?;
        self.index_mapper.store_stats_of(&mut mapper_wtxn, name, &stats)?;
        mapper_wtxn.commit()?;
        Ok(())
    }
    /// Create a file and register it in the index scheduler.
    ///
    /// The returned file and uuid can be used to associate
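`refresh_index_stats` gives callers a way to recompute and persist the cached `IndexStats` for a single index outside the scheduler's normal batch path; the dump import below relies on it so the cache does not keep describing the index as it was before the documents were written. A toy stand-in for that caching idea (these are not the real Meilisearch types, just an illustration of compute-once stats that go stale after an out-of-band write):

```rust
use std::collections::HashMap;

/// Toy stand-in for the scheduler's cached per-index stats.
struct Stats {
    number_of_documents: u64,
    primary_key: Option<String>,
}

struct StatsCache {
    cached: HashMap<String, Stats>,
}

impl StatsCache {
    /// Recompute the stats for one index from its source of truth and overwrite
    /// the cached entry, mirroring what `refresh_index_stats` does with its txns.
    fn refresh(&mut self, name: &str, compute: impl FnOnce() -> Stats) {
        self.cached.insert(name.to_string(), compute());
    }
}

fn main() {
    let mut cache = StatsCache { cached: HashMap::new() };
    // Index created empty: the cache records zero documents and no primary key.
    cache.refresh("movies", || Stats { number_of_documents: 0, primary_key: None });
    // A bulk import (like the dump import below) writes documents without going
    // through the scheduler, so the cached entry is stale until refreshed again.
    cache.refresh("movies", || Stats {
        number_of_documents: 31_968,
        primary_key: Some("id".to_string()),
    });
    assert_eq!(cache.cached["movies"].number_of_documents, 31_968);
}
```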

View File

@@ -433,7 +433,7 @@ fn import_dump(
        let reader = DocumentsBatchReader::from_reader(reader)?;
        let embedder_configs = index.embedding_configs(&wtxn)?;
-        let embedders = index_scheduler.embedders(uid, embedder_configs)?;
+        let embedders = index_scheduler.embedders(uid.to_string(), embedder_configs)?;
        let builder = milli::update::IndexDocuments::new(
            &mut wtxn,
@@ -455,6 +455,8 @@
        builder.execute()?;
        wtxn.commit()?;
        tracing::info!("All documents successfully imported.");
        index_scheduler.refresh_index_stats(&uid)?;
    }
    let mut index_scheduler_dump = index_scheduler.register_dumped_task()?;

View File

@@ -5,7 +5,7 @@ use actix_web::web::Data;
use actix_web::{web, HttpRequest, HttpResponse};
use deserr::actix_web::{AwebJson, AwebQueryParameter};
use deserr::{DeserializeError, Deserr, ValuePointerRef};
-use index_scheduler::{Error, IndexScheduler};
+use index_scheduler::IndexScheduler;
use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{immutable_field_error, DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::*;
@@ -102,19 +102,20 @@ pub async fn list_indexes(
) -> Result<HttpResponse, ResponseError> {
    debug!(parameters = ?paginate, "List indexes");
    let filters = index_scheduler.filters();
-    let indexes: Vec<Option<IndexView>> =
-        index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
-            if !filters.is_index_authorized(uid) {
-                return Ok(None);
-            }
-            Ok(Some(
-                IndexView::new(uid.to_string(), index)
-                    .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?,
-            ))
-        })?;
-    // Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
-    let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
-    let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
+    dbg!("here");
+    let (total, indexes) =
+        index_scheduler.get_paginated_indexes_stats(filters, *paginate.offset, *paginate.limit)?;
+    dbg!("hore");
+    let indexes = indexes
+        .into_iter()
+        .map(|(name, stats)| IndexView {
+            uid: name,
+            created_at: stats.created_at,
+            updated_at: stats.updated_at,
+            primary_key: stats.primary_key,
+        })
+        .collect::<Vec<_>>();
+    let ret = paginate.as_pagination().format_with(total, indexes);
    debug!(returns = ?ret, "List indexes");
    Ok(HttpResponse::Ok().json(ret))
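With this change `GET /indexes` builds each `IndexView` from the cached stats instead of opening every index (which is also why `IndexStats` now carries `primary_key`), and it hands an already-sliced page plus the separately counted total to the paginator. A minimal sketch of what a `format_with(total, results)`-style helper boils down to, with hypothetical types standing in for Meilisearch's own `Pagination`/`PaginationView`:

```rust
/// Stand-in for the paginated response body: one page of results plus the
/// offset/limit that produced it and the total number of matching items.
#[derive(Debug)]
struct PaginationView<T> {
    results: Vec<T>,
    offset: usize,
    limit: usize,
    total: usize,
}

#[derive(Debug)]
struct Pagination {
    offset: usize,
    limit: usize,
}

impl Pagination {
    /// Unlike an `auto_paginate_sized`-style helper, which receives the whole
    /// collection and slices it itself, this variant trusts the caller to have
    /// already fetched exactly one page and to know the total independently.
    fn format_with<T>(&self, total: usize, results: Vec<T>) -> PaginationView<T> {
        PaginationView { results, offset: self.offset, limit: self.limit, total }
    }
}

fn main() {
    let paginate = Pagination { offset: 20, limit: 2 };
    // e.g. the page returned by a call like get_paginated_indexes_stats(filters, 20, 2)
    let page = vec!["movies".to_string(), "products".to_string()];
    let view = paginate.format_with(1_042, page);
    println!("{view:?}");
}
```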