optimize /indexes route

Optimize the index listing by no longer processing every index: only
the indexes that end up in the requested page of the pagination are processed.
This commit is contained in:
ThalusA 2024-07-12 01:20:01 +02:00
parent 77b9347fff
commit 075fcc2c08
3 changed files with 62 additions and 12 deletions

View File

@ -4,7 +4,7 @@ use std::time::Duration;
use std::{fs, thread};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::heed::{Database, Env, RoIter, RoTxn, RwTxn};
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
@ -111,6 +111,8 @@ pub struct IndexStats {
pub created_at: OffsetDateTime,
/// Date of the last update of the index.
pub updated_at: OffsetDateTime,
/// Primary key of the index.
pub primary_key: Option<String>,
}
impl IndexStats {
@ -127,6 +129,7 @@ impl IndexStats {
field_distribution: index.field_distribution(rtxn)?,
created_at: index.created_at(rtxn)?,
updated_at: index.updated_at(rtxn)?,
primary_key: index.primary_key(rtxn)?.map(String::from),
})
}
}
@ -416,6 +419,11 @@ impl IndexMapper {
.collect()
}
/// Iterate over the raw entries of the index-mapping database.
///
/// The returned [`RoIter`] borrows `rtxn`, so it cannot outlive the transaction.
///
/// # Errors
///
/// Any error raised while creating the iterator is converted through `From` into
/// the error type of this function's `Result`.
pub fn iter<'txn>(&self, rtxn: &'txn RoTxn) -> Result<RoIter<'txn, Str, UuidCodec>> {
    let entries = self.index_mapping.iter(rtxn)?;
    Ok(entries)
}
/// Return the name of all indexes without opening them.
pub fn index_names(&self, rtxn: &RoTxn) -> Result<Vec<String>> {
self.index_mapping

View File

@ -51,7 +51,7 @@ use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::byteorder::BE;
use meilisearch_types::heed::types::{SerdeBincode, SerdeJson, Str, I128};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoTxn, RwTxn};
use meilisearch_types::heed::{self, Database, Env, PutFlags, RoIter, RoTxn, RwTxn};
use meilisearch_types::milli::documents::DocumentsBatchBuilder;
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::IndexerConfig;
@ -70,6 +70,7 @@ use uuid::Uuid;
use crate::index_mapper::IndexMapper;
use crate::utils::{check_index_swap_validity, clamp_to_page_size};
use crate::uuid_codec::UuidCodec;
pub(crate) type BEI128 = I128<BE>;
@ -415,6 +416,23 @@ impl IndexScheduler {
}
}
/// Bundles a read transaction together with a reference to the [`IndexMapper`],
/// so that an iterator over the database entries can be created from it.
///
/// This type does not store an iterator itself: call [`IndexIterator::iter`] to
/// obtain one that borrows the transaction held here.
pub struct IndexIterator<'txn> {
/// Read transaction that the iterators produced by `iter()` borrow from.
rtxn: RoTxn<'txn>,
/// Index mapper whose `iter` method is used to build the entries iterator.
index_mapper: &'txn IndexMapper,
}
impl<'txn> IndexIterator<'txn> {
pub fn new(rtxn: RoTxn<'txn>, index_mapper: &'txn IndexMapper) -> IndexIterator<'txn> {
Self { rtxn, index_mapper }
}
pub fn iter(&'txn self) -> Result<RoIter<'txn, Str, UuidCodec>> {
self.index_mapper.iter(&self.rtxn)
}
}
impl IndexScheduler {
/// Create an index scheduler and start its run loop.
pub fn new(
@ -691,6 +709,13 @@ impl IndexScheduler {
self.index_mapper.try_for_each_index(&rtxn, f)
}
/// Open a read transaction on the environment and wrap it, together with the
/// index mapper, into an [`IndexIterator`].
///
/// Call `.iter()` on the returned value to iterate over the database entries.
///
/// # Errors
///
/// Returns an error if the read transaction cannot be opened.
pub fn iter(&self) -> Result<IndexIterator> {
    let mapper = &self.index_mapper;
    self.env.read_txn().map(|rtxn| IndexIterator::new(rtxn, mapper)).map_err(Into::into)
}
/// Return the task ids matched by the given query from the index scheduler's point of view.
pub(crate) fn get_task_ids(&self, rtxn: &RoTxn, query: &Query) -> Result<RoaringBitmap> {
let ProcessingTasks {

View File

@ -99,16 +99,33 @@ pub async fn list_indexes(
) -> Result<HttpResponse, ResponseError> {
debug!(parameters = ?paginate, "List indexes");
let filters = index_scheduler.filters();
let indexes: Vec<Option<IndexView>> =
index_scheduler.try_for_each_index(|uid, index| -> Result<Option<IndexView>, _> {
if !filters.is_index_authorized(uid) {
return Ok(None);
}
Ok(Some(IndexView::new(uid.to_string(), index)?))
})?;
// Won't cause to open all indexes because IndexView doesn't keep the `Index` opened.
let indexes: Vec<IndexView> = indexes.into_iter().flatten().collect();
let ret = paginate.as_pagination().auto_paginate_sized(indexes.into_iter());
let index_iterator = index_scheduler.iter()?;
let database_iterator = index_iterator.iter()?;
let indexes = database_iterator
.filter(|res| {
res.as_ref().map(|(name, _)| filters.is_index_authorized(name)).unwrap_or(false)
})
.flat_map(|res| {
res.ok().and_then(|(name, _)| {
index_scheduler.index_stats(name).ok().map(|index| IndexView {
uid: name.to_string(),
created_at: index.inner_stats.created_at,
updated_at: index.inner_stats.updated_at,
primary_key: index.inner_stats.primary_key,
})
})
});
// The previous indexes iterator doesn't provide a usable size_hint().
// In order to find out how many elements there are, we must create a new iterator that only
// keeps the authorized indexes that are valid, consume it, and return the number of elements.
let index_iterator = index_scheduler.iter()?;
let database_iterator = index_iterator.iter()?;
let count = database_iterator
.filter(|res| {
res.as_ref().ok().map(|(name, _)| filters.is_index_authorized(name)).unwrap_or(false)
})
.count();
let ret = paginate.as_pagination().auto_paginate_unsized(count, indexes);
debug!(returns = ?ret, "List indexes");
Ok(HttpResponse::Ok().json(ret))