mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 19:27:27 +01:00
Eagerly compute stats as fallback to the cache.
- Refactor all around to avoid spawning indexes more times than necessary
This commit is contained in:
parent
3bbf760542
commit
076a3d371c
@ -847,8 +847,10 @@ impl IndexScheduler {
|
||||
// this is a non-critical operation. If it fails, we should not fail
|
||||
// the entire batch.
|
||||
let res = || -> Result<()> {
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?;
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.compute_and_store_stats_of(&mut wtxn, &index_uid)?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, stats)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}();
|
||||
@ -888,6 +890,10 @@ impl IndexScheduler {
|
||||
)?;
|
||||
index_wtxn.commit()?;
|
||||
}
|
||||
|
||||
// drop rtxn before starting a new wtxn on the same db
|
||||
rtxn.commit()?;
|
||||
|
||||
task.status = Status::Succeeded;
|
||||
task.details = Some(Details::IndexInfo { primary_key });
|
||||
|
||||
@ -897,7 +903,9 @@ impl IndexScheduler {
|
||||
// the entire batch.
|
||||
let res = || -> Result<()> {
|
||||
let mut wtxn = self.env.write_txn()?;
|
||||
self.index_mapper.compute_and_store_stats_of(&mut wtxn, &index_uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
let stats = crate::index_mapper::IndexStats::new(&index, &index_rtxn)?;
|
||||
self.index_mapper.store_stats_of(&mut wtxn, &index_uid, stats)?;
|
||||
wtxn.commit()?;
|
||||
Ok(())
|
||||
}();
|
||||
|
@ -54,8 +54,11 @@ pub struct IndexMapper {
|
||||
|
||||
/// Map an index name with an index uuid currently available on disk.
|
||||
pub(crate) index_mapping: Database<Str, UuidCodec>,
|
||||
/// Map an index name with the cached stats associated to the index.
|
||||
pub(crate) index_stats: Database<Str, SerdeJson<IndexStats>>,
|
||||
/// Map an index UUID with the cached stats associated to the index.
|
||||
///
|
||||
/// Using an UUID forces to use the index_mapping table to recover the index behind a name, ensuring
|
||||
/// consistency wrt index swapping.
|
||||
pub(crate) index_stats: Database<UuidCodec, SerdeJson<IndexStats>>,
|
||||
|
||||
/// Path to the folder where the LMDB environments of each index are.
|
||||
base_path: PathBuf,
|
||||
@ -80,15 +83,39 @@ pub enum IndexStatus {
|
||||
Available(Index),
|
||||
}
|
||||
|
||||
/// The statistics that can be computed from an `Index` object.
|
||||
#[derive(Serialize, Deserialize, Debug)]
|
||||
pub struct IndexStats {
|
||||
/// Number of documents in the index.
|
||||
pub number_of_documents: u64,
|
||||
/// Size of the index' DB, in bytes.
|
||||
pub database_size: u64,
|
||||
/// Association of every field name with the number of times it occurs in the documents.
|
||||
pub field_distribution: FieldDistribution,
|
||||
/// Creation date of the index.
|
||||
pub created_at: OffsetDateTime,
|
||||
/// Date of the last update of the index.
|
||||
pub updated_at: OffsetDateTime,
|
||||
}
|
||||
|
||||
impl IndexStats {
|
||||
/// Compute the stats of an index
|
||||
///
|
||||
/// # Parameters
|
||||
///
|
||||
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
|
||||
pub fn new(index: &Index, rtxn: &RoTxn) -> Result<Self> {
|
||||
let database_size = index.on_disk_size()?;
|
||||
Ok(IndexStats {
|
||||
number_of_documents: index.number_of_documents(rtxn)?,
|
||||
database_size,
|
||||
field_distribution: index.field_distribution(rtxn)?,
|
||||
created_at: index.created_at(rtxn)?,
|
||||
updated_at: index.updated_at(rtxn)?,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl IndexMapper {
|
||||
pub fn new(
|
||||
env: &Env,
|
||||
@ -149,12 +176,14 @@ impl IndexMapper {
|
||||
/// Removes the index from the mapping table and the in-memory index map
|
||||
/// but keeps the associated tasks.
|
||||
pub fn delete_index(&self, mut wtxn: RwTxn, name: &str) -> Result<()> {
|
||||
self.index_stats.delete(&mut wtxn, name)?;
|
||||
let uuid = self
|
||||
.index_mapping
|
||||
.get(&wtxn, name)?
|
||||
.ok_or_else(|| Error::IndexNotFound(name.to_string()))?;
|
||||
|
||||
// Not an error if the index had no stats in cache.
|
||||
self.index_stats.delete(&mut wtxn, &uuid)?;
|
||||
|
||||
// Once we retrieved the UUID of the index we remove it from the mapping table.
|
||||
assert!(self.index_mapping.delete(&mut wtxn, name)?);
|
||||
|
||||
@ -375,26 +404,42 @@ impl IndexMapper {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return the stored stats of an index.
|
||||
/// The stats of an index.
|
||||
///
|
||||
/// If available in the cache, they are directly returned.
|
||||
/// Otherwise, the `Index` is opened to compute the stats on the fly (the result is not cached).
|
||||
/// The stats for an index are cached after each `Index` update.
|
||||
pub fn stats_of(&self, rtxn: &RoTxn, index_uid: &str) -> Result<IndexStats> {
|
||||
self.index_stats
|
||||
let uuid = self
|
||||
.index_mapping
|
||||
.get(rtxn, index_uid)?
|
||||
.ok_or_else(|| Error::IndexNotFound(index_uid.to_string()))
|
||||
.ok_or_else(|| Error::IndexNotFound(index_uid.to_string()))?;
|
||||
|
||||
match self.index_stats.get(rtxn, &uuid)? {
|
||||
Some(stats) => Ok(stats),
|
||||
None => {
|
||||
let index = self.index(rtxn, index_uid)?;
|
||||
let index_rtxn = index.read_txn()?;
|
||||
IndexStats::new(&index, &index_rtxn)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Return the stats of an index and write it in the index-mapper database.
|
||||
pub fn compute_and_store_stats_of(&self, wtxn: &mut RwTxn, index_uid: &str) -> Result<()> {
|
||||
let index = self.index(wtxn, index_uid)?;
|
||||
let database_size = index.on_disk_size()?;
|
||||
let rtxn = index.read_txn()?;
|
||||
let stats = IndexStats {
|
||||
number_of_documents: index.number_of_documents(&rtxn)?,
|
||||
database_size,
|
||||
field_distribution: index.field_distribution(&rtxn)?,
|
||||
created_at: index.created_at(&rtxn)?,
|
||||
updated_at: index.updated_at(&rtxn)?,
|
||||
};
|
||||
self.index_stats.put(wtxn, index_uid, &stats)?;
|
||||
/// Stores the new stats for an index.
|
||||
///
|
||||
/// Expected usage is to compute the stats the index using `IndexStats::new`, the pass it to this function.
|
||||
pub fn store_stats_of(
|
||||
&self,
|
||||
wtxn: &mut RwTxn,
|
||||
index_uid: &str,
|
||||
stats: IndexStats,
|
||||
) -> Result<()> {
|
||||
let uuid = self
|
||||
.index_mapping
|
||||
.get(wtxn, index_uid)?
|
||||
.ok_or_else(|| Error::IndexNotFound(index_uid.to_string()))?;
|
||||
|
||||
self.index_stats.put(wtxn, &uuid, &stats)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -1245,9 +1245,14 @@ struct IndexBudget {
|
||||
task_db_size: usize,
|
||||
}
|
||||
|
||||
/// The statistics that can be computed from an `Index` object and the scheduler.
|
||||
///
|
||||
/// Compared with `index_mapper::IndexStats`, it adds the scheduling status.
|
||||
#[derive(Debug)]
|
||||
pub struct IndexStats {
|
||||
/// Whether this index is currently performing indexation, according to the scheduler.
|
||||
pub is_indexing: bool,
|
||||
/// Internal stats computed from the index.
|
||||
pub inner_stats: index_mapper::IndexStats,
|
||||
}
|
||||
|
||||
|
@ -220,11 +220,15 @@ pub async fn delete_index(
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
/// Stats of an `Index`, as known to the `stats` route.
|
||||
#[derive(Serialize, Debug)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct IndexStats {
|
||||
/// Number of documents in the index
|
||||
pub number_of_documents: u64,
|
||||
/// Whether the index is currently performing indexation, according to the scheduler.
|
||||
pub is_indexing: bool,
|
||||
/// Association of every field name with the number of times it occurs in the documents.
|
||||
pub field_distribution: FieldDistribution,
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user