5325: Documents database stats r=irevoire a=ManyTheFish

# Pull Request

## Related issue
Fixes #5319

## List

- Create a DatabaseStats struct
- Compute and store the documents database stats in the IndexStats
- Force dumpless upgrade to update the index stats
- When documents are added, modified, or deleted, only recompute the database stats for the affected documents

Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Many the fish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-02-26 10:03:45 +00:00 committed by GitHub
commit f296c325ad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
20 changed files with 295 additions and 16 deletions

View file

@ -6,6 +6,7 @@ use std::{fs, thread};
use meilisearch_types::heed::types::{SerdeJson, Str};
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
use meilisearch_types::milli;
use meilisearch_types::milli::database_stats::DatabaseStats;
use meilisearch_types::milli::update::IndexerConfig;
use meilisearch_types::milli::{FieldDistribution, Index};
use serde::{Deserialize, Serialize};
@ -98,8 +99,9 @@ pub enum IndexStatus {
/// The statistics that can be computed from an `Index` object.
#[derive(Serialize, Deserialize, Debug)]
pub struct IndexStats {
/// Number of documents in the index.
pub number_of_documents: u64,
/// Stats of the documents database.
#[serde(default)]
pub documents_database_stats: DatabaseStats,
/// Size taken up by the index' DB, in bytes.
///
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
@ -138,9 +140,9 @@ impl IndexStats {
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
let arroy_stats = index.arroy_stats(rtxn)?;
Ok(IndexStats {
number_of_documents: index.number_of_documents(rtxn)?,
number_of_embeddings: Some(arroy_stats.number_of_embeddings),
number_of_embedded_documents: Some(arroy_stats.documents.len()),
documents_database_stats: index.documents_stats(rtxn)?.unwrap_or_default(),
database_size: index.on_disk_size()?,
used_database_size: index.used_size()?,
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),

View file

@ -365,7 +365,8 @@ pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
let stats = mapper.stats_of(rtxn, &name).unwrap();
s.push_str(&format!(
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
stats.number_of_documents, stats.field_distribution
stats.documents_database_stats.number_of_entries(),
stats.field_distribution
));
}

View file

@ -910,7 +910,11 @@ fn create_and_list_index() {
[
"kefir",
{
"number_of_documents": 0,
"documents_database_stats": {
"numberOfEntries": 0,
"totalKeySize": 0,
"totalValueSize": 0
},
"database_size": "[bytes]",
"number_of_embeddings": 0,
"number_of_embedded_documents": 0,