mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-02-11 21:13:32 +01:00
Add document database stats
This commit is contained in:
parent
4abf0db0b4
commit
b5dc971afe
@ -6,6 +6,7 @@ use std::{fs, thread};
|
|||||||
use meilisearch_types::heed::types::{SerdeJson, Str};
|
use meilisearch_types::heed::types::{SerdeJson, Str};
|
||||||
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
use meilisearch_types::heed::{Database, Env, RoTxn, RwTxn};
|
||||||
use meilisearch_types::milli;
|
use meilisearch_types::milli;
|
||||||
|
use meilisearch_types::milli::database_stats::DatabaseStats;
|
||||||
use meilisearch_types::milli::update::IndexerConfig;
|
use meilisearch_types::milli::update::IndexerConfig;
|
||||||
use meilisearch_types::milli::{FieldDistribution, Index};
|
use meilisearch_types::milli::{FieldDistribution, Index};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
@ -98,8 +99,9 @@ pub enum IndexStatus {
|
|||||||
/// The statistics that can be computed from an `Index` object.
|
/// The statistics that can be computed from an `Index` object.
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
pub struct IndexStats {
|
pub struct IndexStats {
|
||||||
/// Number of documents in the index.
|
/// Stats of the documents database.
|
||||||
pub number_of_documents: u64,
|
#[serde(default)]
|
||||||
|
pub documents_database_stats: DatabaseStats,
|
||||||
/// Size taken up by the index' DB, in bytes.
|
/// Size taken up by the index' DB, in bytes.
|
||||||
///
|
///
|
||||||
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
|
/// This includes the size taken by both the used and free pages of the DB, and as the free pages
|
||||||
@ -131,7 +133,7 @@ impl IndexStats {
|
|||||||
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
|
/// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
|
||||||
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
|
pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
|
||||||
Ok(IndexStats {
|
Ok(IndexStats {
|
||||||
number_of_documents: index.number_of_documents(rtxn)?,
|
documents_database_stats: index.documents_database_stats(rtxn)?,
|
||||||
database_size: index.on_disk_size()?,
|
database_size: index.on_disk_size()?,
|
||||||
used_database_size: index.used_size()?,
|
used_database_size: index.used_size()?,
|
||||||
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
|
primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
|
||||||
|
@ -365,7 +365,8 @@ pub fn snapshot_index_mapper(rtxn: &RoTxn, mapper: &IndexMapper) -> String {
|
|||||||
let stats = mapper.stats_of(rtxn, &name).unwrap();
|
let stats = mapper.stats_of(rtxn, &name).unwrap();
|
||||||
s.push_str(&format!(
|
s.push_str(&format!(
|
||||||
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
|
"{name}: {{ number_of_documents: {}, field_distribution: {:?} }}\n",
|
||||||
stats.number_of_documents, stats.field_distribution
|
stats.documents_database_stats.number_of_entries(),
|
||||||
|
stats.field_distribution
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -903,14 +903,22 @@ fn create_and_list_index() {
|
|||||||
|
|
||||||
index_scheduler.index("kefir").unwrap();
|
index_scheduler.index("kefir").unwrap();
|
||||||
let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
|
let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
|
||||||
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#"
|
snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
|
||||||
[
|
[
|
||||||
1,
|
1,
|
||||||
[
|
[
|
||||||
[
|
[
|
||||||
"kefir",
|
"kefir",
|
||||||
{
|
{
|
||||||
"number_of_documents": 0,
|
"documents_database_stats": {
|
||||||
|
"numberOfEntries": 0,
|
||||||
|
"totalKeySize": 0,
|
||||||
|
"totalValueSize": 0,
|
||||||
|
"maxKeySize": 0,
|
||||||
|
"maxValueSize": 0,
|
||||||
|
"minKeySize": 0,
|
||||||
|
"minValueSize": 0
|
||||||
|
},
|
||||||
"database_size": "[bytes]",
|
"database_size": "[bytes]",
|
||||||
"used_database_size": "[bytes]",
|
"used_database_size": "[bytes]",
|
||||||
"primary_key": null,
|
"primary_key": null,
|
||||||
@ -921,5 +929,5 @@ fn create_and_list_index() {
|
|||||||
]
|
]
|
||||||
]
|
]
|
||||||
]
|
]
|
||||||
"#);
|
"###);
|
||||||
}
|
}
|
||||||
|
@ -494,6 +494,12 @@ pub async fn delete_index(
|
|||||||
pub struct IndexStats {
|
pub struct IndexStats {
|
||||||
/// Number of documents in the index
|
/// Number of documents in the index
|
||||||
pub number_of_documents: u64,
|
pub number_of_documents: u64,
|
||||||
|
/// Size of the documents database, in bytes.
|
||||||
|
pub raw_document_db_size: u64,
|
||||||
|
/// Maximum size of a document in the documents database.
|
||||||
|
pub max_document_size: u64,
|
||||||
|
/// Average size of a document in the documents database.
|
||||||
|
pub avg_document_size: u64,
|
||||||
/// Whether or not the index is currently ingesting document
|
/// Whether or not the index is currently ingesting document
|
||||||
pub is_indexing: bool,
|
pub is_indexing: bool,
|
||||||
/// Association of every field name with the number of times it occurs in the documents.
|
/// Association of every field name with the number of times it occurs in the documents.
|
||||||
@ -504,7 +510,10 @@ pub struct IndexStats {
|
|||||||
impl From<index_scheduler::IndexStats> for IndexStats {
|
impl From<index_scheduler::IndexStats> for IndexStats {
|
||||||
fn from(stats: index_scheduler::IndexStats) -> Self {
|
fn from(stats: index_scheduler::IndexStats) -> Self {
|
||||||
IndexStats {
|
IndexStats {
|
||||||
number_of_documents: stats.inner_stats.number_of_documents,
|
number_of_documents: stats.inner_stats.documents_database_stats.number_of_entries(),
|
||||||
|
raw_document_db_size: stats.inner_stats.documents_database_stats.total_value_size(),
|
||||||
|
max_document_size: stats.inner_stats.documents_database_stats.max_value_size(),
|
||||||
|
avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(),
|
||||||
is_indexing: stats.is_indexing,
|
is_indexing: stats.is_indexing,
|
||||||
field_distribution: stats.inner_stats.field_distribution,
|
field_distribution: stats.inner_stats.field_distribution,
|
||||||
}
|
}
|
||||||
|
@ -160,6 +160,9 @@ async fn delete_document_by_filter() {
|
|||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 4,
|
"numberOfDocuments": 4,
|
||||||
|
"rawDocumentDbSize": 42,
|
||||||
|
"maxDocumentSize": 13,
|
||||||
|
"avgDocumentSize": 10,
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"fieldDistribution": {
|
"fieldDistribution": {
|
||||||
"color": 3,
|
"color": 3,
|
||||||
@ -207,6 +210,9 @@ async fn delete_document_by_filter() {
|
|||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 2,
|
"numberOfDocuments": 2,
|
||||||
|
"rawDocumentDbSize": 16,
|
||||||
|
"maxDocumentSize": 12,
|
||||||
|
"avgDocumentSize": 8,
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"fieldDistribution": {
|
"fieldDistribution": {
|
||||||
"color": 1,
|
"color": 1,
|
||||||
@ -273,6 +279,9 @@ async fn delete_document_by_filter() {
|
|||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 1,
|
"numberOfDocuments": 1,
|
||||||
|
"rawDocumentDbSize": 12,
|
||||||
|
"maxDocumentSize": 12,
|
||||||
|
"avgDocumentSize": 12,
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"fieldDistribution": {
|
"fieldDistribution": {
|
||||||
"color": 1,
|
"color": 1,
|
||||||
|
@ -172,6 +172,9 @@ async fn import_dump_v1_movie_with_settings() {
|
|||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
|
"rawDocumentDbSize": 21965,
|
||||||
|
"maxDocumentSize": 743,
|
||||||
|
"avgDocumentSize": 414,
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"fieldDistribution": {
|
"fieldDistribution": {
|
||||||
"genres": 53,
|
"genres": 53,
|
||||||
|
100
crates/milli/src/database_stats.rs
Normal file
100
crates/milli/src/database_stats.rs
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
use heed::types::Bytes;
|
||||||
|
use heed::Database;
|
||||||
|
use heed::RoTxn;
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::Result;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
/// The stats of a database.
///
/// Aggregated entry count and key/value sizes of a single database. Sizes are the
/// lengths of the raw key and value byte slices. The fields are (de)serialized with
/// camelCase names (e.g. `numberOfEntries`).
|
||||||
|
pub struct DatabaseStats {
|
||||||
|
/// The number of entries in the database.
|
||||||
|
number_of_entries: u64,
|
||||||
|
/// The total size of the keys in the database.
|
||||||
|
total_key_size: u64,
|
||||||
|
/// The total size of the values in the database.
|
||||||
|
total_value_size: u64,
|
||||||
|
/// The maximum size of a key in the database.
|
||||||
|
max_key_size: u64,
|
||||||
|
/// The maximum size of a value in the database.
|
||||||
|
max_value_size: u64,
|
||||||
|
/// The minimum size of a key in the database.
/// This is `0` when the database has no entries.
|
||||||
|
min_key_size: u64,
|
||||||
|
/// The minimum size of a value in the database.
/// This is `0` when the database has no entries.
|
||||||
|
min_value_size: u64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DatabaseStats {
|
||||||
|
/// Returns the stats of the database.
|
||||||
|
///
|
||||||
|
/// This function iterates over the whole database and computes the stats.
|
||||||
|
/// It is not efficient and should be cached somewhere.
|
||||||
|
pub(crate) fn new<'a>(database: Database<Bytes, Bytes>, rtxn: &RoTxn<'a>) -> Result<Self> {
|
||||||
|
let mut database_stats = Self {
|
||||||
|
number_of_entries: 0,
|
||||||
|
total_key_size: 0,
|
||||||
|
total_value_size: 0,
|
||||||
|
max_key_size: 0,
|
||||||
|
max_value_size: 0,
|
||||||
|
min_key_size: u64::MAX,
|
||||||
|
min_value_size: u64::MAX,
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut iter = database.iter(rtxn)?;
|
||||||
|
while let Some((key, value)) = iter.next().transpose()? {
|
||||||
|
let key_size = key.len() as u64;
|
||||||
|
let value_size = value.len() as u64;
|
||||||
|
database_stats.number_of_entries += 1;
|
||||||
|
database_stats.total_key_size += key_size;
|
||||||
|
database_stats.total_value_size += value_size;
|
||||||
|
database_stats.max_key_size = database_stats.max_key_size.max(key_size);
|
||||||
|
database_stats.max_value_size = database_stats.max_value_size.max(value_size);
|
||||||
|
database_stats.min_key_size = database_stats.min_key_size.min(key_size);
|
||||||
|
database_stats.min_value_size = database_stats.min_value_size.min(value_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
if database_stats.number_of_entries == 0 {
|
||||||
|
database_stats.min_key_size = 0;
|
||||||
|
database_stats.min_value_size = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(database_stats)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn average_key_size(&self) -> u64 {
|
||||||
|
self.total_key_size / self.number_of_entries
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn average_value_size(&self) -> u64 {
|
||||||
|
self.total_value_size / self.number_of_entries
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the number of entries recorded in these stats.
pub fn number_of_entries(&self) -> u64 {
|
||||||
|
self.number_of_entries
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the summed size of all keys recorded in these stats.
pub fn total_key_size(&self) -> u64 {
|
||||||
|
self.total_key_size
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the summed size of all values recorded in these stats.
pub fn total_value_size(&self) -> u64 {
|
||||||
|
self.total_value_size
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the size of the largest key recorded in these stats.
pub fn max_key_size(&self) -> u64 {
|
||||||
|
self.max_key_size
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the size of the largest value recorded in these stats.
pub fn max_value_size(&self) -> u64 {
|
||||||
|
self.max_value_size
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the size of the smallest key recorded in these stats.
pub fn min_key_size(&self) -> u64 {
|
||||||
|
self.min_key_size
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the size of the smallest value recorded in these stats.
pub fn min_value_size(&self) -> u64 {
|
||||||
|
self.min_value_size
|
||||||
|
}
|
||||||
|
}
|
@ -11,6 +11,7 @@ use rstar::RTree;
|
|||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
use crate::constants::{self, RESERVED_VECTORS_FIELD_NAME};
|
use crate::constants::{self, RESERVED_VECTORS_FIELD_NAME};
|
||||||
|
use crate::database_stats::DatabaseStats;
|
||||||
use crate::documents::PrimaryKey;
|
use crate::documents::PrimaryKey;
|
||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
@ -403,6 +404,11 @@ impl Index {
|
|||||||
Ok(count.unwrap_or_default())
|
Ok(count.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the stats of the database.
|
||||||
|
pub fn documents_database_stats(&self, rtxn: &RoTxn<'_>) -> Result<DatabaseStats> {
|
||||||
|
Ok(DatabaseStats::new(self.documents.remap_types::<Bytes, Bytes>(), rtxn)?)
|
||||||
|
}
|
||||||
|
|
||||||
/* primary key */
|
/* primary key */
|
||||||
|
|
||||||
/// Writes the documents primary key, this is the field name that is used to store the id.
|
/// Writes the documents primary key, this is the field name that is used to store the id.
|
||||||
|
@ -10,6 +10,7 @@ pub mod documents;
|
|||||||
|
|
||||||
mod asc_desc;
|
mod asc_desc;
|
||||||
mod criterion;
|
mod criterion;
|
||||||
|
pub mod database_stats;
|
||||||
mod error;
|
mod error;
|
||||||
mod external_documents_ids;
|
mod external_documents_ids;
|
||||||
pub mod facet;
|
pub mod facet;
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
mod v1_12;
|
mod v1_12;
|
||||||
|
|
||||||
use heed::RwTxn;
|
use heed::RwTxn;
|
||||||
use v1_12::{V1_12_3_To_Current, V1_12_To_V1_12_3};
|
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3, V1_13_0_To_Current};
|
||||||
|
|
||||||
use crate::progress::{Progress, VariableNameStep};
|
use crate::progress::{Progress, VariableNameStep};
|
||||||
use crate::{Index, InternalError, Result};
|
use crate::{Index, InternalError, Result};
|
||||||
@ -26,11 +26,13 @@ pub fn upgrade(
|
|||||||
progress: Progress,
|
progress: Progress,
|
||||||
) -> Result<bool> {
|
) -> Result<bool> {
|
||||||
let from = index.get_version(wtxn)?.unwrap_or(db_version);
|
let from = index.get_version(wtxn)?.unwrap_or(db_version);
|
||||||
let upgrade_functions: &[&dyn UpgradeIndex] = &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_Current()];
|
let upgrade_functions: &[&dyn UpgradeIndex] =
|
||||||
|
&[&V1_12_To_V1_12_3 {}, &V1_12_3_To_V1_13_0(), &V1_13_0_To_Current()];
|
||||||
|
|
||||||
let start = match from {
|
let start = match from {
|
||||||
(1, 12, 0..=2) => 0,
|
(1, 12, 0..=2) => 0,
|
||||||
(1, 12, 3..) => 1,
|
(1, 12, 3..) => 1,
|
||||||
|
(1, 13, 0) => 2,
|
||||||
// We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
|
// We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
|
||||||
(1, 13, _) => return Ok(false),
|
(1, 13, _) => return Ok(false),
|
||||||
(major, minor, patch) => {
|
(major, minor, patch) => {
|
||||||
|
@ -33,9 +33,33 @@ impl UpgradeIndex for V1_12_To_V1_12_3 {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[allow(non_camel_case_types)]
|
#[allow(non_camel_case_types)]
|
||||||
pub(super) struct V1_12_3_To_Current();
|
pub(super) struct V1_12_3_To_V1_13_0();
|
||||||
|
|
||||||
impl UpgradeIndex for V1_12_3_To_Current {
|
// Upgrade step for `V1_12_3_To_V1_13_0`: it does not touch the index itself.
impl UpgradeIndex for V1_12_3_To_V1_13_0 {
|
||||||
|
// Ignores every argument (all are underscore-prefixed and unused) and always
// returns `Ok(true)`.
fn upgrade(
|
||||||
|
&self,
|
||||||
|
_wtxn: &mut RwTxn,
|
||||||
|
_index: &Index,
|
||||||
|
_original: (u32, u32, u32),
|
||||||
|
_progress: Progress,
|
||||||
|
) -> Result<bool> {
|
||||||
|
// recompute the indexes stats
// NOTE(review): presumably the `true` return value is what asks the caller to
// regenerate the index stats — confirm against the `UpgradeIndex` trait contract.
|
||||||
|
Ok(true)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Builds the `(major, minor, patch)` triple by parsing the `VERSION_*` constants;
// each `unwrap` panics if the corresponding constant is not a valid `u32`.
// NOTE(review): the type name suggests a fixed 1.13.0 target, yet the value comes
// from the `VERSION_*` constants — confirm this is intended.
fn target_version(&self) -> (u32, u32, u32) {
|
||||||
|
(
|
||||||
|
VERSION_MAJOR.parse().unwrap(),
|
||||||
|
VERSION_MINOR.parse().unwrap(),
|
||||||
|
VERSION_PATCH.parse().unwrap(),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(non_camel_case_types)]
|
||||||
|
pub(super) struct V1_13_0_To_Current();
|
||||||
|
|
||||||
|
impl UpgradeIndex for V1_13_0_To_Current {
|
||||||
fn upgrade(
|
fn upgrade(
|
||||||
&self,
|
&self,
|
||||||
_wtxn: &mut RwTxn,
|
_wtxn: &mut RwTxn,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user