diff --git a/crates/meilisearch/src/routes/indexes/mod.rs b/crates/meilisearch/src/routes/indexes/mod.rs index 5aebf5cac..48ed1cfb1 100644 --- a/crates/meilisearch/src/routes/indexes/mod.rs +++ b/crates/meilisearch/src/routes/indexes/mod.rs @@ -518,7 +518,7 @@ impl From for IndexStats { .inner_stats .number_of_documents .unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()), - raw_document_db_size: stats.inner_stats.documents_database_stats.total_value_size(), + raw_document_db_size: stats.inner_stats.documents_database_stats.total_size(), avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(), is_indexing: stats.is_indexing, number_of_embeddings: stats.inner_stats.number_of_embeddings, diff --git a/crates/meilisearch/tests/documents/delete_documents.rs b/crates/meilisearch/tests/documents/delete_documents.rs index 4dfe2cc79..060f17958 100644 --- a/crates/meilisearch/tests/documents/delete_documents.rs +++ b/crates/meilisearch/tests/documents/delete_documents.rs @@ -157,11 +157,14 @@ async fn delete_document_by_filter() { index.wait_task(task.uid()).await.succeeded(); let (stats, _) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 4, - "rawDocumentDbSize": 42, - "avgDocumentSize": 10, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -208,11 +211,14 @@ async fn delete_document_by_filter() { "###); let (stats, _) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 2, - "rawDocumentDbSize": 16, - "avgDocumentSize": 8, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, 
"numberOfEmbeddedDocuments": 0, @@ -278,11 +284,14 @@ async fn delete_document_by_filter() { "###); let (stats, _) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 1, - "rawDocumentDbSize": 12, - "avgDocumentSize": 12, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, diff --git a/crates/meilisearch/tests/dumps/mod.rs b/crates/meilisearch/tests/dumps/mod.rs index addcbeeb5..e5aa52dc6 100644 --- a/crates/meilisearch/tests/dumps/mod.rs +++ b/crates/meilisearch/tests/dumps/mod.rs @@ -28,12 +28,15 @@ async fn import_dump_v1_movie_raw() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 21965, - "avgDocumentSize": 414, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -185,12 +188,15 @@ async fn import_dump_v1_movie_with_settings() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 21965, - "avgDocumentSize": 414, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -355,12 +361,15 @@ async fn import_dump_v1_rubygems_with_settings() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + 
}), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 8606, - "avgDocumentSize": 162, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -522,12 +531,15 @@ async fn import_dump_v2_movie_raw() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 21965, - "avgDocumentSize": 414, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -679,12 +691,15 @@ async fn import_dump_v2_movie_with_settings() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 21965, - "avgDocumentSize": 414, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -846,12 +861,15 @@ async fn import_dump_v2_rubygems_with_settings() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 8606, - "avgDocumentSize": 162, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -1010,12 +1028,15 @@ async fn import_dump_v3_movie_raw() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => 
"[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 21965, - "avgDocumentSize": 414, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -1167,12 +1188,15 @@ async fn import_dump_v3_movie_with_settings() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 21965, - "avgDocumentSize": 414, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -1334,12 +1358,15 @@ async fn import_dump_v3_rubygems_with_settings() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 8606, - "avgDocumentSize": 162, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -1498,12 +1525,15 @@ async fn import_dump_v4_movie_raw() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 21965, - "avgDocumentSize": 414, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -1655,12 +1685,15 @@ async fn import_dump_v4_movie_with_settings() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + 
".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 21965, - "avgDocumentSize": 414, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -1822,12 +1855,15 @@ async fn import_dump_v4_rubygems_with_settings() { let (stats, code) = index.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 53, - "rawDocumentDbSize": 8606, - "avgDocumentSize": 162, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -1994,11 +2030,14 @@ async fn import_dump_v5() { let (stats, code) = index1.stats().await; snapshot!(code, @"200 OK"); - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 10, - "rawDocumentDbSize": 6782, - "avgDocumentSize": 678, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -2031,12 +2070,15 @@ async fn import_dump_v5() { let (stats, code) = index2.stats().await; snapshot!(code, @"200 OK"); snapshot!( - json_string!(stats), + json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 10, - "rawDocumentDbSize": 6782, - "avgDocumentSize": 678, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, diff --git a/crates/meilisearch/tests/stats/mod.rs b/crates/meilisearch/tests/stats/mod.rs index 20a8eaef6..aee626460 100644 --- a/crates/meilisearch/tests/stats/mod.rs +++ b/crates/meilisearch/tests/stats/mod.rs @@ -110,11 
+110,14 @@ async fn add_remove_embeddings() { index.wait_task(response.uid()).await.succeeded(); let (stats, _code) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 2, - "rawDocumentDbSize": 27, - "avgDocumentSize": 13, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 5, "numberOfEmbeddedDocuments": 2, @@ -135,11 +138,14 @@ async fn add_remove_embeddings() { index.wait_task(response.uid()).await.succeeded(); let (stats, _code) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 2, - "rawDocumentDbSize": 27, - "avgDocumentSize": 13, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 3, "numberOfEmbeddedDocuments": 2, @@ -160,11 +166,14 @@ async fn add_remove_embeddings() { index.wait_task(response.uid()).await.succeeded(); let (stats, _code) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 2, - "rawDocumentDbSize": 27, - "avgDocumentSize": 13, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 2, "numberOfEmbeddedDocuments": 2, @@ -186,11 +195,14 @@ async fn add_remove_embeddings() { index.wait_task(response.uid()).await.succeeded(); let (stats, _code) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 2, - "rawDocumentDbSize": 27, - "avgDocumentSize": 13, + "rawDocumentDbSize": "[size]", + 
"avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 2, "numberOfEmbeddedDocuments": 1, @@ -236,11 +248,14 @@ async fn add_remove_embedded_documents() { index.wait_task(response.uid()).await.succeeded(); let (stats, _code) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 2, - "rawDocumentDbSize": 27, - "avgDocumentSize": 13, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 5, "numberOfEmbeddedDocuments": 2, @@ -257,11 +272,14 @@ async fn add_remove_embedded_documents() { index.wait_task(response.uid()).await.succeeded(); let (stats, _code) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 1, - "rawDocumentDbSize": 13, - "avgDocumentSize": 13, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 3, "numberOfEmbeddedDocuments": 1, @@ -290,11 +308,14 @@ async fn update_embedder_settings() { index.wait_task(response.uid()).await.succeeded(); let (stats, _code) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => "[size]", + }), @r###" { "numberOfDocuments": 2, - "rawDocumentDbSize": 108, - "avgDocumentSize": 54, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -326,11 +347,14 @@ async fn update_embedder_settings() { server.wait_task(response.uid()).await.succeeded(); let (stats, _code) = index.stats().await; - snapshot!(json_string!(stats), @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[size]", + ".avgDocumentSize" => 
"[size]", + }), @r###" { "numberOfDocuments": 2, - "rawDocumentDbSize": 108, - "avgDocumentSize": 54, + "rawDocumentDbSize": "[size]", + "avgDocumentSize": "[size]", "isIndexing": false, "numberOfEmbeddings": 3, "numberOfEmbeddedDocuments": 2, diff --git a/crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs b/crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs index 9fc4d0e5b..1b2ae054c 100644 --- a/crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs +++ b/crates/meilisearch/tests/upgrade/v1_12/v1_12_0.rs @@ -133,7 +133,9 @@ async fn check_the_index_scheduler(server: &Server) { let (stats, _) = server.stats().await; assert_json_snapshot!(stats, { ".databaseSize" => "[bytes]", - ".usedDatabaseSize" => "[bytes]" + ".usedDatabaseSize" => "[bytes]", + ".indexes.kefir.rawDocumentDbSize" => "[bytes]", + ".indexes.kefir.avgDocumentSize" => "[bytes]", }, @r###" { @@ -143,8 +145,8 @@ async fn check_the_index_scheduler(server: &Server) { "indexes": { "kefir": { "numberOfDocuments": 1, - "rawDocumentDbSize": 109, - "avgDocumentSize": 109, + "rawDocumentDbSize": "[bytes]", + "avgDocumentSize": "[bytes]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -217,7 +219,9 @@ async fn check_the_index_scheduler(server: &Server) { let (stats, _) = server.stats().await; assert_json_snapshot!(stats, { ".databaseSize" => "[bytes]", - ".usedDatabaseSize" => "[bytes]" + ".usedDatabaseSize" => "[bytes]", + ".indexes.kefir.rawDocumentDbSize" => "[bytes]", + ".indexes.kefir.avgDocumentSize" => "[bytes]", }, @r###" { @@ -227,8 +231,8 @@ async fn check_the_index_scheduler(server: &Server) { "indexes": { "kefir": { "numberOfDocuments": 1, - "rawDocumentDbSize": 109, - "avgDocumentSize": 109, + "rawDocumentDbSize": "[bytes]", + "avgDocumentSize": "[bytes]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, @@ -245,11 +249,14 @@ async fn check_the_index_scheduler(server: &Server) { "###); let index = server.index("kefir"); let (stats, _) = 
index.stats().await; - snapshot!(stats, @r###" + snapshot!(json_string!(stats, { + ".rawDocumentDbSize" => "[bytes]", + ".avgDocumentSize" => "[bytes]", + }), @r###" { "numberOfDocuments": 1, - "rawDocumentDbSize": 109, - "avgDocumentSize": 109, + "rawDocumentDbSize": "[bytes]", + "avgDocumentSize": "[bytes]", "isIndexing": false, "numberOfEmbeddings": 0, "numberOfEmbeddedDocuments": 0, diff --git a/crates/milli/src/database_stats.rs b/crates/milli/src/database_stats.rs index d97dc13ba..7da1fbd2b 100644 --- a/crates/milli/src/database_stats.rs +++ b/crates/milli/src/database_stats.rs @@ -1,8 +1,13 @@ -use heed::types::Bytes; +use std::mem; + use heed::Database; +use heed::DatabaseStat; use heed::RoTxn; +use heed::Unspecified; use serde::{Deserialize, Serialize}; +use crate::BEU32; + #[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)] #[serde(rename_all = "camelCase")] /// The stats of a database. @@ -20,58 +25,24 @@ impl DatabaseStats { /// - /// This function iterates over the whole database and computes the stats. - /// It is not efficient and should be cached somewhere. + /// This function reads the page and entry counts reported by `Database::stat` and derives + /// estimated key and value sizes from them, without iterating over the entries. - pub(crate) fn new(database: Database, rtxn: &RoTxn<'_>) -> heed::Result { - let mut database_stats = - Self { number_of_entries: 0, total_key_size: 0, total_value_size: 0 }; + pub(crate) fn new( + database: Database<BEU32, Unspecified>, + rtxn: &RoTxn<'_>, + ) -> heed::Result<Self> { + let DatabaseStat { page_size, depth: _, branch_pages, leaf_pages, overflow_pages, entries } = + database.stat(rtxn)?; - let mut iter = database.iter(rtxn)?; - while let Some((key, value)) = iter.next().transpose()? { - let key_size = key.len() as u64; - let value_size = value.len() as u64; - database_stats.total_key_size += key_size; - database_stats.total_value_size += value_size; - } + // The total size covers every page of the database: branch, leaf, and overflow pages.
+ let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size as usize; + // We compute an estimated size for the keys; the values get the remainder, saturating at zero. + let total_key_size = entries * (mem::size_of::<u32>() + 4); + let total_value_size = total_size.saturating_sub(total_key_size); - database_stats.number_of_entries = database.len(rtxn)?; - - Ok(database_stats) - } - - /// Recomputes the stats of the database and returns the new stats. - /// - /// This function is used to update the stats of the database when some keys are modified. - /// It is more efficient than the `new` function because it does not iterate over the whole database but only the modified keys comparing the before and after states. - pub(crate) fn recompute( - mut stats: Self, - database: Database, - before_rtxn: &RoTxn<'_>, - after_rtxn: &RoTxn<'_>, - modified_keys: I, - ) -> heed::Result - where - I: IntoIterator, - K: AsRef<[u8]>, - { - for key in modified_keys { - let key = key.as_ref(); - if let Some(value) = database.get(after_rtxn, key)? { - let key_size = key.len() as u64; - let value_size = value.len() as u64; - stats.total_key_size = stats.total_key_size.saturating_add(key_size); - stats.total_value_size = stats.total_value_size.saturating_add(value_size); - } - - if let Some(value) = database.get(before_rtxn, key)?
{ - let key_size = key.len() as u64; - let value_size = value.len() as u64; - stats.total_key_size = stats.total_key_size.saturating_sub(key_size); - stats.total_value_size = stats.total_value_size.saturating_sub(value_size); - } - } - - stats.number_of_entries = database.len(after_rtxn)?; - - Ok(stats) + Ok(Self { + number_of_entries: entries as u64, + total_key_size: total_key_size as u64, + total_value_size: total_value_size as u64, + }) } pub fn average_key_size(&self) -> u64 { @@ -86,6 +57,10 @@ impl DatabaseStats { self.number_of_entries } + pub fn total_size(&self) -> u64 { + self.total_key_size + self.total_value_size + } + pub fn total_key_size(&self) -> u64 { self.total_key_size } diff --git a/crates/milli/src/index.rs b/crates/milli/src/index.rs index a2d839d03..5f74863e8 100644 --- a/crates/milli/src/index.rs +++ b/crates/milli/src/index.rs @@ -411,38 +411,6 @@ impl Index { Ok(count.unwrap_or_default()) } - /// Updates the stats of the documents database based on the previous stats and the modified docids. - pub fn update_documents_stats( - &self, - wtxn: &mut RwTxn<'_>, - modified_docids: roaring::RoaringBitmap, - ) -> Result<()> { - let before_rtxn = self.read_txn()?; - let document_stats = match self.documents_stats(&before_rtxn)? { - Some(before_stats) => DatabaseStats::recompute( - before_stats, - self.documents.remap_types(), - &before_rtxn, - wtxn, - modified_docids.iter().map(|docid| docid.to_be_bytes()), - )?, - None => { - // This should never happen when there are already documents in the index, the documents stats should be present. - // If it happens, it means that the index was not properly initialized/upgraded. - debug_assert_eq!( - self.documents.len(&before_rtxn)?, - 0, - "The documents stats should be present when there are documents in the index" - ); - tracing::warn!("No documents stats found, creating new ones"); - DatabaseStats::new(self.documents.remap_types(), &*wtxn)? 
- } - }; - - self.put_documents_stats(wtxn, document_stats)?; - Ok(()) - } - /// Writes the stats of the documents database. pub fn put_documents_stats( &self, diff --git a/crates/milli/src/update/index_documents/mod.rs b/crates/milli/src/update/index_documents/mod.rs index 95342054d..5d445d283 100644 --- a/crates/milli/src/update/index_documents/mod.rs +++ b/crates/milli/src/update/index_documents/mod.rs @@ -28,6 +28,7 @@ pub use self::helpers::*; pub use self::transform::{Transform, TransformOutput}; use super::facet::clear_facet_levels_based_on_settings_diff; use super::new::StdResult; +use crate::database_stats::DatabaseStats; use crate::documents::{obkv_to_object, DocumentsBatchReader}; use crate::error::{Error, InternalError}; use crate::index::{PrefixSearch, PrefixSettings}; @@ -476,7 +477,8 @@ where if !settings_diff.settings_update_only { // Update the stats of the documents database when there is a document update. - self.index.update_documents_stats(self.wtxn, modified_docids)?; + let stats = DatabaseStats::new(self.index.documents.remap_data_type(), self.wtxn)?; + self.index.put_documents_stats(self.wtxn, stats)?; } // We write the field distribution into the main database self.index.put_field_distribution(self.wtxn, &field_distribution)?; diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index 4f2dd19c9..d2a88f4ff 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -234,7 +234,6 @@ where embedders, field_distribution, document_ids, - modified_docids, )?; Ok(congestion) diff --git a/crates/milli/src/update/new/indexer/write.rs b/crates/milli/src/update/new/indexer/write.rs index 8618b4b21..7ab7991b2 100644 --- a/crates/milli/src/update/new/indexer/write.rs +++ b/crates/milli/src/update/new/indexer/write.rs @@ -7,6 +7,7 @@ use rand::SeedableRng as _; use time::OffsetDateTime; use super::super::channel::*; +use crate::database_stats::DatabaseStats; 
use crate::documents::PrimaryKey; use crate::fields_ids_map::metadata::FieldIdMapWithMetadata; use crate::index::IndexEmbeddingConfig; @@ -142,7 +143,6 @@ pub(super) fn update_index( embedders: EmbeddingConfigs, field_distribution: std::collections::BTreeMap, document_ids: roaring::RoaringBitmap, - modified_docids: roaring::RoaringBitmap, ) -> Result<()> { index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?; if let Some(new_primary_key) = new_primary_key { @@ -153,7 +153,8 @@ pub(super) fn update_index( index.put_field_distribution(wtxn, &field_distribution)?; index.put_documents_ids(wtxn, &document_ids)?; index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; - index.update_documents_stats(wtxn, modified_docids)?; + let stats = DatabaseStats::new(index.documents.remap_data_type(), wtxn)?; + index.put_documents_stats(wtxn, stats)?; Ok(()) }