Implement Incremental document database stats computing

This commit is contained in:
ManyTheFish 2025-02-17 16:36:33 +01:00 committed by Kerollmops
parent d9642ec916
commit 9f3663e768
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
9 changed files with 116 additions and 53 deletions

View file

@ -75,6 +75,7 @@ pub mod main_key {
pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
pub const FACET_SEARCH: &str = "facet_search";
pub const PREFIX_SEARCH: &str = "prefix_search";
pub const DOCUMENTS_STATS: &str = "documents_stats";
}
pub mod db_name {
@ -404,9 +405,58 @@ impl Index {
Ok(count.unwrap_or_default())
}
/// Returns the stats of the database.
pub fn documents_database_stats(&self, rtxn: &RoTxn<'_>) -> Result<DatabaseStats> {
DatabaseStats::new(self.documents.remap_types::<Bytes, Bytes>(), rtxn)
/// Updates the stats of the documents database based on the previous stats and the modified docids.
pub fn update_documents_stats(
&self,
wtxn: &mut RwTxn<'_>,
modified_docids: roaring::RoaringBitmap,
) -> Result<()> {
let before_rtxn = self.read_txn()?;
let document_stats = match self.documents_stats(&before_rtxn)? {
Some(before_stats) => DatabaseStats::recompute(
before_stats,
self.documents.remap_types(),
&before_rtxn,
wtxn,
modified_docids.iter().map(|docid| docid.to_be_bytes()),
)?,
None => {
// This should never happen when there are already documents in the index, the documents stats should be present.
// If it happens, it means that the index was not properly initialized/upgraded.
debug_assert_eq!(
self.documents.len(&before_rtxn)?,
0,
"The documents stats should be present when there are documents in the index"
);
tracing::warn!("No documents stats found, creating new ones");
DatabaseStats::new(self.documents.remap_types(), &*wtxn)?
}
};
self.put_documents_stats(wtxn, document_stats)?;
Ok(())
}
/// Writes the stats of the documents database.
pub fn put_documents_stats(
&self,
wtxn: &mut RwTxn<'_>,
stats: DatabaseStats,
) -> heed::Result<()> {
eprintln!("putting documents stats: {:?}", stats);
self.main.remap_types::<Str, SerdeJson<DatabaseStats>>().put(
wtxn,
main_key::DOCUMENTS_STATS,
&stats,
)
}
/// Returns the stats of the documents database.
pub fn documents_stats(&self, rtxn: &RoTxn<'_>) -> heed::Result<Option<DatabaseStats>> {
dbg!(self
.main
.remap_types::<Str, SerdeJson<DatabaseStats>>()
.get(rtxn, main_key::DOCUMENTS_STATS))
}
/* primary key */