diff --git a/file-store/src/lib.rs b/file-store/src/lib.rs
index e05694c92..4b7e52e5d 100644
--- a/file-store/src/lib.rs
+++ b/file-store/src/lib.rs
@@ -1,4 +1,3 @@
-use std::collections::BTreeSet;
 use std::fs::File as StdFile;
 use std::ops::{Deref, DerefMut};
 use std::path::{Path, PathBuf};
@@ -11,10 +10,14 @@ const UPDATE_FILES_PATH: &str = "updates/updates_files";

 #[derive(Debug, thiserror::Error)]
 pub enum Error {
+    #[error("Could not parse file name as utf-8")]
+    CouldNotParseFileNameAsUtf8,
     #[error(transparent)]
     IoError(#[from] std::io::Error),
     #[error(transparent)]
     PersistError(#[from] tempfile::PersistError),
+    #[error(transparent)]
+    UuidError(#[from] uuid::Error),
 }

 pub type Result<T> = std::result::Result<T, Error>;
@@ -33,13 +36,11 @@ impl DerefMut for File {
     }
 }

-#[cfg_attr(test, faux::create)]
 #[derive(Clone, Debug)]
 pub struct FileStore {
     path: PathBuf,
 }

-#[cfg(not(test))]
 impl FileStore {
     pub fn new(path: impl AsRef<Path>) -> Result<FileStore> {
         let path = path.as_ref().to_path_buf();
@@ -48,7 +49,6 @@ impl FileStore {
     }
 }

-#[cfg_attr(test, faux::methods)]
 impl FileStore {
     /// Creates a new temporary update file.
     /// A call to `persist` is needed to persist the file in the database.
@@ -94,7 +94,17 @@ impl FileStore {
         Ok(())
     }

-    pub fn get_size(&self, uuid: Uuid) -> Result<u64> {
+    /// Compute the size of all the updates contained in the file store.
+    pub fn compute_total_size(&self) -> Result<u64> {
+        let mut total = 0;
+        for uuid in self.all_uuids()? {
+            total += self.compute_size(uuid?).unwrap_or_default();
+        }
+        Ok(total)
+    }
+
+    /// Compute the size of one update.
+    pub fn compute_size(&self, uuid: Uuid) -> Result<u64> {
         Ok(self.get_update(uuid)?.metadata()?.len())
     }

@@ -105,17 +115,12 @@ impl FileStore {
     }

     /// List the Uuids of the files in the FileStore
-    ///
-    /// This function is meant to be used by tests only.
-    #[doc(hidden)]
-    pub fn __all_uuids(&self) -> BTreeSet<Uuid> {
-        let mut uuids = BTreeSet::new();
-        for entry in self.path.read_dir().unwrap() {
-            let entry = entry.unwrap();
-            let uuid = Uuid::from_str(entry.file_name().to_str().unwrap()).unwrap();
-            uuids.insert(uuid);
-        }
-        uuids
+    pub fn all_uuids(&self) -> Result<impl Iterator<Item = Result<Uuid>>> {
+        Ok(self.path.read_dir()?.map(|entry| {
+            Ok(Uuid::from_str(
+                entry?.file_name().to_str().ok_or(Error::CouldNotParseFileNameAsUtf8)?,
+            )?)
+        }))
     }
 }

diff --git a/index-scheduler/src/insta_snapshot.rs b/index-scheduler/src/insta_snapshot.rs
index 0f0c9953a..e8d07ee63 100644
--- a/index-scheduler/src/insta_snapshot.rs
+++ b/index-scheduler/src/insta_snapshot.rs
@@ -1,3 +1,4 @@
+use std::collections::BTreeSet;
 use std::fmt::Write;

 use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
@@ -92,7 +93,9 @@ pub fn snapshot_index_scheduler(scheduler: &IndexScheduler) -> String {

 pub fn snapshot_file_store(file_store: &file_store::FileStore) -> String {
     let mut snap = String::new();
-    for uuid in file_store.__all_uuids() {
+    // we store the uuids in a `BTreeSet` to keep them ordered.
+    let all_uuids = file_store.all_uuids().unwrap().collect::<Result<BTreeSet<_>, _>>().unwrap();
+    for uuid in all_uuids {
         snap.push_str(&format!("{uuid}\n"));
     }
     snap
diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs
index 895b97813..387dac2d0 100644
--- a/index-scheduler/src/lib.rs
+++ b/index-scheduler/src/lib.rs
@@ -452,6 +452,10 @@ impl IndexScheduler {
         &self.index_mapper.indexer_config
     }

+    pub fn size(&self) -> Result<u64> {
+        Ok(self.env.real_disk_size()?)
+    }
+
     /// Return the index corresponding to the name.
     ///
     /// * If the index wasn't opened before, the index will be opened.
@@ -898,6 +902,11 @@ impl IndexScheduler {
         Ok(self.file_store.new_update_with_uuid(uuid)?)
     }

+    /// The size on disk taken by all the update files contained in the `IndexScheduler`, in bytes.
+    pub fn compute_update_file_size(&self) -> Result<u64> {
+        Ok(self.file_store.compute_total_size()?)
+    }
+
     /// Delete a file from the index scheduler.
     ///
     /// Counterpart to the [`create_update_file`](IndexScheduler::create_update_file) method.
diff --git a/index-scheduler/src/utils.rs b/index-scheduler/src/utils.rs
index e13d0e375..c9b71b523 100644
--- a/index-scheduler/src/utils.rs
+++ b/index-scheduler/src/utils.rs
@@ -508,14 +508,21 @@ impl IndexScheduler {
         if let KindWithContent::DocumentAdditionOrUpdate { content_file, .. } = kind {
             match status {
                 Status::Enqueued | Status::Processing => {
-                    assert!(
-                        self.file_store.__all_uuids().contains(&content_file),
+                    assert!(self
+                        .file_store
+                        .all_uuids()
+                        .unwrap()
+                        .any(|uuid| uuid.as_ref().unwrap() == &content_file),
                         "Could not find uuid `{content_file}` in the file_store. Available uuids are {:?}.",
-                        self.file_store.__all_uuids(),
+                        self.file_store.all_uuids().unwrap().collect::<std::result::Result<Vec<_>, file_store::Error>>().unwrap(),
                     );
                 }
                 Status::Succeeded | Status::Failed | Status::Canceled => {
-                    assert!(!self.file_store.__all_uuids().contains(&content_file));
+                    assert!(self
+                        .file_store
+                        .all_uuids()
+                        .unwrap()
+                        .all(|uuid| uuid.as_ref().unwrap() != &content_file));
                 }
             }
         }
diff --git a/meilisearch-auth/src/lib.rs b/meilisearch-auth/src/lib.rs
index 609409cf7..adfd00ce5 100644
--- a/meilisearch-auth/src/lib.rs
+++ b/meilisearch-auth/src/lib.rs
@@ -33,6 +33,11 @@ impl AuthController {
         Ok(Self { store: Arc::new(store), master_key: master_key.clone() })
     }

+    /// Return the size of the `AuthController` database in bytes.
+    pub fn size(&self) -> Result<u64> {
+        self.store.size()
+    }
+
     pub fn create_key(&self, create_key: CreateApiKey) -> Result<Key> {
         match self.store.get_api_key(create_key.uid)? {
             Some(_) => Err(AuthControllerError::ApiKeyAlreadyExists(create_key.uid.to_string())),
diff --git a/meilisearch-auth/src/store.rs b/meilisearch-auth/src/store.rs
index 2574572be..c1cec0ede 100644
--- a/meilisearch-auth/src/store.rs
+++ b/meilisearch-auth/src/store.rs
@@ -60,6 +60,11 @@ impl HeedAuthStore {
         Ok(Self { env, keys, action_keyid_index_expiration, should_close_on_drop: true })
     }

+    /// Return the size in bytes of the database.
+    pub fn size(&self) -> Result<u64> {
+        Ok(self.env.real_disk_size()?)
+    }
+
     pub fn set_drop_on_close(&mut self, v: bool) {
         self.should_close_on_drop = v;
     }
diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs
index 0f551d584..39d9a1551 100644
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -343,6 +343,7 @@ impl ErrorCode for file_store::Error {
         match self {
             Self::IoError(e) => e.error_code(),
             Self::PersistError(e) => e.error_code(),
+            Self::CouldNotParseFileNameAsUtf8 | Self::UuidError(_) => Code::Internal,
         }
     }
 }
diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs
index 1b5a1d73f..21b6696e7 100644
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -9,7 +9,7 @@ use actix_web::HttpRequest;
 use byte_unit::Byte;
 use http::header::CONTENT_TYPE;
 use index_scheduler::IndexScheduler;
-use meilisearch_auth::SearchRules;
+use meilisearch_auth::{AuthController, SearchRules};
 use meilisearch_types::InstanceUid;
 use once_cell::sync::Lazy;
 use regex::Regex;
@@ -82,7 +82,11 @@ pub struct SegmentAnalytics {
 }

 impl SegmentAnalytics {
-    pub async fn new(opt: &Opt, index_scheduler: Arc<IndexScheduler>) -> Arc<dyn Analytics> {
+    pub async fn new(
+        opt: &Opt,
+        index_scheduler: Arc<IndexScheduler>,
+        auth_controller: AuthController,
+    ) -> Arc<dyn Analytics> {
         let instance_uid = super::find_user_id(&opt.db_path);
         let first_time_run = instance_uid.is_none();
         let instance_uid = instance_uid.unwrap_or_else(|| Uuid::new_v4());
@@ -136,7 +140,7 @@ impl SegmentAnalytics {
             get_tasks_aggregator: TasksAggregator::default(),
             health_aggregator: HealthAggregator::default(),
         });
-        tokio::spawn(segment.run(index_scheduler.clone()));
+        tokio::spawn(segment.run(index_scheduler.clone(), auth_controller.clone()));

         let this = Self { instance_uid, sender, user: user.clone() };

@@ -361,7 +365,7 @@ impl Segment {
         })
     }

-    async fn run(mut self, index_scheduler: Arc<IndexScheduler>) {
+    async fn run(mut self, index_scheduler: Arc<IndexScheduler>, auth_controller: AuthController) {
         const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
         // The first batch must be sent after one hour.
         let mut interval =
@@ -370,7 +374,7 @@ impl Segment {
         loop {
             select! {
                 _ = interval.tick() => {
-                    self.tick(index_scheduler.clone()).await;
+                    self.tick(index_scheduler.clone(), auth_controller.clone()).await;
                 },
                 msg = self.inbox.recv() => {
                     match msg {
@@ -389,8 +393,14 @@ impl Segment {
         }
     }

-    async fn tick(&mut self, index_scheduler: Arc<IndexScheduler>) {
-        if let Ok(stats) = create_all_stats(index_scheduler.into(), &SearchRules::default()) {
+    async fn tick(
+        &mut self,
+        index_scheduler: Arc<IndexScheduler>,
+        auth_controller: AuthController,
+    ) {
+        if let Ok(stats) =
+            create_all_stats(index_scheduler.into(), auth_controller, &SearchRules::default())
+        {
             let _ = self
                 .batcher
                 .push(Identify {
diff --git a/meilisearch/src/main.rs b/meilisearch/src/main.rs
index 2841dd52c..b78362ec1 100644
--- a/meilisearch/src/main.rs
+++ b/meilisearch/src/main.rs
@@ -57,7 +57,8 @@ async fn main() -> anyhow::Result<()> {

     #[cfg(all(not(debug_assertions), feature = "analytics"))]
     let analytics = if !opt.no_analytics {
-        analytics::SegmentAnalytics::new(&opt, index_scheduler.clone()).await
+        analytics::SegmentAnalytics::new(&opt, index_scheduler.clone(), auth_controller.clone())
+            .await
     } else {
         analytics::MockAnalytics::new(&opt)
     };
diff --git a/meilisearch/src/routes/mod.rs b/meilisearch/src/routes/mod.rs
index 9ef036554..7aaad7125 100644
--- a/meilisearch/src/routes/mod.rs
+++ b/meilisearch/src/routes/mod.rs
@@ -4,6 +4,7 @@ use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::{IndexScheduler, Query};
 use log::debug;
+use meilisearch_auth::AuthController;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::settings::{Settings, Unchecked};
 use meilisearch_types::tasks::{Kind, Status, Task, TaskId};
@@ -230,13 +231,15 @@ pub struct Stats {

 async fn get_stats(
     index_scheduler: GuardedData<ActionPolicy<{ actions::STATS_GET }>, Data<IndexScheduler>>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::STATS_GET }>, AuthController>,
     req: HttpRequest,
     analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     analytics.publish("Stats Seen".to_string(), json!({ "per_index_uid": false }), Some(&req));
     let search_rules = &index_scheduler.filters().search_rules;
-    let stats = create_all_stats((*index_scheduler).clone(), search_rules)?;
+    let stats =
+        create_all_stats((*index_scheduler).clone(), (*auth_controller).clone(), search_rules)?;

     debug!("returns: {:?}", stats);
     Ok(HttpResponse::Ok().json(stats))
 }
@@ -244,6 +247,7 @@ async fn get_stats(

 pub fn create_all_stats(
     index_scheduler: Data<IndexScheduler>,
+    auth_controller: AuthController,
     search_rules: &meilisearch_auth::SearchRules,
 ) -> Result<Stats, ResponseError> {
     let mut last_task: Option<OffsetDateTime> = None;
@@ -253,6 +257,7 @@ pub fn create_all_stats(
         Query { statuses: Some(vec![Status::Processing]), limit: Some(1), ..Query::default() },
         search_rules.authorized_indexes(),
     )?;
+    // accumulate the size of each index
    let processing_index = processing_task.first().and_then(|task| task.index_uid());
     for (name, index) in index_scheduler.indexes()? {
         if !search_rules.is_index_authorized(&name) {
@@ -273,6 +278,11 @@ pub fn create_all_stats(

         indexes.insert(name, stats);
     }
+
+    database_size += index_scheduler.size()?;
+    database_size += auth_controller.size()?;
+    database_size += index_scheduler.compute_update_file_size()?;
+
     let stats = Stats { database_size, last_update: last_task, indexes };
     Ok(stats)
 }
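
Below is a short usage sketch, not part of the patch, showing how the reworked `FileStore` API composes. The `/tmp/file-store-demo` path is illustrative; only items introduced above (`FileStore::new`, `all_uuids`, `compute_size`, `compute_total_size`) are assumed:

    use file_store::FileStore;

    fn main() -> file_store::Result<()> {
        // Open a file store rooted at an illustrative directory.
        let store = FileStore::new("/tmp/file-store-demo")?;

        // `all_uuids` replaces the test-only `__all_uuids`: it yields fallible
        // entries (I/O errors, non-utf-8 file names, unparsable uuids) instead
        // of panicking inside the file store.
        for uuid in store.all_uuids()? {
            let uuid = uuid?;
            println!("{uuid}: {} bytes", store.compute_size(uuid)?);
        }

        // Sums `compute_size` over every uuid; a file that cannot be stat'ed
        // counts as 0 bytes (`unwrap_or_default`), while an unparsable uuid
        // still propagates as an error.
        println!("total: {} bytes", store.compute_total_size()?);
        Ok(())
    }

Making `all_uuids` fallible is also what lets the new error variants surface through `create_all_stats` as `Code::Internal` (per the meilisearch-types/src/error.rs hunk) rather than unwinding inside the scheduler.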