diff --git a/Cargo.lock b/Cargo.lock index 461a4789b..7ef514ffe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1875,6 +1875,7 @@ dependencies = [ "urlencoding", "uuid", "vergen", + "walkdir", ] [[package]] @@ -2840,6 +2841,15 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "71d301d4193d031abdd79ff7e3dd721168a9572ef3fe51a1517aba235bd8f86e" +[[package]] +name = "same-file" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" +dependencies = [ + "winapi-util", +] + [[package]] name = "scopeguard" version = "1.1.0" @@ -3717,6 +3727,17 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" +[[package]] +name = "walkdir" +version = "2.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" +dependencies = [ + "same-file", + "winapi 0.3.9", + "winapi-util", +] + [[package]] name = "want" version = "0.3.0" diff --git a/meilisearch-http/Cargo.lock b/meilisearch-http/Cargo.lock index 0bdc739d5..b9bfd06ac 100644 --- a/meilisearch-http/Cargo.lock +++ b/meilisearch-http/Cargo.lock @@ -1827,22 +1827,6 @@ dependencies = [ "vergen", ] -[[package]] -name = "meilisearch-tokenizer" -version = "0.1.1" -source = "git+https://github.com/meilisearch/Tokenizer.git?branch=main#147b6154b1b34cb8f5da2df6a416b7da191bc850" -dependencies = [ - "character_converter", - "cow-utils", - "deunicode", - "fst", - "jieba-rs", - "once_cell", - "slice-group-by", - "unicode-segmentation", - "whatlang", -] - [[package]] name = "memchr" version = "2.3.4" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 3e04b876a..9ab386882 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -62,6 +62,7 @@ thiserror = "1.0.24" tokio = { version = "1", features = ["full"] } uuid = "0.8.2" oxidized-json-checker = "0.3.2" +walkdir = "2.3.2" [dependencies.sentry] default-features = false diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 379a25030..a01cfabe0 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -8,6 +8,7 @@ use serde_qs as qs; use siphasher::sip::SipHasher; use walkdir::WalkDir; +use crate::helpers::EnvSizer; use crate::Data; use crate::Opt; @@ -33,12 +34,7 @@ impl EventProperties { } } - let database_size = WalkDir::new(&data.db_path) - .into_iter() - .filter_map(|entry| entry.ok()) - .filter_map(|entry| entry.metadata().ok()) - .filter(|metadata| metadata.is_file()) - .fold(0, |acc, m| acc + m.len()); + let database_size = data.env.size(); let last_update_timestamp = data.db.last_update(&reader)?.map(|u| u.timestamp()); @@ -116,7 +112,7 @@ pub fn analytics_sender(data: Data, opt: Opt) { time, app_version, user_properties, - event_properties + event_properties, }; let event = serde_json::to_string(&event).unwrap(); diff --git a/meilisearch-http/src/data/mod.rs b/meilisearch-http/src/data/mod.rs index 717d728fc..3aee18217 100644 --- a/meilisearch-http/src/data/mod.rs +++ b/meilisearch-http/src/data/mod.rs @@ -1,16 +1,18 @@ -pub mod search; -mod updates; - +use std::collections::HashMap; use std::ops::Deref; use std::sync::Arc; +use chrono::{DateTime, Utc}; use sha2::Digest; use crate::index::Settings; -use crate::index_controller::IndexController; +use crate::index_controller::{IndexController, IndexStats}; use crate::index_controller::{IndexMetadata, IndexSettings}; use crate::option::Opt; +pub mod search; +mod updates; + #[derive(Clone)] pub struct Data { inner: Arc, @@ -37,6 +39,13 @@ pub struct ApiKeys { pub master: Option, } +#[derive(Default)] +pub struct Stats { + pub database_size: u64, + pub last_update: Option>, + pub indexes: HashMap, +} + impl ApiKeys { pub fn generate_missing_api_keys(&mut self) { if let Some(master_key) = &self.master { @@ -104,6 +113,34 @@ impl Data { Ok(meta) } + pub async fn get_index_stats(&self, uid: String) -> anyhow::Result { + Ok(self.index_controller.get_stats(uid).await?) + } + + pub async fn get_stats(&self) -> anyhow::Result { + let mut stats = Stats::default(); + stats.database_size += self.index_controller.get_uuids_size().await?; + + for index in self.index_controller.list_indexes().await? { + let index_stats = self.index_controller.get_stats(index.uid.clone()).await?; + + stats.database_size += index_stats.size; + stats.database_size += self + .index_controller + .get_updates_size(index.uid.clone()) + .await?; + + stats.last_update = Some(match stats.last_update { + Some(last_update) => last_update.max(index.meta.updated_at), + None => index.meta.updated_at, + }); + + stats.indexes.insert(index.uid, index_stats); + } + + Ok(stats) + } + #[inline] pub fn http_payload_size_limit(&self) -> usize { self.options.http_payload_size_limit.get_bytes() as usize diff --git a/meilisearch-http/src/helpers/env.rs b/meilisearch-http/src/helpers/env.rs new file mode 100644 index 000000000..9bc81bc69 --- /dev/null +++ b/meilisearch-http/src/helpers/env.rs @@ -0,0 +1,16 @@ +use walkdir::WalkDir; + +pub trait EnvSizer { + fn size(&self) -> u64; +} + +impl EnvSizer for heed::Env { + fn size(&self) -> u64 { + WalkDir::new(self.path()) + .into_iter() + .filter_map(|entry| entry.ok()) + .filter_map(|entry| entry.metadata().ok()) + .filter(|metadata| metadata.is_file()) + .fold(0, |acc, m| acc + m.len()) + } +} diff --git a/meilisearch-http/src/helpers/mod.rs b/meilisearch-http/src/helpers/mod.rs index d1204908f..a5cddf29c 100644 --- a/meilisearch-http/src/helpers/mod.rs +++ b/meilisearch-http/src/helpers/mod.rs @@ -1,4 +1,6 @@ pub mod authentication; pub mod compression; +mod env; pub use authentication::Authentication; +pub use env::EnvSizer; diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index dfd2ebdc4..f8835eceb 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -1,6 +1,3 @@ -mod search; -mod updates; - use std::collections::{BTreeSet, HashSet}; use std::ops::Deref; use std::sync::Arc; @@ -11,6 +8,10 @@ use serde_json::{Map, Value}; pub use search::{SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use updates::{Facets, Settings, UpdateResult}; +use crate::helpers::EnvSizer; + +mod search; +mod updates; pub type Document = Map; @@ -54,11 +55,7 @@ impl Index { let stop_words = self .stop_words(&txn)? .map(|stop_words| -> anyhow::Result> { - Ok(stop_words - .stream() - .into_strs()? - .into_iter() - .collect()) + Ok(stop_words.stream().into_strs()?.into_iter().collect()) }) .transpose()? .unwrap_or_else(BTreeSet::new); @@ -126,6 +123,10 @@ impl Index { } } + pub fn size(&self) -> u64 { + self.env.size() + } + fn fields_to_display>( &self, txn: &heed::RoTxn, diff --git a/meilisearch-http/src/index_controller/index_actor/actor.rs b/meilisearch-http/src/index_controller/index_actor/actor.rs index a4228227f..c54b2edd2 100644 --- a/meilisearch-http/src/index_controller/index_actor/actor.rs +++ b/meilisearch-http/src/index_controller/index_actor/actor.rs @@ -8,20 +8,24 @@ use futures::pin_mut; use futures::stream::StreamExt; use heed::CompactionOption; use log::debug; -use tokio::sync::mpsc; +use tokio::sync::{mpsc, RwLock}; use tokio::task::spawn_blocking; use uuid::Uuid; -use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult}; use crate::index::{Document, SearchQuery, SearchResult, Settings}; use crate::index_controller::update_handler::UpdateHandler; -use crate::index_controller::{get_arc_ownership_blocking, updates::Processing, UpdateMeta}; +use crate::index_controller::{ + get_arc_ownership_blocking, updates::Processing, IndexStats, UpdateMeta, +}; use crate::option::IndexerOpts; +use super::{IndexError, IndexMeta, IndexMsg, IndexSettings, IndexStore, Result, UpdateResult}; + pub struct IndexActor { read_receiver: Option>, write_receiver: Option>, update_handler: Arc, + processing: RwLock>, store: S, } @@ -39,8 +43,9 @@ impl IndexActor { Ok(Self { read_receiver, write_receiver, - store, update_handler, + processing: RwLock::new(None), + store, }) } @@ -146,6 +151,9 @@ impl IndexActor { Snapshot { uuid, path, ret } => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } + GetStats { uuid, ret } => { + let _ = ret.send(self.handle_get_stats(uuid).await); + } } } @@ -175,16 +183,25 @@ impl IndexActor { meta: Processing, data: File, ) -> Result { - debug!("Processing update {}", meta.id()); - let uuid = meta.index_uuid(); - let update_handler = self.update_handler.clone(); - let index = match self.store.get(*uuid).await? { - Some(index) => index, - None => self.store.create(*uuid, None).await?, - }; - spawn_blocking(move || update_handler.handle_update(meta, data, index)) - .await - .map_err(|e| IndexError::Error(e.into())) + async fn get_result(actor: &IndexActor, meta: Processing, data: File) -> Result { + debug!("Processing update {}", meta.id()); + let uuid = *meta.index_uuid(); + let update_handler = actor.update_handler.clone(); + let index = match actor.store.get(uuid).await? { + Some(index) => index, + None => actor.store.create(uuid, None).await?, + }; + + spawn_blocking(move || update_handler.handle_update(meta, data, index)) + .await + .map_err(|e| IndexError::Error(e.into())) + } + + *self.processing.write().await = Some(meta.index_uuid().clone()); + let result = get_result(self, meta, data).await; + *self.processing.write().await = None; + + result } async fn handle_settings(&self, uuid: Uuid) -> Result { @@ -328,4 +345,28 @@ impl IndexActor { Ok(()) } + + async fn handle_get_stats(&self, uuid: Uuid) -> Result { + let index = self + .store + .get(uuid) + .await? + .ok_or(IndexError::UnexistingIndex)?; + + let processing = self.processing.read().await; + let is_indexing = *processing == Some(uuid); + + spawn_blocking(move || { + let rtxn = index.read_txn()?; + + Ok(IndexStats { + size: index.size(), + number_of_documents: index.number_of_documents(&rtxn)?, + is_indexing, + fields_distribution: index.fields_distribution(&rtxn)?, + }) + }) + .await + .map_err(|e| IndexError::Error(e.into()))? + } } diff --git a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs index dba0f9e60..93406c13b 100644 --- a/meilisearch-http/src/index_controller/index_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/index_actor/handle_impl.rs @@ -3,12 +3,13 @@ use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; +use crate::index::{Document, SearchQuery, SearchResult, Settings}; +use crate::index_controller::{updates::Processing, UpdateMeta}; +use crate::index_controller::{IndexSettings, IndexStats}; + use super::{ IndexActor, IndexActorHandle, IndexMeta, IndexMsg, MapIndexStore, Result, UpdateResult, }; -use crate::index::{Document, SearchQuery, SearchResult, Settings}; -use crate::index_controller::IndexSettings; -use crate::index_controller::{updates::Processing, UpdateMeta}; #[derive(Clone)] pub struct IndexActorHandleImpl { @@ -121,6 +122,13 @@ impl IndexActorHandle for IndexActorHandleImpl { let _ = self.read_sender.send(msg).await; Ok(receiver.await.expect("IndexActor has been killed")?) } + + async fn get_index_stats(&self, uuid: Uuid) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = IndexMsg::GetStats { uuid, ret }; + let _ = self.read_sender.send(msg).await; + Ok(receiver.await.expect("IndexActor has been killed")?) + } } impl IndexActorHandleImpl { diff --git a/meilisearch-http/src/index_controller/index_actor/message.rs b/meilisearch-http/src/index_controller/index_actor/message.rs index 46d7f6214..6da0f8628 100644 --- a/meilisearch-http/src/index_controller/index_actor/message.rs +++ b/meilisearch-http/src/index_controller/index_actor/message.rs @@ -3,9 +3,10 @@ use std::path::PathBuf; use tokio::sync::oneshot; use uuid::Uuid; -use super::{IndexMeta, IndexSettings, Result, UpdateResult}; use crate::index::{Document, SearchQuery, SearchResult, Settings}; -use crate::index_controller::{updates::Processing, UpdateMeta}; +use crate::index_controller::{updates::Processing, IndexStats, UpdateMeta}; + +use super::{IndexMeta, IndexSettings, Result, UpdateResult}; pub enum IndexMsg { CreateIndex { @@ -58,4 +59,8 @@ pub enum IndexMsg { path: PathBuf, ret: oneshot::Sender>, }, + GetStats { + uuid: Uuid, + ret: oneshot::Sender>, + }, } diff --git a/meilisearch-http/src/index_controller/index_actor/mod.rs b/meilisearch-http/src/index_controller/index_actor/mod.rs index 2dc856b80..426eb29e4 100644 --- a/meilisearch-http/src/index_controller/index_actor/mod.rs +++ b/meilisearch-http/src/index_controller/index_actor/mod.rs @@ -1,30 +1,30 @@ -mod actor; -mod handle_impl; -mod message; -mod store; - use std::path::PathBuf; use chrono::{DateTime, Utc}; +#[cfg(test)] +use mockall::automock; use serde::{Deserialize, Serialize}; use thiserror::Error; use uuid::Uuid; -use super::IndexSettings; +use actor::IndexActor; +pub use handle_impl::IndexActorHandleImpl; +use message::IndexMsg; +use store::{IndexStore, MapIndexStore}; + use crate::index::UpdateResult as UResult; use crate::index::{Document, Index, SearchQuery, SearchResult, Settings}; use crate::index_controller::{ updates::{Failed, Processed, Processing}, - UpdateMeta, + IndexStats, UpdateMeta, }; -use actor::IndexActor; -use message::IndexMsg; -use store::{IndexStore, MapIndexStore}; -pub use handle_impl::IndexActorHandleImpl; +use super::IndexSettings; -#[cfg(test)] -use mockall::automock; +mod actor; +mod handle_impl; +mod message; +mod store; pub type Result = std::result::Result; type UpdateResult = std::result::Result, Failed>; @@ -33,7 +33,7 @@ type UpdateResult = std::result::Result, Failed, - updated_at: DateTime, + pub updated_at: DateTime, primary_key: Option, } @@ -98,4 +98,5 @@ pub trait IndexActorHandle { async fn get_index_meta(&self, uuid: Uuid) -> Result; async fn update_index(&self, uuid: Uuid, index_settings: IndexSettings) -> Result; async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; + async fn get_index_stats(&self, uuid: Uuid) -> Result; } diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index b26ab8828..8361c45cc 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -1,10 +1,3 @@ -mod index_actor; -mod snapshot; -mod update_actor; -mod update_handler; -mod updates; -mod uuid_resolver; - use std::path::Path; use std::sync::Arc; use std::time::Duration; @@ -14,33 +7,40 @@ use anyhow::bail; use futures::stream::StreamExt; use log::info; use milli::update::{IndexDocumentsMethod, UpdateFormat}; +use milli::FieldsDistribution; use serde::{Deserialize, Serialize}; use tokio::sync::mpsc; use tokio::time::sleep; use uuid::Uuid; +use index_actor::IndexActorHandle; +use snapshot::load_snapshot; +use snapshot::SnapshotService; +use update_actor::UpdateActorHandle; +pub use updates::{Failed, Processed, Processing}; +use uuid_resolver::UuidError; +use uuid_resolver::UuidResolverHandle; + use crate::index::{Document, SearchQuery, SearchResult}; use crate::index::{Facets, Settings, UpdateResult}; use crate::option::Opt; -use index_actor::IndexActorHandle; -use snapshot::load_snapshot; -use update_actor::UpdateActorHandle; -use uuid_resolver::UuidResolverHandle; - -use snapshot::SnapshotService; -pub use updates::{Failed, Processed, Processing}; -use uuid_resolver::UuidError; +mod index_actor; +mod snapshot; +mod update_actor; +mod update_handler; +mod updates; +mod uuid_resolver; pub type UpdateStatus = updates::UpdateStatus; #[derive(Debug, Serialize, Deserialize, Clone)] #[serde(rename_all = "camelCase")] pub struct IndexMetadata { - uid: String, + pub uid: String, name: String, #[serde(flatten)] - meta: index_actor::IndexMeta, + pub meta: index_actor::IndexMeta, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -63,6 +63,14 @@ pub struct IndexSettings { pub primary_key: Option, } +#[derive(Clone, Debug)] +pub struct IndexStats { + pub size: u64, + pub number_of_documents: u64, + pub is_indexing: bool, + pub fields_distribution: FieldsDistribution, +} + pub struct IndexController { uuid_resolver: uuid_resolver::UuidResolverHandleImpl, index_handle: index_actor::IndexActorHandleImpl, @@ -100,10 +108,11 @@ impl IndexController { update_handle.clone(), Duration::from_secs(options.snapshot_interval_sec), options.snapshot_dir.clone(), - options.db_path - .file_name() - .map(|n| n.to_owned().into_string().expect("invalid path")) - .unwrap_or_else(|| String::from("data.ms")), + options + .db_path + .file_name() + .map(|n| n.to_owned().into_string().expect("invalid path")) + .unwrap_or_else(|| String::from("data.ms")), ); tokio::task::spawn(snapshot_service.run()); @@ -341,6 +350,22 @@ impl IndexController { }; Ok(meta) } + + pub async fn get_stats(&self, uid: String) -> anyhow::Result { + let uuid = self.uuid_resolver.get(uid.clone()).await?; + + Ok(self.index_handle.get_index_stats(uuid).await?) + } + + pub async fn get_updates_size(&self, uid: String) -> anyhow::Result { + let uuid = self.uuid_resolver.get(uid.clone()).await?; + + Ok(self.update_handle.get_size(uuid).await?) + } + + pub async fn get_uuids_size(&self) -> anyhow::Result { + Ok(self.uuid_resolver.get_size().await?) + } } pub async fn get_arc_ownership_blocking(mut item: Arc) -> T { diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index d87b910d6..f725dda84 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -8,10 +8,11 @@ use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio::sync::mpsc; use uuid::Uuid; -use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStoreStore}; use crate::index_controller::index_actor::IndexActorHandle; use crate::index_controller::{get_arc_ownership_blocking, UpdateMeta, UpdateStatus}; +use super::{PayloadData, Result, UpdateError, UpdateMsg, UpdateStoreStore}; + pub struct UpdateActor { path: PathBuf, store: S, @@ -72,6 +73,9 @@ where Some(Snapshot { uuid, path, ret }) => { let _ = ret.send(self.handle_snapshot(uuid, path).await); } + Some(GetSize { uuid, ret }) => { + let _ = ret.send(self.handle_get_size(uuid).await); + } None => break, } } @@ -223,4 +227,20 @@ where Ok(()) } + + async fn handle_get_size(&self, uuid: Uuid) -> Result { + let size = match self.store.get(uuid).await? { + Some(update_store) => tokio::task::spawn_blocking(move || -> anyhow::Result { + let txn = update_store.env.read_txn()?; + + update_store.get_size(&txn) + }) + .await + .map_err(|e| UpdateError::Error(e.into()))? + .map_err(|e| UpdateError::Error(e.into()))?, + None => 0, + }; + + Ok(size) + } } diff --git a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs index 59f67fbe0..860cc2bc8 100644 --- a/meilisearch-http/src/index_controller/update_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/update_actor/handle_impl.rs @@ -3,11 +3,12 @@ use std::path::{Path, PathBuf}; use tokio::sync::{mpsc, oneshot}; use uuid::Uuid; +use crate::index_controller::IndexActorHandle; + use super::{ MapUpdateStoreStore, PayloadData, Result, UpdateActor, UpdateActorHandle, UpdateMeta, UpdateMsg, UpdateStatus, }; -use crate::index_controller::IndexActorHandle; #[derive(Clone)] pub struct UpdateActorHandleImpl { @@ -36,6 +37,7 @@ where Ok(Self { sender }) } } + #[async_trait::async_trait] impl UpdateActorHandle for UpdateActorHandleImpl where @@ -43,29 +45,12 @@ where { type Data = D; - async fn update( - &self, - meta: UpdateMeta, - data: mpsc::Receiver>, - uuid: Uuid, - ) -> Result { - let (ret, receiver) = oneshot::channel(); - let msg = UpdateMsg::Update { - uuid, - data, - meta, - ret, - }; - let _ = self.sender.send(msg).await; - receiver.await.expect("update actor killed.") - } async fn get_all_updates_status(&self, uuid: Uuid) -> Result> { let (ret, receiver) = oneshot::channel(); let msg = UpdateMsg::ListUpdates { uuid, ret }; let _ = self.sender.send(msg).await; receiver.await.expect("update actor killed.") } - async fn update_status(&self, uuid: Uuid, id: u64) -> Result { let (ret, receiver) = oneshot::channel(); let msg = UpdateMsg::GetUpdate { uuid, id, ret }; @@ -93,4 +78,28 @@ where let _ = self.sender.send(msg).await; receiver.await.expect("update actor killed.") } + + async fn get_size(&self, uuid: Uuid) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = UpdateMsg::GetSize { uuid, ret }; + let _ = self.sender.send(msg).await; + receiver.await.expect("update actor killed.") + } + + async fn update( + &self, + meta: UpdateMeta, + data: mpsc::Receiver>, + uuid: Uuid, + ) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = UpdateMsg::Update { + uuid, + data, + meta, + ret, + }; + let _ = self.sender.send(msg).await; + receiver.await.expect("update actor killed.") + } } diff --git a/meilisearch-http/src/index_controller/update_actor/message.rs b/meilisearch-http/src/index_controller/update_actor/message.rs index 8e6e3c212..f8150c00a 100644 --- a/meilisearch-http/src/index_controller/update_actor/message.rs +++ b/meilisearch-http/src/index_controller/update_actor/message.rs @@ -34,4 +34,8 @@ pub enum UpdateMsg { path: PathBuf, ret: oneshot::Sender>, }, + GetSize { + uuid: Uuid, + ret: oneshot::Sender>, + }, } diff --git a/meilisearch-http/src/index_controller/update_actor/mod.rs b/meilisearch-http/src/index_controller/update_actor/mod.rs index f3c3caf04..228b47b02 100644 --- a/meilisearch-http/src/index_controller/update_actor/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/mod.rs @@ -46,6 +46,7 @@ pub trait UpdateActorHandle { async fn delete(&self, uuid: Uuid) -> Result<()>; async fn create(&self, uuid: Uuid) -> Result<()>; async fn snapshot(&self, uuid: Uuid, path: PathBuf) -> Result<()>; + async fn get_size(&self, uuid: Uuid) -> Result; async fn update( &self, meta: UpdateMeta, diff --git a/meilisearch-http/src/index_controller/update_actor/update_store.rs b/meilisearch-http/src/index_controller/update_actor/update_store.rs index a083eb186..3d6c4e396 100644 --- a/meilisearch-http/src/index_controller/update_actor/update_store.rs +++ b/meilisearch-http/src/index_controller/update_actor/update_store.rs @@ -1,3 +1,4 @@ +use std::fs::File; use std::fs::{copy, create_dir_all, remove_file}; use std::path::{Path, PathBuf}; use std::sync::Arc; @@ -6,10 +7,10 @@ use heed::types::{DecodeIgnore, OwnedType, SerdeJson}; use heed::{CompactionOption, Database, Env, EnvOpenOptions}; use parking_lot::{Mutex, RwLock}; use serde::{Deserialize, Serialize}; -use std::fs::File; use tokio::sync::mpsc; use uuid::Uuid; +use crate::helpers::EnvSizer; use crate::index_controller::updates::*; type BEU64 = heed::zerocopy::U64; @@ -409,4 +410,18 @@ where Ok(()) } + + pub fn get_size(&self, txn: &heed::RoTxn) -> anyhow::Result { + let mut size = self.env.size(); + + for path in self.pending.iter(txn)? { + let (_, path) = path?; + + if let Ok(metadata) = path.metadata() { + size += metadata.len() + } + } + + Ok(size) + } } diff --git a/meilisearch-http/src/index_controller/update_handler.rs b/meilisearch-http/src/index_controller/update_handler.rs index 17f7107a2..1eb622cbf 100644 --- a/meilisearch-http/src/index_controller/update_handler.rs +++ b/meilisearch-http/src/index_controller/update_handler.rs @@ -39,7 +39,7 @@ impl UpdateHandler { }) } - fn update_buidler(&self, update_id: u64) -> UpdateBuilder { + fn update_builder(&self, update_id: u64) -> UpdateBuilder { // We prepare the update by using the update builder. let mut update_builder = UpdateBuilder::new(update_id); if let Some(max_nb_chunks) = self.max_nb_chunks { @@ -67,7 +67,7 @@ impl UpdateHandler { let update_id = meta.id(); - let update_builder = self.update_buidler(update_id); + let update_builder = self.update_builder(update_id); let result = match meta.meta() { DocumentsAddition { diff --git a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs index d5cde13e7..27ffaa05e 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/actor.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/actor.rs @@ -41,6 +41,9 @@ impl UuidResolverActor { Some(SnapshotRequest { path, ret }) => { let _ = ret.send(self.handle_snapshot(path).await); } + Some(GetSize { ret }) => { + let _ = ret.send(self.handle_get_size().await); + } // all senders have been dropped, need to quit. None => break, } @@ -86,6 +89,10 @@ impl UuidResolverActor { self.store.insert(uid, uuid).await?; Ok(()) } + + async fn handle_get_size(&self) -> Result { + self.store.get_size().await + } } fn is_index_uid_valid(uid: &str) -> bool { diff --git a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs index f8625b379..c522e87e6 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/handle_impl.rs @@ -75,4 +75,13 @@ impl UuidResolverHandle for UuidResolverHandleImpl { .await .expect("Uuid resolver actor has been killed")?) } + + async fn get_size(&self) -> Result { + let (ret, receiver) = oneshot::channel(); + let msg = UuidResolveMsg::GetSize { ret }; + let _ = self.sender.send(msg).await; + Ok(receiver + .await + .expect("Uuid resolver actor has been killed")?) + } } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/message.rs b/meilisearch-http/src/index_controller/uuid_resolver/message.rs index 975c709b3..e7d29f05f 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/message.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/message.rs @@ -4,6 +4,7 @@ use tokio::sync::oneshot; use uuid::Uuid; use super::Result; + pub enum UuidResolveMsg { Get { uid: String, @@ -29,4 +30,7 @@ pub enum UuidResolveMsg { path: PathBuf, ret: oneshot::Sender>>, }, + GetSize { + ret: oneshot::Sender>, + }, } diff --git a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs index 43cd9995b..33a089ddb 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/mod.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/mod.rs @@ -30,6 +30,7 @@ pub trait UuidResolverHandle { async fn delete(&self, name: String) -> anyhow::Result; async fn list(&self) -> anyhow::Result>; async fn snapshot(&self, path: PathBuf) -> Result>; + async fn get_size(&self) -> Result; } #[derive(Debug, Error)] diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 435314911..1f057830b 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -8,6 +8,7 @@ use heed::{ use uuid::Uuid; use super::{Result, UuidError, UUID_STORE_SIZE}; +use crate::helpers::EnvSizer; #[async_trait::async_trait] pub trait UuidStore { @@ -19,6 +20,7 @@ pub trait UuidStore { async fn list(&self) -> Result>; async fn insert(&self, name: String, uuid: Uuid) -> Result<()>; async fn snapshot(&self, path: PathBuf) -> Result>; + async fn get_size(&self) -> Result; } pub struct HeedUuidStore { @@ -151,4 +153,8 @@ impl UuidStore for HeedUuidStore { }) .await? } + + async fn get_size(&self) -> Result { + Ok(self.env.size()) + } } diff --git a/meilisearch-http/src/routes/stats.rs b/meilisearch-http/src/routes/stats.rs index 108c67ca9..7434f1727 100644 --- a/meilisearch-http/src/routes/stats.rs +++ b/meilisearch-http/src/routes/stats.rs @@ -1,4 +1,5 @@ -use std::collections::{BTreeMap, HashMap}; +use std::collections::BTreeMap; +use std::iter::FromIterator; use actix_web::get; use actix_web::web; @@ -6,13 +7,15 @@ use actix_web::HttpResponse; use chrono::{DateTime, Utc}; use serde::Serialize; +use crate::data::Stats; use crate::error::ResponseError; use crate::helpers::Authentication; +use crate::index_controller::IndexStats; use crate::routes::IndexParam; use crate::Data; pub fn services(cfg: &mut web::ServiceConfig) { - cfg.service(index_stats) + cfg.service(get_index_stats) .service(get_stats) .service(get_version); } @@ -22,28 +25,56 @@ pub fn services(cfg: &mut web::ServiceConfig) { struct IndexStatsResponse { number_of_documents: u64, is_indexing: bool, - fields_distribution: BTreeMap, + fields_distribution: BTreeMap, +} + +impl From for IndexStatsResponse { + fn from(stats: IndexStats) -> Self { + Self { + number_of_documents: stats.number_of_documents, + is_indexing: stats.is_indexing, + fields_distribution: BTreeMap::from_iter(stats.fields_distribution.into_iter()), + } + } } #[get("/indexes/{index_uid}/stats", wrap = "Authentication::Private")] -async fn index_stats( - _data: web::Data, - _path: web::Path, +async fn get_index_stats( + data: web::Data, + path: web::Path, ) -> Result { - todo!() + let response: IndexStatsResponse = data.get_index_stats(path.index_uid.clone()).await?.into(); + + Ok(HttpResponse::Ok().json(response)) } #[derive(Serialize)] #[serde(rename_all = "camelCase")] -struct StatsResult { +struct StatsResponse { database_size: u64, last_update: Option>, - indexes: HashMap, + indexes: BTreeMap, +} + +impl From for StatsResponse { + fn from(stats: Stats) -> Self { + Self { + database_size: stats.database_size, + last_update: stats.last_update, + indexes: stats + .indexes + .into_iter() + .map(|(uid, index_stats)| (uid, index_stats.into())) + .collect(), + } + } } #[get("/stats", wrap = "Authentication::Private")] -async fn get_stats(_data: web::Data) -> Result { - todo!() +async fn get_stats(data: web::Data) -> Result { + let response: StatsResponse = data.get_stats().await?.into(); + + Ok(HttpResponse::Ok().json(response)) } #[derive(Serialize)] @@ -58,11 +89,11 @@ struct VersionResponse { async fn get_version() -> HttpResponse { let commit_sha = match option_env!("COMMIT_SHA") { Some("") | None => env!("VERGEN_SHA"), - Some(commit_sha) => commit_sha + Some(commit_sha) => commit_sha, }; let commit_date = match option_env!("COMMIT_DATE") { Some("") | None => env!("VERGEN_COMMIT_DATE"), - Some(commit_date) => commit_date + Some(commit_date) => commit_date, }; HttpResponse::Ok().json(VersionResponse { diff --git a/meilisearch-http/tests/common/index.rs b/meilisearch-http/tests/common/index.rs index 78ad846de..67ea6c19a 100644 --- a/meilisearch-http/tests/common/index.rs +++ b/meilisearch-http/tests/common/index.rs @@ -161,6 +161,11 @@ impl Index<'_> { let url = format!("/indexes/{}/settings", self.uid); self.service.delete(url).await } + + pub async fn stats(&self) -> (Value, StatusCode) { + let url = format!("/indexes/{}/stats", self.uid); + self.service.get(url).await + } } pub struct GetDocumentOptions; diff --git a/meilisearch-http/tests/common/server.rs b/meilisearch-http/tests/common/server.rs index 4655b10a8..e814ab3ed 100644 --- a/meilisearch-http/tests/common/server.rs +++ b/meilisearch-http/tests/common/server.rs @@ -58,6 +58,10 @@ impl Server { pub async fn version(&self) -> (Value, StatusCode) { self.service.get("/version").await } + + pub async fn stats(&self) -> (Value, StatusCode) { + self.service.get("/stats").await + } } pub fn default_settings(dir: impl AsRef) -> Opt { diff --git a/meilisearch-http/tests/index/mod.rs b/meilisearch-http/tests/index/mod.rs index c9804c160..9996df2e7 100644 --- a/meilisearch-http/tests/index/mod.rs +++ b/meilisearch-http/tests/index/mod.rs @@ -1,4 +1,5 @@ mod create_index; mod delete_index; mod get_index; +mod stats; mod update_index; diff --git a/meilisearch-http/tests/index/stats.rs b/meilisearch-http/tests/index/stats.rs new file mode 100644 index 000000000..e1d8bd211 --- /dev/null +++ b/meilisearch-http/tests/index/stats.rs @@ -0,0 +1,53 @@ +use serde_json::json; + +use crate::common::Server; + +#[actix_rt::test] +async fn stats() { + let server = Server::new().await; + let index = server.index("test"); + let (_, code) = index.create(Some("id")).await; + + assert_eq!(code, 200); + + let (response, code) = index.stats().await; + + assert_eq!(code, 200); + assert_eq!(response["numberOfDocuments"], 0); + assert_eq!(response["isIndexing"], false); + assert!(response["fieldsDistribution"] + .as_object() + .unwrap() + .is_empty()); + + let documents = json!([ + { + "id": 1, + "name": "Alexey", + }, + { + "id": 2, + "age": 45, + } + ]); + + let (response, code) = index.add_documents(documents, None).await; + assert_eq!(code, 202); + assert_eq!(response["updateId"], 0); + + let (response, code) = index.stats().await; + + assert_eq!(code, 200); + assert_eq!(response["isIndexing"], true); + + index.wait_update_id(0).await; + + let (response, code) = index.stats().await; + + assert_eq!(code, 200); + assert_eq!(response["numberOfDocuments"], 2); + assert_eq!(response["isIndexing"], false); + assert_eq!(response["fieldsDistribution"]["id"], 2); + assert_eq!(response["fieldsDistribution"]["name"], 1); + assert_eq!(response["fieldsDistribution"]["age"], 1); +} diff --git a/meilisearch-http/tests/stats/mod.rs b/meilisearch-http/tests/stats/mod.rs index db04fb1c0..ee10f9708 100644 --- a/meilisearch-http/tests/stats/mod.rs +++ b/meilisearch-http/tests/stats/mod.rs @@ -1,3 +1,5 @@ +use serde_json::json; + use crate::common::Server; #[actix_rt::test] @@ -19,3 +21,56 @@ async fn test_healthyness() { assert_eq!(status_code, 200); assert_eq!(response["status"], "available"); } + +#[actix_rt::test] +async fn stats() { + let server = Server::new().await; + let index = server.index("test"); + let (_, code) = index.create(Some("id")).await; + + assert_eq!(code, 200); + + let (response, code) = server.stats().await; + + assert_eq!(code, 200); + assert!(response.get("databaseSize").is_some()); + assert!(response.get("lastUpdate").is_some()); + assert!(response["indexes"].get("test").is_some()); + assert_eq!(response["indexes"]["test"]["numberOfDocuments"], 0); + assert_eq!(response["indexes"]["test"]["isIndexing"], false); + + let last_update = response["lastUpdate"].as_str().unwrap(); + + let documents = json!([ + { + "id": 1, + "name": "Alexey", + }, + { + "id": 2, + "age": 45, + } + ]); + + let (response, code) = index.add_documents(documents, None).await; + assert_eq!(code, 202); + assert_eq!(response["updateId"], 0); + + let (response, code) = server.stats().await; + + assert_eq!(code, 200); + assert_eq!(response["indexes"]["test"]["isIndexing"], true); + + index.wait_update_id(0).await; + + let (response, code) = server.stats().await; + + assert_eq!(code, 200); + assert!(response["databaseSize"].as_u64().unwrap() > 0); + assert!(response["lastUpdate"].as_str().unwrap() > last_update); + assert_eq!(response["indexes"]["test"]["numberOfDocuments"], 2); + assert_eq!(response["indexes"]["test"]["isIndexing"], false); + assert_eq!(response["indexes"]["test"]["fieldsDistribution"]["id"], 2); + assert_eq!(response["indexes"]["test"]["fieldsDistribution"]["name"], 1); + assert_eq!(response["indexes"]["test"]["fieldsDistribution"]["age"], 1); +}