From e226b1a87fd4b5b902fe42270827b3237c6f4739 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 12 Oct 2021 11:22:44 +0200 Subject: [PATCH 01/68] rewrite the main analytics module and the information sent in the tick --- Cargo.lock | 15 +- meilisearch-http/Cargo.toml | 5 +- meilisearch-http/src/analytics.rs | 252 +++++++++++++++++------------- meilisearch-http/src/main.rs | 5 +- 4 files changed, 163 insertions(+), 114 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fe025ebe1..36849a58f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1658,6 +1658,7 @@ dependencies = [ "regex", "reqwest", "rustls", + "segment", "serde", "serde_json", "serde_url_params", @@ -1677,7 +1678,6 @@ dependencies = [ "uuid", "vergen", "walkdir", - "whoami", "zip", ] @@ -2540,6 +2540,19 @@ dependencies = [ "untrusted", ] +[[package]] +name = "segment" +version = "0.1.1" +source = "git+https://github.com/meilisearch/segment#656b91e1f7a2c6443e2a8ed59f8942400e9a811e" +dependencies = [ + "async-trait", + "chrono", + "reqwest", + "serde", + "serde_json", + "thiserror", +] + [[package]] name = "semver" version = "0.9.0" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 394892db9..ffc660c80 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -55,6 +55,7 @@ rand = "0.8.4" rayon = "1.5.1" regex = "1.5.4" rustls = "0.19.1" +segment = { git = "https://github.com/meilisearch/segment", optional = true } serde = { version = "1.0.130", features = ["derive"] } serde_json = { version = "1.0.67", features = ["preserve_order"] } sha2 = "0.9.6" @@ -69,8 +70,6 @@ uuid = { version = "0.8.2", features = ["serde"] } walkdir = "2.3.2" obkv = "0.2.0" pin-project = "1.0.8" -whoami = { version = "1.1.3", optional = true } -reqwest = { version = "0.11.4", features = ["json", "rustls-tls"], default-features = false, optional = true } sysinfo = "0.20.2" tokio-stream = "0.1.7" @@ -91,7 +90,7 @@ mini-dashboard = [ "tempfile", "zip", ] -analytics = ["whoami", "reqwest"] +analytics = ["segment"] default = ["analytics", "mini-dashboard"] [target.'cfg(target_os = "linux")'.dependencies] diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 596b69aa0..41f487bb4 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -1,126 +1,164 @@ -use std::hash::{Hash, Hasher}; -use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; - -use log::debug; +use meilisearch_lib::index_controller::Stats; use meilisearch_lib::MeiliSearch; -use serde::Serialize; -use siphasher::sip::SipHasher; +use once_cell::sync::Lazy; +use segment::message::{Identify, Track, User}; +use segment::{AutoBatcher, Batcher, HttpClient}; +use serde_json::{json, Value}; +use std::fmt::Display; +use std::time::{Duration, Instant}; +use sysinfo::DiskExt; +use sysinfo::ProcessorExt; +use sysinfo::System; +use sysinfo::SystemExt; +use tokio::sync::Mutex; +use uuid::Uuid; use crate::Opt; -const AMPLITUDE_API_KEY: &str = "f7fba398780e06d8fe6666a9be7e3d47"; +const SEGMENT_API_KEY: &str = "vHi89WrNDckHSQssyUJqLvIyp2QFITSC"; -#[derive(Debug, Serialize)] -struct EventProperties { - database_size: u64, - last_update_timestamp: Option, //timestamp - number_of_documents: Vec, +pub struct Analytics { + user: User, + opt: Opt, + batcher: Mutex, } -impl EventProperties { - async fn from(data: MeiliSearch) -> anyhow::Result { - let stats = data.get_all_stats().await?; +impl Analytics { + pub fn publish(&'static self, event_name: String, send: Value) { + tokio::spawn(async move { + let _ = 
self + .batcher + .lock() + .await + .push(Track { + user: self.user.clone(), + event: event_name.clone(), + properties: send, + ..Default::default() + }) + .await; + println!("ANALYTICS: {} added to batch", event_name) + }); + } - let database_size = stats.database_size; - let last_update_timestamp = stats.last_update.map(|u| u.timestamp()); + pub fn tick(&'static self, meilisearch: MeiliSearch) { + tokio::spawn(async move { + loop { + tokio::time::sleep(Duration::from_secs(60)).await; // 1 minutes + println!("ANALYTICS: should do things"); + + if let Ok(stats) = meilisearch.get_all_stats().await { + let traits = Self::compute_traits(&self.opt, stats); + let user = self.user.clone(); + println!("ANALYTICS: Pushing our identify tick"); + let _ = self + .batcher + .lock() + .await + .push(Identify { + user, + traits, + ..Default::default() + }) + .await; + } + println!("ANALYTICS: Pushing our batch"); + let _ = self.batcher.lock().await.flush().await; + } + }); + } +} + +impl Analytics { + pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self { + let user_id = std::fs::read_to_string(opt.db_path.join("user-id")); + let first_time_run = user_id.is_err(); + let user_id = user_id.unwrap_or(Uuid::new_v4().to_string()); + let _ = std::fs::write(opt.db_path.join("user-id"), user_id.as_bytes()); + let client = HttpClient::default(); + let user = User::UserId { + user_id: user_id.clone(), + }; + let batcher = Batcher::new(None); + let batcher = Mutex::new(AutoBatcher::new( + client, + batcher, + SEGMENT_API_KEY.to_string(), + )); + let segment = Box::new(Self { + user, + opt: opt.clone(), + batcher, + }); + let segment = Box::leak(segment); + + // send an identify event + let _ = segment + .batcher + .lock() + .await + .push(Identify { + user: segment.user.clone(), + // TODO: TAMO: what should we do when meilisearch is broken at start + traits: Self::compute_traits( + &segment.opt, + meilisearch.get_all_stats().await.unwrap(), + ), + ..Default::default() + }) + .await; + println!("ANALYTICS: pushed the identify event"); + + // send the associated track event + if first_time_run { + segment.publish("Launched for the first time".to_string(), json!({})); + } + + // start the runtime tick + segment.tick(meilisearch.clone()); + + segment + } + + fn compute_traits(opt: &Opt, stats: Stats) -> Value { + static FIRST_START_TIMESTAMP: Lazy = Lazy::new(|| Instant::now()); + static SYSTEM: Lazy = Lazy::new(|| { + let mut sys = System::new_all(); + sys.refresh_all(); + json!({ + "distribution": sys.name().zip(sys.kernel_version()).map(|(name, version)| format!("{}: {}", name, version)), + "core_number": sys.processors().len(), + "ram_size": sys.total_memory(), + "frequency": sys.processors().iter().map(|cpu| cpu.frequency()).sum::() / sys.processors().len() as u64, + "disk_size": sys.disks().iter().map(|disk| disk.available_space()).max(), + "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), + }) + }); let number_of_documents = stats .indexes .values() .map(|index| index.number_of_documents) - .collect(); + .collect::>(); - Ok(EventProperties { - database_size, - last_update_timestamp, - number_of_documents, + json!({ + "system": *SYSTEM, + "stats": { + "database_size": stats.database_size, + "indexes_number": stats.indexes.len(), + "documents_number": number_of_documents, + }, + "infos": { + "version": env!("CARGO_PKG_VERSION").to_string(), + "env": opt.env.clone(), + "snapshot": opt.schedule_snapshot, + "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / 60 * 60 * 
24, // one day + }, }) } } -#[derive(Debug, Serialize)] -struct UserProperties<'a> { - env: &'a str, - start_since_days: u64, - user_email: Option, - server_provider: Option, -} - -#[derive(Debug, Serialize)] -struct Event<'a> { - user_id: &'a str, - event_type: &'a str, - device_id: &'a str, - time: u64, - app_version: &'a str, - user_properties: UserProperties<'a>, - event_properties: Option, -} - -#[derive(Debug, Serialize)] -struct AmplitudeRequest<'a> { - api_key: &'a str, - events: Vec>, -} - -pub async fn analytics_sender(data: MeiliSearch, opt: Opt) { - let username = whoami::username(); - let hostname = whoami::hostname(); - let platform = whoami::platform(); - - let uid = username + &hostname + &platform.to_string(); - - let mut hasher = SipHasher::new(); - uid.hash(&mut hasher); - let hash = hasher.finish(); - - let uid = format!("{:X}", hash); - let platform = platform.to_string(); - let first_start = Instant::now(); - - loop { - let n = SystemTime::now().duration_since(UNIX_EPOCH).unwrap(); - let user_id = &uid; - let device_id = &platform; - let time = n.as_secs(); - let event_type = "runtime_tick"; - let elapsed_since_start = first_start.elapsed().as_secs() / 86_400; // One day - let event_properties = EventProperties::from(data.clone()).await.ok(); - let app_version = env!("CARGO_PKG_VERSION").to_string(); - let app_version = app_version.as_str(); - let user_email = std::env::var("MEILI_USER_EMAIL").ok(); - let server_provider = std::env::var("MEILI_SERVER_PROVIDER").ok(); - let user_properties = UserProperties { - env: &opt.env, - start_since_days: elapsed_since_start, - user_email, - server_provider, - }; - - let event = Event { - user_id, - event_type, - device_id, - time, - app_version, - user_properties, - event_properties, - }; - - let request = AmplitudeRequest { - api_key: AMPLITUDE_API_KEY, - events: vec![event], - }; - - let response = reqwest::Client::new() - .post("https://api2.amplitude.com/2/httpapi") - .timeout(Duration::from_secs(60)) // 1 minute max - .json(&request) - .send() - .await; - if let Err(e) = response { - debug!("Unsuccessful call to Amplitude: {}", e); - } - - tokio::time::sleep(Duration::from_secs(3600)).await; +impl Display for Analytics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{:?}", self.user) } } diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 864015dd1..73105927e 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -48,9 +48,8 @@ async fn main() -> anyhow::Result<()> { #[cfg(all(not(debug_assertions), feature = "analytics"))] if !opt.no_analytics { - let analytics_data = meilisearch.clone(); - let analytics_opt = opt.clone(); - tokio::task::spawn(analytics::analytics_sender(analytics_data, analytics_opt)); + let analytics = analytics::Analytics::new(&opt, &meilisearch).await; + println!("go my analytics back"); } print_launch_resume(&opt); From 664d09e86a3ce1ae285e5a582d3a54c638c22bfd Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 12 Oct 2021 13:31:56 +0200 Subject: [PATCH 02/68] makes the analytics works with the option and the feature --- Cargo.lock | 2 +- meilisearch-http/src/analytics.rs | 318 +++++++++++++++++------------- meilisearch-http/src/lib.rs | 1 - meilisearch-http/src/main.rs | 21 +- 4 files changed, 195 insertions(+), 147 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 36849a58f..93a276a60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2543,7 +2543,7 @@ dependencies = [ [[package]] name = "segment" version = "0.1.1" 
-source = "git+https://github.com/meilisearch/segment#656b91e1f7a2c6443e2a8ed59f8942400e9a811e" +source = "git+https://github.com/meilisearch/segment#042a8631361f02ba84e8bb06f9120e93bf1922f2" dependencies = [ "async-trait", "chrono", diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 41f487bb4..6bb52faad 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -1,164 +1,210 @@ -use meilisearch_lib::index_controller::Stats; -use meilisearch_lib::MeiliSearch; -use once_cell::sync::Lazy; -use segment::message::{Identify, Track, User}; -use segment::{AutoBatcher, Batcher, HttpClient}; -use serde_json::{json, Value}; +use serde_json::Value; use std::fmt::Display; -use std::time::{Duration, Instant}; -use sysinfo::DiskExt; -use sysinfo::ProcessorExt; -use sysinfo::System; -use sysinfo::SystemExt; -use tokio::sync::Mutex; -use uuid::Uuid; use crate::Opt; -const SEGMENT_API_KEY: &str = "vHi89WrNDckHSQssyUJqLvIyp2QFITSC"; +// if we are in release mode and the feature analytics was enabled +#[cfg(all(not(debug_assertions), feature = "analytics"))] +mod segment { + use crate::analytics::Analytics; + use meilisearch_lib::index_controller::Stats; + use meilisearch_lib::MeiliSearch; + use once_cell::sync::Lazy; + use segment::message::{Identify, Track, User}; + use segment::{AutoBatcher, Batcher, HttpClient}; + use serde_json::{json, Value}; + use std::fmt::Display; + use std::time::{Duration, Instant}; + use sysinfo::DiskExt; + use sysinfo::ProcessorExt; + use sysinfo::System; + use sysinfo::SystemExt; + use tokio::sync::Mutex; + use uuid::Uuid; -pub struct Analytics { - user: User, - opt: Opt, - batcher: Mutex, -} + use crate::Opt; -impl Analytics { - pub fn publish(&'static self, event_name: String, send: Value) { - tokio::spawn(async move { - let _ = self + const SEGMENT_API_KEY: &str = "vHi89WrNDckHSQssyUJqLvIyp2QFITSC"; + + pub struct SegmentAnalytics { + user: User, + opt: Opt, + batcher: Mutex, + } + + impl SegmentAnalytics { + fn compute_traits(opt: &Opt, stats: Stats) -> Value { + static FIRST_START_TIMESTAMP: Lazy = Lazy::new(Instant::now); + static SYSTEM: Lazy = Lazy::new(|| { + let mut sys = System::new_all(); + sys.refresh_all(); + json!({ + "distribution": sys.name().zip(sys.kernel_version()).map(|(name, version)| format!("{}: {}", name, version)), + "core_number": sys.processors().len(), + "ram_size": sys.total_memory(), + "frequency": sys.processors().iter().map(|cpu| cpu.frequency()).sum::() / sys.processors().len() as u64, + "disk_size": sys.disks().iter().map(|disk| disk.available_space()).max(), + "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), + }) + }); + let number_of_documents = stats + .indexes + .values() + .map(|index| index.number_of_documents) + .collect::>(); + + json!({ + "system": *SYSTEM, + "stats": { + "database_size": stats.database_size, + "indexes_number": stats.indexes.len(), + "documents_number": number_of_documents, + }, + "infos": { + "version": env!("CARGO_PKG_VERSION").to_string(), + "env": opt.env.clone(), + "snapshot": opt.schedule_snapshot, + "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / 60 * 60 * 24, // one day + }, + }) + } + + pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self { + // see if there is already a user-id + let user_id = std::fs::read_to_string(opt.db_path.join("user-id")); + let first_time_run = user_id.is_err(); + // if not, generate a new user-id and save it to the fs + let user_id = user_id.unwrap_or_else(|_| 
Uuid::new_v4().to_string()); + let _ = std::fs::write(opt.db_path.join("user-id"), user_id.as_bytes()); + + let client = HttpClient::default(); + let user = User::UserId { + user_id: user_id.clone(), + }; + let batcher = Mutex::new(AutoBatcher::new( + client, + Batcher::new(None), + SEGMENT_API_KEY.to_string(), + )); + let segment = Box::new(Self { + user, + opt: opt.clone(), + batcher, + }); + let segment = Box::leak(segment); + + // send an identify event + let _ = segment .batcher .lock() .await - .push(Track { - user: self.user.clone(), - event: event_name.clone(), - properties: send, + .push(Identify { + user: segment.user.clone(), + // TODO: TAMO: what should we do when meilisearch is broken at start + traits: Self::compute_traits( + &segment.opt, + meilisearch.get_all_stats().await.unwrap(), + ), ..Default::default() }) .await; - println!("ANALYTICS: {} added to batch", event_name) - }); - } - pub fn tick(&'static self, meilisearch: MeiliSearch) { - tokio::spawn(async move { - loop { - tokio::time::sleep(Duration::from_secs(60)).await; // 1 minutes - println!("ANALYTICS: should do things"); - - if let Ok(stats) = meilisearch.get_all_stats().await { - let traits = Self::compute_traits(&self.opt, stats); - let user = self.user.clone(); - println!("ANALYTICS: Pushing our identify tick"); - let _ = self - .batcher - .lock() - .await - .push(Identify { - user, - traits, - ..Default::default() - }) - .await; - } - println!("ANALYTICS: Pushing our batch"); - let _ = self.batcher.lock().await.flush().await; + // send the associated track event + if first_time_run { + segment.publish("Launched for the first time".to_string(), json!({})); } - }); - } -} -impl Analytics { - pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self { - let user_id = std::fs::read_to_string(opt.db_path.join("user-id")); - let first_time_run = user_id.is_err(); - let user_id = user_id.unwrap_or(Uuid::new_v4().to_string()); - let _ = std::fs::write(opt.db_path.join("user-id"), user_id.as_bytes()); - let client = HttpClient::default(); - let user = User::UserId { - user_id: user_id.clone(), - }; - let batcher = Batcher::new(None); - let batcher = Mutex::new(AutoBatcher::new( - client, - batcher, - SEGMENT_API_KEY.to_string(), - )); - let segment = Box::new(Self { - user, - opt: opt.clone(), - batcher, - }); - let segment = Box::leak(segment); + // start the runtime tick + segment.tick(meilisearch.clone()); - // send an identify event - let _ = segment - .batcher - .lock() - .await - .push(Identify { - user: segment.user.clone(), - // TODO: TAMO: what should we do when meilisearch is broken at start - traits: Self::compute_traits( - &segment.opt, - meilisearch.get_all_stats().await.unwrap(), - ), - ..Default::default() - }) - .await; - println!("ANALYTICS: pushed the identify event"); - - // send the associated track event - if first_time_run { - segment.publish("Launched for the first time".to_string(), json!({})); + segment } - // start the runtime tick - segment.tick(meilisearch.clone()); + fn tick(&'static self, meilisearch: MeiliSearch) { + tokio::spawn(async move { + loop { + tokio::time::sleep(Duration::from_secs(60)).await; // 1 minutes + println!("ANALYTICS: should do things"); - segment + if let Ok(stats) = meilisearch.get_all_stats().await { + let traits = Self::compute_traits(&self.opt, stats); + let user = self.user.clone(); + println!("ANALYTICS: Pushing our identify tick"); + let _ = self + .batcher + .lock() + .await + .push(Identify { + user, + traits, + ..Default::default() + }) + 
.await; + } + let _ = self.batcher.lock().await.flush().await; + } + }); + } } - fn compute_traits(opt: &Opt, stats: Stats) -> Value { - static FIRST_START_TIMESTAMP: Lazy = Lazy::new(|| Instant::now()); - static SYSTEM: Lazy = Lazy::new(|| { - let mut sys = System::new_all(); - sys.refresh_all(); - json!({ - "distribution": sys.name().zip(sys.kernel_version()).map(|(name, version)| format!("{}: {}", name, version)), - "core_number": sys.processors().len(), - "ram_size": sys.total_memory(), - "frequency": sys.processors().iter().map(|cpu| cpu.frequency()).sum::() / sys.processors().len() as u64, - "disk_size": sys.disks().iter().map(|disk| disk.available_space()).max(), - "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), - }) - }); - let number_of_documents = stats - .indexes - .values() - .map(|index| index.number_of_documents) - .collect::>(); + #[async_trait::async_trait] + impl super::Analytics for SegmentAnalytics { + fn publish(&'static self, event_name: String, send: Value) { + tokio::spawn(async move { + let _ = self + .batcher + .lock() + .await + .push(Track { + user: self.user.clone(), + event: event_name.clone(), + properties: send, + ..Default::default() + }) + .await; + }); + } + } - json!({ - "system": *SYSTEM, - "stats": { - "database_size": stats.database_size, - "indexes_number": stats.indexes.len(), - "documents_number": number_of_documents, - }, - "infos": { - "version": env!("CARGO_PKG_VERSION").to_string(), - "env": opt.env.clone(), - "snapshot": opt.schedule_snapshot, - "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / 60 * 60 * 24, // one day - }, - }) + impl Display for SegmentAnalytics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.user) + } } } -impl Display for Analytics { +// if we are in debug mode OR the analytics feature is disabled +#[cfg(any(debug_assertions, not(feature = "analytics")))] +pub type SegmentAnalytics = MockAnalytics; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +pub type SegmentAnalytics = segment::SegmentAnalytics; + +pub struct MockAnalytics { + user: String, +} + +impl MockAnalytics { + pub fn new(opt: &Opt) -> &'static Self { + let user = std::fs::read_to_string(opt.db_path.join("user-id")) + .unwrap_or_else(|_| "No user-id".to_string()); + let analytics = Box::new(Self { user }); + Box::leak(analytics) + } +} + +#[async_trait::async_trait] +impl Analytics for MockAnalytics { + /// This is a noop and should be optimized out + fn publish(&'static self, _event_name: String, _send: Value) {} +} + +impl Display for MockAnalytics { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.user) + write!(f, "{}", self.user) } } + +#[async_trait::async_trait] +pub trait Analytics: Display { + fn publish(&'static self, event_name: String, send: Value); +} diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 7ca967ce2..7376dedac 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -3,7 +3,6 @@ pub mod error; #[macro_use] pub mod extractors; -#[cfg(all(not(debug_assertions), feature = "analytics"))] pub mod analytics; pub mod helpers; pub mod option; diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 73105927e..13833318d 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -1,13 +1,12 @@ use std::env; use actix_web::HttpServer; +use meilisearch_http::analytics; +use meilisearch_http::analytics::Analytics; use 
meilisearch_http::{create_app, setup_meilisearch, Opt}; use meilisearch_lib::MeiliSearch; use structopt::StructOpt; -#[cfg(all(not(debug_assertions), feature = "analytics"))] -use meilisearch_http::analytics; - #[cfg(target_os = "linux")] #[global_allocator] static ALLOC: tikv_jemallocator::Jemalloc = tikv_jemallocator::Jemalloc; @@ -47,12 +46,15 @@ async fn main() -> anyhow::Result<()> { let meilisearch = setup_meilisearch(&opt)?; #[cfg(all(not(debug_assertions), feature = "analytics"))] - if !opt.no_analytics { - let analytics = analytics::Analytics::new(&opt, &meilisearch).await; - println!("go my analytics back"); - } + let analytics = if !opt.no_analytics { + analytics::SegmentAnalytics::new(&opt, &meilisearch).await as &'static dyn Analytics + } else { + analytics::MockAnalytics::new(&opt) as &'static dyn Analytics + }; + #[cfg(any(debug_assertions, not(feature = "analytics")))] + let analytics = analytics::MockAnalytics::new(&opt); - print_launch_resume(&opt); + print_launch_resume(&opt, analytics); run_http(meilisearch, opt).await?; @@ -77,7 +79,7 @@ async fn run_http(data: MeiliSearch, opt: Opt) -> anyhow::Result<()> { Ok(()) } -pub fn print_launch_resume(opt: &Opt) { +pub fn print_launch_resume(opt: &Opt, analytics: &'static dyn Analytics) { let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown"); let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown"); @@ -119,6 +121,7 @@ Anonymous telemetry: \"Enabled\"" ); } } + eprintln!("Unique User ID:\t\"{}\"", analytics); eprintln!(); From d72c887422796cbec31a5eb4528faa615bc36cf1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 12 Oct 2021 14:32:44 +0200 Subject: [PATCH 03/68] makes the analytics available for all the routes --- meilisearch-http/src/analytics.rs | 4 +- meilisearch-http/src/lib.rs | 13 ++++-- meilisearch-http/src/main.rs | 15 ++++--- meilisearch-http/tests/common/service.rs | 42 ++++++++++++++++--- meilisearch-http/tests/content_type.rs | 5 ++- .../tests/documents/add_documents.rs | 11 +++-- 6 files changed, 69 insertions(+), 21 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 6bb52faad..f04170fbb 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -151,6 +151,7 @@ mod segment { impl super::Analytics for SegmentAnalytics { fn publish(&'static self, event_name: String, send: Value) { tokio::spawn(async move { + println!("ANALYTICS pushing {} in the batcher", event_name); let _ = self .batcher .lock() @@ -162,6 +163,7 @@ mod segment { ..Default::default() }) .await; + println!("ANALYTICS {} pushed", event_name); }); } } @@ -205,6 +207,6 @@ impl Display for MockAnalytics { } #[async_trait::async_trait] -pub trait Analytics: Display { +pub trait Analytics: Display + Sync + Send { fn publish(&'static self, event_name: String, send: Value); } diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs index 7376dedac..ead678693 100644 --- a/meilisearch-http/src/lib.rs +++ b/meilisearch-http/src/lib.rs @@ -12,6 +12,7 @@ use std::time::Duration; use crate::error::MeilisearchHttpError; use crate::extractors::authentication::AuthConfig; use actix_web::error::JsonPayloadError; +use analytics::Analytics; use error::PayloadError; use http::header::CONTENT_TYPE; pub use option::Opt; @@ -73,10 +74,16 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result { meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone()) } -pub fn configure_data(config: &mut web::ServiceConfig, data: MeiliSearch, 
opt: &Opt) { +pub fn configure_data( + config: &mut web::ServiceConfig, + data: MeiliSearch, + opt: &Opt, + analytics: &'static dyn Analytics, +) { let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; config .app_data(data) + .app_data(web::Data::new(analytics)) .app_data( web::JsonConfig::default() .content_type(|mime| mime == mime::APPLICATION_JSON) @@ -167,7 +174,7 @@ pub fn dashboard(config: &mut web::ServiceConfig, _enable_frontend: bool) { #[macro_export] macro_rules! create_app { - ($data:expr, $enable_frontend:expr, $opt:expr) => {{ + ($data:expr, $enable_frontend:expr, $opt:expr, $analytics:expr) => {{ use actix_cors::Cors; use actix_web::middleware::TrailingSlash; use actix_web::App; @@ -177,7 +184,7 @@ macro_rules! create_app { use meilisearch_http::{configure_auth, configure_data, dashboard}; App::new() - .configure(|s| configure_data(s, $data.clone(), &$opt)) + .configure(|s| configure_data(s, $data.clone(), &$opt, $analytics)) .configure(|s| configure_auth(s, &$opt)) .configure(routes::configure) .configure(|s| dashboard(s, $enable_frontend)) diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 13833318d..9a258b287 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -56,17 +56,22 @@ async fn main() -> anyhow::Result<()> { print_launch_resume(&opt, analytics); - run_http(meilisearch, opt).await?; + run_http(meilisearch, opt, analytics).await?; Ok(()) } -async fn run_http(data: MeiliSearch, opt: Opt) -> anyhow::Result<()> { +async fn run_http( + data: MeiliSearch, + opt: Opt, + analytics: &'static dyn Analytics, +) -> anyhow::Result<()> { let _enable_dashboard = &opt.env == "development"; let opt_clone = opt.clone(); - let http_server = HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone)) - // Disable signals allows the server to terminate immediately when a user enter CTRL-C - .disable_signals(); + let http_server = + HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone, analytics)) + // Disable signals allows the server to terminate immediately when a user enter CTRL-C + .disable_signals(); if let Some(config) = opt.get_ssl_config()? 
{ http_server diff --git a/meilisearch-http/tests/common/service.rs b/meilisearch-http/tests/common/service.rs index 8a3b07c1d..299769107 100644 --- a/meilisearch-http/tests/common/service.rs +++ b/meilisearch-http/tests/common/service.rs @@ -2,7 +2,7 @@ use actix_web::{http::StatusCode, test}; use meilisearch_lib::MeiliSearch; use serde_json::Value; -use meilisearch_http::{create_app, Opt}; +use meilisearch_http::{analytics, create_app, Opt}; pub struct Service { pub meilisearch: MeiliSearch, @@ -11,7 +11,13 @@ pub struct Service { impl Service { pub async fn post(&self, url: impl AsRef, body: Value) -> (Value, StatusCode) { - let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; + let app = test::init_service(create_app!( + &self.meilisearch, + true, + &self.options, + analytics::MockAnalytics::new(&self.options) + )) + .await; let req = test::TestRequest::post() .uri(url.as_ref()) @@ -31,7 +37,13 @@ impl Service { url: impl AsRef, body: impl AsRef, ) -> (Value, StatusCode) { - let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; + let app = test::init_service(create_app!( + &self.meilisearch, + true, + &self.options, + analytics::MockAnalytics::new(&self.options) + )) + .await; let req = test::TestRequest::post() .uri(url.as_ref()) @@ -47,7 +59,13 @@ impl Service { } pub async fn get(&self, url: impl AsRef) -> (Value, StatusCode) { - let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; + let app = test::init_service(create_app!( + &self.meilisearch, + true, + &self.options, + analytics::MockAnalytics::new(&self.options) + )) + .await; let req = test::TestRequest::get().uri(url.as_ref()).to_request(); let res = test::call_service(&app, req).await; @@ -59,7 +77,13 @@ impl Service { } pub async fn put(&self, url: impl AsRef, body: Value) -> (Value, StatusCode) { - let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; + let app = test::init_service(create_app!( + &self.meilisearch, + true, + &self.options, + analytics::MockAnalytics::new(&self.options) + )) + .await; let req = test::TestRequest::put() .uri(url.as_ref()) @@ -74,7 +98,13 @@ impl Service { } pub async fn delete(&self, url: impl AsRef) -> (Value, StatusCode) { - let app = test::init_service(create_app!(&self.meilisearch, true, &self.options)).await; + let app = test::init_service(create_app!( + &self.meilisearch, + true, + &self.options, + analytics::MockAnalytics::new(&self.options) + )) + .await; let req = test::TestRequest::delete().uri(url.as_ref()).to_request(); let res = test::call_service(&app, req).await; diff --git a/meilisearch-http/tests/content_type.rs b/meilisearch-http/tests/content_type.rs index 45b3b784b..79c497bb7 100644 --- a/meilisearch-http/tests/content_type.rs +++ b/meilisearch-http/tests/content_type.rs @@ -4,7 +4,7 @@ mod common; use crate::common::Server; use actix_web::test; -use meilisearch_http::create_app; +use meilisearch_http::{analytics, create_app}; use serde_json::{json, Value}; #[actix_rt::test] @@ -40,7 +40,8 @@ async fn error_json_bad_content_type() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + analytics::MockAnalytics::new(&server.service.options) )) .await; for route in routes { diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index 4363bd29d..9e55ef2bd 100644 --- 
a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -1,7 +1,7 @@ use crate::common::{GetAllDocumentsOptions, Server}; use actix_web::test; use chrono::DateTime; -use meilisearch_http::create_app; +use meilisearch_http::{analytics, create_app}; use serde_json::{json, Value}; /// This is the basic usage of our API and every other tests uses the content-type application/json @@ -19,7 +19,8 @@ async fn add_documents_test_json_content_types() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + analytics::MockAnalytics::new(&server.service.options) )) .await; // post @@ -63,7 +64,8 @@ async fn error_add_documents_test_bad_content_types() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + analytics::MockAnalytics::new(&server.service.options) )) .await; // post @@ -129,7 +131,8 @@ async fn error_add_documents_test_no_content_type() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + analytics::MockAnalytics::new(&server.service.options) )) .await; // post From 6b8e5a4c924a8a652c1fbe90283b13fb7780d7f0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 12 Oct 2021 14:46:35 +0200 Subject: [PATCH 04/68] log the index created route --- meilisearch-http/src/routes/indexes/mod.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index 0d0132d05..ee1c47ac9 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -4,7 +4,9 @@ use log::debug; use meilisearch_lib::index_controller::IndexSettings; use meilisearch_lib::MeiliSearch; use serde::{Deserialize, Serialize}; +use serde_json::json; +use crate::analytics::Analytics; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::routes::IndexParam; @@ -54,8 +56,14 @@ pub struct IndexCreateRequest { pub async fn create_index( meilisearch: GuardedData, body: web::Json, + analytics: web::Data<&'static dyn Analytics>, ) -> Result { let body = body.into_inner(); + + analytics.publish( + "Index Created".to_string(), + json!({ "with_primary_key": body.primary_key}), + ); let meta = meilisearch.create_index(body.uid, body.primary_key).await?; Ok(HttpResponse::Created().json(meta)) } From 0616f68eb01892a1eb94d8b928d1f844d73d1109 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 12 Oct 2021 14:54:09 +0200 Subject: [PATCH 05/68] implements part of the search --- meilisearch-http/src/routes/indexes/search.rs | 44 +++++++++++++++++-- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index 9cbdcf2c0..beefd06ed 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -3,8 +3,9 @@ use log::debug; use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; use meilisearch_lib::MeiliSearch; use serde::Deserialize; -use serde_json::Value; +use serde_json::{json, Value}; +use crate::analytics::Analytics; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; use crate::routes::IndexParam; @@ -109,9 +110,14 @@ pub async fn search_with_url_query( meilisearch: 
GuardedData<Public, MeiliSearch>,
     path: web::Path<IndexParam>,
     params: web::Query<SearchQueryGet>,
+    analytics: web::Data<&'static dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", params);
-    let query = params.into_inner().into();
+    let query: SearchQuery = params.into_inner().into();
+
+    let mut analytics_value = extract_analytics_from_query(&query);
+    analytics_value["http_method"] = json!("get");
+
     let search_result = meilisearch
         .search(path.into_inner().index_uid, query)
         .await?;
@@ -120,6 +126,9 @@ pub async fn search_with_url_query(
     #[cfg(test)]
     assert!(!search_result.exhaustive_nb_hits);
 
+    analytics_value["response_time"] = json!(search_result.processing_time_ms as u64);
+    analytics.publish("Documents Searched".to_string(), analytics_value);
+
     debug!("returns: {:?}", search_result);
     Ok(HttpResponse::Ok().json(search_result))
 }
@@ -128,20 +137,47 @@ pub async fn search_with_post(
     meilisearch: GuardedData<Public, MeiliSearch>,
     path: web::Path<IndexParam>,
     params: web::Json<SearchQuery>,
+    analytics: web::Data<&'static dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
-    debug!("search called with params: {:?}", params);
+    let query = params.into_inner();
+    debug!("search called with params: {:?}", query);
+
+    let mut analytics_value = extract_analytics_from_query(&query);
+    analytics_value["http_method"] = json!("post");
+
     let search_result = meilisearch
-        .search(path.into_inner().index_uid, params.into_inner())
+        .search(path.into_inner().index_uid, query)
         .await?;
 
     // Tests that the nb_hits is always set to false
     #[cfg(test)]
     assert!(!search_result.exhaustive_nb_hits);
 
+    analytics_value["response_time"] = json!(search_result.processing_time_ms as u64);
+    analytics.publish("Documents Searched".to_string(), analytics_value);
+
     debug!("returns: {:?}", search_result);
     Ok(HttpResponse::Ok().json(search_result))
 }
 
+fn extract_analytics_from_query(query: &SearchQuery) -> Value {
+    json!({
+        "sort": {
+            "total": query.sort.as_ref().map(|sort| sort.len()),
+            "has_geoPoint": query.sort.as_ref().map(|sort| sort.iter().any(|sort| sort.starts_with("_geoPoint"))),
+        },
+        "filter": {
+            "has_geoRadius": query.filter.as_ref().map(|filter| filter.to_string().contains("_geoRadius")),
+            // "syntax": 42,
+        },
+        "pagination": {
+            "offset": query.offset,
+            "limit": query.limit,
+        },
+        "terms_number": query.q.as_ref().map(|q| q.split_whitespace().count()).unwrap_or_default(),
+    })
+}
+
 #[cfg(test)]
 mod test {
     use super::*;

From 0b3e0a59cb6a1f1e596728aa90a39228c6867cc1 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Tue, 12 Oct 2021 15:00:04 +0200
Subject: [PATCH 06/68] log index updated
---
 meilisearch-http/src/routes/indexes/mod.rs | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs
index ee1c47ac9..18a2e42e8 100644
--- a/meilisearch-http/src/routes/indexes/mod.rs
+++ b/meilisearch-http/src/routes/indexes/mod.rs
@@ -98,9 +98,14 @@ pub async fn update_index(
     meilisearch: GuardedData<Private, MeiliSearch>,
     path: web::Path<IndexParam>,
     body: web::Json<UpdateIndexRequest>,
+    analytics: web::Data<&'static dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", body);
     let body = body.into_inner();
+    analytics.publish(
+        "Index Updated".to_string(),
+        json!({ "with_primary_key": body.primary_key}),
+    );
     let settings = IndexSettings {
         uid: body.uid,
         primary_key: body.primary_key,

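Before moving on, it helps to see concretely what extract_analytics_from_query reports. The sketch below reproduces its output shape against a simplified stand-in for meilisearch_lib's SearchQuery — the struct here is an assumption reduced to the fields the extraction reads, not the real type:

    use serde_json::{json, Value};

    // Simplified stand-in for meilisearch_lib's SearchQuery: only the fields
    // that the analytics extraction reads.
    struct SearchQuery {
        q: Option<String>,
        offset: Option<usize>,
        limit: usize,
        sort: Option<Vec<String>>,
    }

    fn extract_analytics_from_query(query: &SearchQuery) -> Value {
        json!({
            "sort": {
                "total": query.sort.as_ref().map(|sort| sort.len()),
                "has_geoPoint": query.sort.as_ref().map(|sort| sort.iter().any(|s| s.starts_with("_geoPoint"))),
            },
            "pagination": { "offset": query.offset, "limit": query.limit },
            "terms_number": query.q.as_ref().map(|q| q.split_whitespace().count()).unwrap_or_default(),
        })
    }

    fn main() {
        let query = SearchQuery {
            q: Some("hello world".into()),
            offset: None,
            limit: 20,
            sort: Some(vec!["_geoPoint(48.8,2.3):asc".into()]),
        };
        // With default serde_json (alphabetical key order), this prints:
        // {"pagination":{"limit":20,"offset":null},"sort":{"has_geoPoint":true,"total":1},"terms_number":2}
        println!("{}", extract_analytics_from_query(&query));
    }

The payload deliberately carries only derived counts and booleans — never the query text itself — which is what keeps the telemetry anonymous.
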
From 1ed05c6c07628488c8f9bcace65706b0ea67d731 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Tue, 12 Oct 2021 15:23:31 +0200
Subject: [PATCH 07/68] log documents added
---
 .../src/routes/indexes/documents.rs           | 27 ++++++++++++++-----
 meilisearch-http/src/routes/mod.rs            |  1 +
 2 files changed, 22 insertions(+), 6 deletions(-)

diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs
index 2f6746c90..e5eb15fbf 100644
--- a/meilisearch-http/src/routes/indexes/documents.rs
+++ b/meilisearch-http/src/routes/indexes/documents.rs
@@ -8,9 +8,10 @@ use meilisearch_lib::milli::update::IndexDocumentsMethod;
 use meilisearch_lib::MeiliSearch;
 use once_cell::sync::Lazy;
 use serde::Deserialize;
-use serde_json::Value;
+use serde_json::{json, Value};
 use tokio::sync::mpsc;
 
+use crate::analytics::Analytics;
 use crate::error::{MeilisearchHttpError, ResponseError};
 use crate::extractors::authentication::{policies::*, GuardedData};
 use crate::extractors::payload::Payload;
@@ -131,15 +132,29 @@ pub async fn add_documents(
     params: web::Query<UpdateDocumentsQuery>,
     body: Payload,
     req: HttpRequest,
+    analytics: web::Data<&'static dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", params);
+    let content_type = req
+        .headers()
+        .get("Content-type")
+        .map(|s| s.to_str().unwrap_or("unknown"));
+    let params = params.into_inner();
+
+    analytics.publish(
+        "Documents Added".to_string(),
+        json!({
+            "payload_type": content_type,
+            "with_primary_key": params.primary_key,
+            "index_creation": meilisearch.get_index(path.index_uid.clone()).await.is_ok(),
+        }),
+    );
+
     document_addition(
-        req.headers()
-            .get("Content-type")
-            .map(|s| s.to_str().unwrap_or("unkown")),
+        content_type,
         meilisearch,
-        path.into_inner().index_uid,
-        params.into_inner().primary_key,
+        path.index_uid.clone(),
+        params.primary_key,
         body,
         IndexDocumentsMethod::ReplaceDocuments,
     )
diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs
index 382147f31..092bd1186 100644
--- a/meilisearch-http/src/routes/mod.rs
+++ b/meilisearch-http/src/routes/mod.rs
@@ -302,6 +302,7 @@ mod test {
     impl_is_policy! {A B}
     impl_is_policy! {A B C}
     impl_is_policy! {A B C D}
+    impl_is_policy! {A B C D E}
 
     /// Emits a compile error if a route doesn't have the correct authentication policy.
     ///

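All of the handlers above receive the analytics object the same way: a &'static dyn Analytics registered in actix's app data, obtained by leaking a boxed implementation at startup. A minimal self-contained sketch of that wiring — the trait and the LogAnalytics type are simplifications for illustration, not the real Meilisearch definitions:

    use serde_json::{json, Value};

    // Stand-in for the trait introduced in this series: an event name plus a
    // free-form JSON payload; the call must never block the route handler.
    trait Analytics: Sync + Send {
        fn publish(&'static self, event_name: String, send: Value);
    }

    struct LogAnalytics;

    impl Analytics for LogAnalytics {
        fn publish(&'static self, event_name: String, send: Value) {
            // The real implementation pushes the event into a Segment batcher
            // from a spawned task; printing keeps this sketch dependency-free.
            println!("{}: {}", event_name, send);
        }
    }

    fn main() {
        // Box::leak provides the &'static lifetime the trait methods expect,
        // mirroring how the series leaks its analytics singleton at startup.
        let analytics: &'static dyn Analytics = Box::leak(Box::new(LogAnalytics));
        analytics.publish(
            "Documents Added".to_string(),
            json!({ "payload_type": "application/json", "index_creation": false }),
        );
    }

Leaking one allocation for a process-lifetime singleton is a deliberate trade: it avoids reference counting on the hot path of every route.
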
From bda74728807d8d8e6e3d75b1b201e6e0e9deb8ec Mon Sep 17 00:00:00 2001
From: Tamo
Date: Tue, 12 Oct 2021 15:31:59 +0200
Subject: [PATCH 08/68] log the documents updated route
---
 .../src/routes/indexes/documents.rs           | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs
index e5eb15fbf..1e8a803dc 100644
--- a/meilisearch-http/src/routes/indexes/documents.rs
+++ b/meilisearch-http/src/routes/indexes/documents.rs
@@ -167,12 +167,25 @@ pub async fn update_documents(
     params: web::Query<UpdateDocumentsQuery>,
     body: Payload,
     req: HttpRequest,
+    analytics: web::Data<&'static dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", params);
+    let content_type = req
+        .headers()
+        .get("Content-type")
+        .map(|s| s.to_str().unwrap_or("unknown"));
+
+    analytics.publish(
+        "Documents Updated".to_string(),
+        json!({
+            "payload_type": content_type,
+            "with_primary_key": params.primary_key,
+            "index_creation": meilisearch.get_index(path.index_uid.clone()).await.is_ok(),
+        }),
+    );
+
     document_addition(
-        req.headers()
-            .get("Content-type")
-            .map(|s| s.to_str().unwrap_or("unkown")),
+        content_type,
         meilisearch,
         path.into_inner().index_uid,
         params.into_inner().primary_key,

From 7524bfc07fe99304644363ce0979068839346039 Mon Sep 17 00:00:00 2001
From: Irevoire
Date: Wed, 13 Oct 2021 14:10:22 +0200
Subject: [PATCH 09/68] log the all-settings updated route
---
 .../src/routes/indexes/settings.rs            | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs
index 7e6033180..96b8577ce 100644
--- a/meilisearch-http/src/routes/indexes/settings.rs
+++ b/meilisearch-http/src/routes/indexes/settings.rs
@@ -4,7 +4,9 @@ use actix_web::{web, HttpResponse};
 use meilisearch_lib::index::{Settings, Unchecked};
 use meilisearch_lib::index_controller::Update;
 use meilisearch_lib::MeiliSearch;
+use serde_json::json;
 
+use crate::analytics::Analytics;
 use crate::error::ResponseError;
 use crate::extractors::authentication::{policies::*, GuardedData};
 
@@ -154,9 +156,27 @@ pub async fn update_all(
     meilisearch: GuardedData<Private, MeiliSearch>,
     index_uid: web::Path<String>,
     body: web::Json<Settings<Unchecked>>,
+    analytics: web::Data<&'static dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let settings = body.into_inner();
 
+    analytics.publish(
+        "Settings Updated".to_string(),
+        json!({
+            "ranking_rules": {
+                "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().filter(|s| s.contains(":")).count()),
+            },
+            "sortable_attributes": {
+                "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()),
+                "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")).unwrap_or(false),
+            },
+            "filterable_attributes": {
+                "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()),
+                "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")).unwrap_or(false),
+            },
+        }),
+    );
+
     let update = Update::Settings(settings);
     let update_result = meilisearch
         .register_update(index_uid.into_inner(), update, true)

From 22d9d660cc709534d2d797fd905d1bec43104b63 Mon Sep 17 00:00:00 2001
From: Irevoire
Date: Wed, 13 Oct 2021 14:56:54 +0200
Subject: [PATCH 10/68] log all the required settings routes
---
 .../src/routes/indexes/settings.rs            | 59 +++++++++++++++++--
 1 file changed, 54 insertions(+), 5 deletions(-)

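The next patch reworks make_setting_route! so a call site can hand the generated route an analytics closure, with an extra macro arm defaulting it to a no-op. The mechanism, reduced to a self-contained sketch (every name here is illustrative, not the actual macro):

    // A macro that generates a handler and lets the call site inject a hook;
    // the one-argument arm defaults the hook to a no-op, as the patch does.
    macro_rules! make_route {
        ($name:ident, $hook:expr) => {
            fn $name(body: serde_json::Value) -> serde_json::Value {
                // Run the injected analytics hook before applying the setting.
                let hook: fn(&serde_json::Value) = $hook;
                hook(&body);
                serde_json::json!({ "applied": body })
            }
        };
        ($name:ident) => {
            make_route!($name, |_| {});
        };
    }

    make_route!(update_ranking_rules, |body: &serde_json::Value| {
        println!("RankingRules Updated: {}", body);
    });
    make_route!(update_stop_words); // no analytics wanted for this route

    fn main() {
        update_ranking_rules(serde_json::json!(["words", "typo", "sort"]));
        update_stop_words(serde_json::json!(["the", "a"]));
    }
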
diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs
index 96b8577ce..8eed67d7d 100644
--- a/meilisearch-http/src/routes/indexes/settings.rs
+++ b/meilisearch-http/src/routes/indexes/settings.rs
@@ -12,7 +12,7 @@ use crate::extractors::authentication::{policies::*, GuardedData};
 
 #[macro_export]
 macro_rules! make_setting_route {
-    ($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal) => {
+    ($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => {
         pub mod $attr {
             use log::debug;
             use actix_web::{web, HttpResponse, Resource};
@@ -20,6 +20,7 @@ macro_rules! make_setting_route {
             use meilisearch_lib::milli::update::Setting;
             use meilisearch_lib::{MeiliSearch, index::Settings, index_controller::Update};
 
+            use crate::analytics::Analytics;
             use crate::error::ResponseError;
             use crate::extractors::authentication::{GuardedData, policies::*};
 
@@ -41,9 +42,14 @@ macro_rules! make_setting_route {
                 meilisearch: GuardedData<Private, MeiliSearch>,
                 index_uid: actix_web::web::Path<String>,
                 body: actix_web::web::Json<Option<$type>>,
+                $analytics_var: web::Data<&'static dyn Analytics>,
             ) -> std::result::Result<HttpResponse, ResponseError> {
+                let body = body.into_inner();
+
+                $analytics(&body);
+
                 let settings = Settings {
-                    $attr: match body.into_inner() {
+                    $attr: match body {
                         Some(inner_body) => Setting::Set(inner_body),
                         None => Setting::Reset
                     },
@@ -75,20 +81,47 @@ macro_rules! make_setting_route {
             }
         }
     };
+    ($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal) => {
+        make_setting_route!($route, $type, $attr, $camelcase_attr, _analytics, |_| {});
+    };
 }
 
 make_setting_route!(
     "/filterable-attributes",
     std::collections::BTreeSet<String>,
     filterable_attributes,
-    "filterableAttributes"
+    "filterableAttributes",
+    analytics,
+    |setting: &Option<std::collections::BTreeSet<String>>| {
+        use serde_json::json;
+
+        analytics.publish(
+            "FilterableAttributes Updated".to_string(),
+            json!({
+                "total": setting.as_ref().map(|filter| filter.len()),
+                "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false),
+            }),
+        );
+    }
 );
 
 make_setting_route!(
     "/sortable-attributes",
     std::collections::BTreeSet<String>,
     sortable_attributes,
-    "sortableAttributes"
+    "sortableAttributes",
+    analytics,
+    |setting: &Option<std::collections::BTreeSet<String>>| {
+        use serde_json::json;
+
+        analytics.publish(
+            "SortableAttributes Updated".to_string(),
+            json!({
+                "total": setting.as_ref().map(|sort| sort.len()),
+                "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false),
+            }),
+        );
+    }
 );
 
 make_setting_route!(
@@ -126,7 +159,23 @@ make_setting_route!(
     "distinctAttribute"
 );
 
-make_setting_route!("/ranking-rules", Vec<String>, ranking_rules, "rankingRules");
+make_setting_route!(
+    "/ranking-rules",
+    Vec<String>,
+    ranking_rules,
+    "rankingRules",
+    analytics,
+    |setting: &Option<Vec<String>>| {
+        use serde_json::json;
+
+        analytics.publish(
+            "RankingRules Updated".to_string(),
+            json!({
+                "sort_position": setting.as_ref().map(|sort| sort.iter().filter(|s| s.contains(":")).count()),
+            }),
+        );
+    }
+);
 
 macro_rules! 
generate_configure { ($($mod:ident),*) => { From 30aeda7a1a0bf9d8fb5824e54fcee0c5e3e8e61b Mon Sep 17 00:00:00 2001 From: Irevoire Date: Wed, 13 Oct 2021 16:08:33 +0200 Subject: [PATCH 11/68] update the identify call to the latest spec version --- meilisearch-http/src/analytics.rs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index f04170fbb..bd985c9ea 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -15,10 +15,7 @@ mod segment { use serde_json::{json, Value}; use std::fmt::Display; use std::time::{Duration, Instant}; - use sysinfo::DiskExt; - use sysinfo::ProcessorExt; - use sysinfo::System; - use sysinfo::SystemExt; + use sysinfo::{DiskExt, System, SystemExt}; use tokio::sync::Mutex; use uuid::Uuid; @@ -35,18 +32,28 @@ mod segment { impl SegmentAnalytics { fn compute_traits(opt: &Opt, stats: Stats) -> Value { static FIRST_START_TIMESTAMP: Lazy = Lazy::new(Instant::now); - static SYSTEM: Lazy = Lazy::new(|| { + const SYSTEM: Lazy = Lazy::new(|| { let mut sys = System::new_all(); sys.refresh_all(); + let kernel_version = sys + .kernel_version() + .map(|k| k.split_once("-").map(|(k, _)| k.to_string())) + .flatten(); json!({ - "distribution": sys.name().zip(sys.kernel_version()).map(|(name, version)| format!("{}: {}", name, version)), + "distribution": sys.name(), + "kernel_version": kernel_version, "core_number": sys.processors().len(), "ram_size": sys.total_memory(), - "frequency": sys.processors().iter().map(|cpu| cpu.frequency()).sum::() / sys.processors().len() as u64, "disk_size": sys.disks().iter().map(|disk| disk.available_space()).max(), "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), }) }); + let infos = json!({ + "version": env!("CARGO_PKG_VERSION").to_string(), + "env": opt.env.clone(), + "has_snapshot": opt.schedule_snapshot, + }); + let number_of_documents = stats .indexes .values() @@ -59,13 +66,9 @@ mod segment { "database_size": stats.database_size, "indexes_number": stats.indexes.len(), "documents_number": number_of_documents, - }, - "infos": { - "version": env!("CARGO_PKG_VERSION").to_string(), - "env": opt.env.clone(), - "snapshot": opt.schedule_snapshot, "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / 60 * 60 * 24, // one day }, + "infos": infos, }) } @@ -123,7 +126,7 @@ mod segment { fn tick(&'static self, meilisearch: MeiliSearch) { tokio::spawn(async move { loop { - tokio::time::sleep(Duration::from_secs(60)).await; // 1 minutes + tokio::time::sleep(Duration::from_secs(60 * 5)).await; // 1 minutes println!("ANALYTICS: should do things"); if let Ok(stats) = meilisearch.get_all_stats().await { @@ -142,6 +145,7 @@ mod segment { .await; } let _ = self.batcher.lock().await.flush().await; + println!("sent batch"); } }); } From b33b1ef3ddb71acb41071a4300e0015e40dece7e Mon Sep 17 00:00:00 2001 From: Irevoire Date: Wed, 13 Oct 2021 16:36:15 +0200 Subject: [PATCH 12/68] update the way of getting and saving the user-id to the file system --- meilisearch-http/src/analytics.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index bd985c9ea..c0e699344 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -1,5 +1,6 @@ use serde_json::Value; use std::fmt::Display; +use std::fs::read_to_string; use crate::Opt; @@ -14,6 +15,7 @@ mod segment { use 
segment::{AutoBatcher, Batcher, HttpClient}; use serde_json::{json, Value}; use std::fmt::Display; + use std::fs; use std::time::{Duration, Instant}; use sysinfo::{DiskExt, System, SystemExt}; use tokio::sync::Mutex; @@ -73,12 +75,17 @@ mod segment { } pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self { - // see if there is already a user-id - let user_id = std::fs::read_to_string(opt.db_path.join("user-id")); + // see if there is already a user-id in the `data.ms` + let user_id = fs::read_to_string(opt.db_path.join("user-id")) + .or_else(|_| fs::read_to_string("/tmp/meilisearch-user-id")); let first_time_run = user_id.is_err(); // if not, generate a new user-id and save it to the fs let user_id = user_id.unwrap_or_else(|_| Uuid::new_v4().to_string()); - let _ = std::fs::write(opt.db_path.join("user-id"), user_id.as_bytes()); + let _ = fs::write(opt.db_path.join("user-id"), user_id.as_bytes()); + let _ = fs::write( + opt.db_path.join("/tmp/meilisearch-user-id"), + user_id.as_bytes(), + ); let client = HttpClient::default(); let user = User::UserId { @@ -103,7 +110,7 @@ mod segment { .await .push(Identify { user: segment.user.clone(), - // TODO: TAMO: what should we do when meilisearch is broken at start + // If meilisearch is corrupted at the start we can panic traits: Self::compute_traits( &segment.opt, meilisearch.get_all_stats().await.unwrap(), @@ -191,7 +198,8 @@ pub struct MockAnalytics { impl MockAnalytics { pub fn new(opt: &Opt) -> &'static Self { - let user = std::fs::read_to_string(opt.db_path.join("user-id")) + let user = read_to_string(opt.db_path.join("user-id")) + .or_else(|_| read_to_string("/tmp/meilisearch-user-id")) .unwrap_or_else(|_| "No user-id".to_string()); let analytics = Box::new(Self { user }); Box::leak(analytics) From f7bb499c281a4e3b615d55c053e8f6332fe7ffe3 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Wed, 13 Oct 2021 16:43:33 +0200 Subject: [PATCH 13/68] send the first identify + launched for the first time events right away instead of batching them --- meilisearch-http/src/analytics.rs | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index c0e699344..91abb1c98 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -103,23 +103,7 @@ mod segment { }); let segment = Box::leak(segment); - // send an identify event - let _ = segment - .batcher - .lock() - .await - .push(Identify { - user: segment.user.clone(), - // If meilisearch is corrupted at the start we can panic - traits: Self::compute_traits( - &segment.opt, - meilisearch.get_all_stats().await.unwrap(), - ), - ..Default::default() - }) - .await; - - // send the associated track event + // batch the launched for the first time track event if first_time_run { segment.publish("Launched for the first time".to_string(), json!({})); } @@ -133,9 +117,6 @@ mod segment { fn tick(&'static self, meilisearch: MeiliSearch) { tokio::spawn(async move { loop { - tokio::time::sleep(Duration::from_secs(60 * 5)).await; // 1 minutes - println!("ANALYTICS: should do things"); - if let Ok(stats) = meilisearch.get_all_stats().await { let traits = Self::compute_traits(&self.opt, stats); let user = self.user.clone(); @@ -152,7 +133,8 @@ mod segment { .await; } let _ = self.batcher.lock().await.flush().await; - println!("sent batch"); + println!("ANALYTICS: sent the batch"); + tokio::time::sleep(Duration::from_secs(60 * 5)).await; // 5 minutes } }); } From 
9e1bba40f7e72f4e07f3860629b49f9b0a5ccada Mon Sep 17 00:00:00 2001 From: Irevoire Date: Wed, 13 Oct 2021 19:38:14 +0200 Subject: [PATCH 14/68] do not print anything if no user id was found --- meilisearch-http/src/analytics.rs | 2 +- meilisearch-http/src/main.rs | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 91abb1c98..daed4c233 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -182,7 +182,7 @@ impl MockAnalytics { pub fn new(opt: &Opt) -> &'static Self { let user = read_to_string(opt.db_path.join("user-id")) .or_else(|_| read_to_string("/tmp/meilisearch-user-id")) - .unwrap_or_else(|_| "No user-id".to_string()); + .unwrap_or_else(|_| "".to_string()); let analytics = Box::new(Self { user }); Box::leak(analytics) } diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 9a258b287..60535966d 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -126,7 +126,11 @@ Anonymous telemetry: \"Enabled\"" ); } } - eprintln!("Unique User ID:\t\"{}\"", analytics); + + let analytics = analytics.to_string(); + if analytics != "" { + eprintln!("Unique User ID:\t\"{}\"", analytics); + } eprintln!(); From 8e2d6cf87d9f637a0902397dab5d2f2f9e16efb8 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Wed, 13 Oct 2021 20:56:28 +0200 Subject: [PATCH 15/68] add the content type to all the route --- meilisearch-http/src/analytics.rs | 17 ++++++++++++---- .../src/routes/indexes/documents.rs | 2 ++ meilisearch-http/src/routes/indexes/mod.rs | 6 +++++- meilisearch-http/src/routes/indexes/search.rs | 16 ++++++++++++--- .../src/routes/indexes/settings.rs | 20 ++++++++++++------- 5 files changed, 46 insertions(+), 15 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index daed4c233..037c68524 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -1,3 +1,4 @@ +use actix_web::HttpRequest; use serde_json::Value; use std::fmt::Display; use std::fs::read_to_string; @@ -8,6 +9,8 @@ use crate::Opt; #[cfg(all(not(debug_assertions), feature = "analytics"))] mod segment { use crate::analytics::Analytics; + use actix_web::http::header::USER_AGENT; + use actix_web::HttpRequest; use meilisearch_lib::index_controller::Stats; use meilisearch_lib::MeiliSearch; use once_cell::sync::Lazy; @@ -105,7 +108,7 @@ mod segment { // batch the launched for the first time track event if first_time_run { - segment.publish("Launched for the first time".to_string(), json!({})); + segment.publish("Launched for the first time".to_string(), json!({}), None); } // start the runtime tick @@ -142,7 +145,12 @@ mod segment { #[async_trait::async_trait] impl super::Analytics for SegmentAnalytics { - fn publish(&'static self, event_name: String, send: Value) { + fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>) { + let content_type = request + .map(|req| req.headers().get(USER_AGENT)) + .flatten() + .map(|header| header.to_str().unwrap_or("unknown").to_string()); + tokio::spawn(async move { println!("ANALYTICS pushing {} in the batcher", event_name); let _ = self @@ -152,6 +160,7 @@ mod segment { .push(Track { user: self.user.clone(), event: event_name.clone(), + context: content_type.map(|user_agent| json!({ "user-agent": user_agent.split(";").map(|u| u.trim()).collect::>() })), properties: send, ..Default::default() }) @@ -191,7 +200,7 @@ impl MockAnalytics { 
#[async_trait::async_trait] impl Analytics for MockAnalytics { /// This is a noop and should be optimized out - fn publish(&'static self, _event_name: String, _send: Value) {} + fn publish(&'static self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} } impl Display for MockAnalytics { @@ -202,5 +211,5 @@ impl Display for MockAnalytics { #[async_trait::async_trait] pub trait Analytics: Display + Sync + Send { - fn publish(&'static self, event_name: String, send: Value); + fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>); } diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index 1e8a803dc..8d3630713 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -148,6 +148,7 @@ pub async fn add_documents( "with_primary_key": params.primary_key, "index_creation": meilisearch.get_index(path.index_uid.clone()).await.is_ok(), }), + Some(&req), ); document_addition( @@ -182,6 +183,7 @@ pub async fn update_documents( "with_primary_key": params.primary_key, "index_creation": meilisearch.get_index(path.index_uid.clone()).await.is_ok(), }), + Some(&req), ); document_addition( diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index 18a2e42e8..743da5b6a 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -1,4 +1,4 @@ -use actix_web::{web, HttpResponse}; +use actix_web::{web, HttpRequest, HttpResponse}; use chrono::{DateTime, Utc}; use log::debug; use meilisearch_lib::index_controller::IndexSettings; @@ -56,6 +56,7 @@ pub struct IndexCreateRequest { pub async fn create_index( meilisearch: GuardedData, body: web::Json, + req: HttpRequest, analytics: web::Data<&'static dyn Analytics>, ) -> Result { let body = body.into_inner(); @@ -63,6 +64,7 @@ pub async fn create_index( analytics.publish( "Index Created".to_string(), json!({ "with_primary_key": body.primary_key}), + Some(&req), ); let meta = meilisearch.create_index(body.uid, body.primary_key).await?; Ok(HttpResponse::Created().json(meta)) @@ -98,6 +100,7 @@ pub async fn update_index( meilisearch: GuardedData, path: web::Path, body: web::Json, + req: HttpRequest, analytics: web::Data<&'static dyn Analytics>, ) -> Result { debug!("called with params: {:?}", body); @@ -105,6 +108,7 @@ pub async fn update_index( analytics.publish( "Index Updated".to_string(), json!({ "with_primary_key": body.primary_key}), + Some(&req), ); let settings = IndexSettings { uid: body.uid, diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index beefd06ed..c0d3f1462 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -1,4 +1,4 @@ -use actix_web::{web, HttpResponse}; +use actix_web::{web, HttpRequest, HttpResponse}; use log::debug; use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; use meilisearch_lib::MeiliSearch; @@ -110,6 +110,7 @@ pub async fn search_with_url_query( meilisearch: GuardedData, path: web::Path, params: web::Query, + req: HttpRequest, analytics: web::Data<&'static dyn Analytics>, ) -> Result { debug!("called with params: {:?}", params); @@ -127,7 +128,11 @@ pub async fn search_with_url_query( assert!(!search_result.exhaustive_nb_hits); analytics_value["response_time"] = json!(search_result.processing_time_ms as u64); - 
analytics.publish("Documents Searched".to_string(), analytics_value); + analytics.publish( + "Documents Searched".to_string(), + analytics_value, + Some(&req), + ); debug!("returns: {:?}", search_result); Ok(HttpResponse::Ok().json(search_result)) @@ -137,6 +142,7 @@ pub async fn search_with_post( meilisearch: GuardedData, path: web::Path, params: web::Json, + req: HttpRequest, analytics: web::Data<&'static dyn Analytics>, ) -> Result { let query = params.into_inner(); @@ -154,7 +160,11 @@ pub async fn search_with_post( assert!(!search_result.exhaustive_nb_hits); analytics_value["response_time"] = json!(search_result.processing_time_ms as u64); - analytics.publish("Documents Searched".to_string(), analytics_value); + analytics.publish( + "Documents Searched".to_string(), + analytics_value, + Some(&req), + ); debug!("returns: {:?}", search_result); Ok(HttpResponse::Ok().json(search_result)) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 8eed67d7d..f70eb1222 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -1,6 +1,6 @@ use log::debug; -use actix_web::{web, HttpResponse}; +use actix_web::{web, HttpRequest, HttpResponse}; use meilisearch_lib::index::{Settings, Unchecked}; use meilisearch_lib::index_controller::Update; use meilisearch_lib::MeiliSearch; @@ -15,7 +15,7 @@ macro_rules! make_setting_route { ($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal, $analytics_var:ident, $analytics:expr) => { pub mod $attr { use log::debug; - use actix_web::{web, HttpResponse, Resource}; + use actix_web::{web, HttpResponse, HttpRequest, Resource}; use meilisearch_lib::milli::update::Setting; use meilisearch_lib::{MeiliSearch, index::Settings, index_controller::Update}; @@ -42,11 +42,12 @@ macro_rules! make_setting_route { meilisearch: GuardedData, index_uid: actix_web::web::Path, body: actix_web::web::Json>, + req: HttpRequest, $analytics_var: web::Data<&'static dyn Analytics>, ) -> std::result::Result { let body = body.into_inner(); - $analytics(&body); + $analytics(&body, &req); let settings = Settings { $attr: match body { @@ -82,7 +83,7 @@ macro_rules! 
make_setting_route { } }; ($route:literal, $type:ty, $attr:ident, $camelcase_attr:literal) => { - make_setting_route!($route, $type, $attr, $camelcase_attr, _analytics, |_| {}); + make_setting_route!($route, $type, $attr, $camelcase_attr, _analytics, |_, _| {}); }; } @@ -92,7 +93,7 @@ make_setting_route!( filterable_attributes, "filterableAttributes", analytics, - |setting: &Option>| { + |setting: &Option>, req: &HttpRequest| { use serde_json::json; analytics.publish( @@ -101,6 +102,7 @@ make_setting_route!( "total": setting.as_ref().map(|filter| filter.len()), "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), }), + Some(&req), ); } ); @@ -111,7 +113,7 @@ make_setting_route!( sortable_attributes, "sortableAttributes", analytics, - |setting: &Option>| { + |setting: &Option>, req: &HttpRequest| { use serde_json::json; analytics.publish( @@ -120,6 +122,7 @@ make_setting_route!( "total": setting.as_ref().map(|sort| sort.len()), "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false), }), + Some(&req), ); } ); @@ -165,7 +168,7 @@ make_setting_route!( ranking_rules, "rankingRules", analytics, - |setting: &Option>| { + |setting: &Option>, req: &HttpRequest| { use serde_json::json; analytics.publish( @@ -173,6 +176,7 @@ make_setting_route!( json!({ "sort_position": setting.as_ref().map(|sort| sort.iter().filter(|s| s.contains(":")).count()), }), + Some(&req), ); } ); @@ -205,6 +209,7 @@ pub async fn update_all( meilisearch: GuardedData, index_uid: web::Path, body: web::Json>, + req: HttpRequest, analytics: web::Data<&'static dyn Analytics>, ) -> Result { let settings = body.into_inner(); @@ -224,6 +229,7 @@ pub async fn update_all( "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")).unwrap_or(false), }, }), + Some(&req), ); let update = Update::Settings(settings); From 75d1272325a42617f3498b0be1a10ac0d9badd98 Mon Sep 17 00:00:00 2001 From: Irevoire Date: Thu, 14 Oct 2021 11:32:55 +0200 Subject: [PATCH 16/68] log the dump creation --- meilisearch-http/src/routes/dump.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs index cbf89ddea..849b8c654 100644 --- a/meilisearch-http/src/routes/dump.rs +++ b/meilisearch-http/src/routes/dump.rs @@ -1,8 +1,10 @@ -use actix_web::{web, HttpResponse}; +use actix_web::{web, HttpRequest, HttpResponse}; use log::debug; use meilisearch_lib::MeiliSearch; use serde::{Deserialize, Serialize}; +use serde_json::json; +use crate::analytics::Analytics; use crate::error::ResponseError; use crate::extractors::authentication::{policies::*, GuardedData}; @@ -13,7 +15,11 @@ pub fn configure(cfg: &mut web::ServiceConfig) { pub async fn create_dump( meilisearch: GuardedData, + req: HttpRequest, + analytics: web::Data<&'static dyn Analytics>, ) -> Result { + analytics.publish("Dump Created".to_string(), json!({}), Some(&req)); + let res = meilisearch.create_dump().await?; debug!("returns: {:?}", res); From 0667d940f9aeef075f89500629ba622dd6ee01b2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 18 Oct 2021 17:00:33 +0200 Subject: [PATCH 17/68] update the name of nb_cores in the identify --- meilisearch-http/src/analytics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 037c68524..05af16fda 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -47,7 
+47,7 @@ mod segment { json!({ "distribution": sys.name(), "kernel_version": kernel_version, - "core_number": sys.processors().len(), + "cores": sys.processors().len(), "ram_size": sys.total_memory(), "disk_size": sys.disks().iter().map(|disk| disk.available_space()).max(), "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), From 595ae42e94a5f02d7b503f7485b5206d4590939d Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 18 Oct 2021 17:02:13 +0200 Subject: [PATCH 18/68] update the name of the Launched event --- meilisearch-http/src/analytics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 05af16fda..c10b65457 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -108,7 +108,7 @@ mod segment { // batch the launched for the first time track event if first_time_run { - segment.publish("Launched for the first time".to_string(), json!({}), None); + segment.publish("Launched".to_string(), json!({}), None); } // start the runtime tick From d3d76bf97a31fd0b07e47d1cd4f899731252f7b2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Oct 2021 15:09:55 +0200 Subject: [PATCH 19/68] wip create a search batcher --- meilisearch-http/src/analytics.rs | 227 +++++++++++++++++++++++++++++- 1 file changed, 225 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index c10b65457..60f03e2d5 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -1,5 +1,7 @@ use actix_web::HttpRequest; +use meilisearch_lib::index::SearchQuery; use serde_json::Value; +use std::collections::{HashMap, HashSet}; use std::fmt::Display; use std::fs::read_to_string; @@ -11,12 +13,14 @@ mod segment { use crate::analytics::Analytics; use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; + use meilisearch_lib::index::SearchQuery; use meilisearch_lib::index_controller::Stats; use meilisearch_lib::MeiliSearch; use once_cell::sync::Lazy; use segment::message::{Identify, Track, User}; use segment::{AutoBatcher, Batcher, HttpClient}; use serde_json::{json, Value}; + use std::collections::{HashMap, HashSet}; use std::fmt::Display; use std::fs; use std::time::{Duration, Instant}; @@ -32,6 +36,8 @@ mod segment { user: User, opt: Opt, batcher: Mutex, + post_search_batcher: Mutex, + get_search_batcher: Mutex, } impl SegmentAnalytics { @@ -103,6 +109,8 @@ mod segment { user, opt: opt.clone(), batcher, + post_search_batcher: Mutex::new(SearchBatcher::default()), + get_search_batcher: Mutex::new(SearchBatcher::default()), }); let segment = Box::leak(segment); @@ -141,6 +149,92 @@ mod segment { } }); } + + fn start_search( + &'static self, + getter: impl Fn(&'static Self) -> &'static Mutex + Send + Sync + 'static, + query: &SearchQuery, + request: &HttpRequest, + ) { + let user_agent = SearchBatcher::extract_user_agents(request); + let sorted = query.sort.is_some() as usize; + let sort_with_geo_point = query + .sort + .as_ref() + .map_or(false, |s| s.iter().any(|s| s.contains("_geoPoint("))); + let sort_criteria_terms = query.sort.as_ref().map_or(0, |s| s.len()); + + // since there is quite a bit of computation made on the filter we are going to do that in the async task + let filter = query.filter.clone(); + let queried = query.q.is_some(); + let nb_terms = query.q.as_ref().map_or(0, |s| s.split_whitespace().count()); + + let max_limit = query.limit; + let max_offset = query.offset.unwrap_or_default(); + + 
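// Every value above is copied out of `query` before the task is spawned because
// the spawned future must be Send + 'static: it cannot borrow the HttpRequest or
// the SearchQuery. A minimal sketch of the pattern (helper names hypothetical):
//
//     let filter = query.filter.clone();       // owned copy, cheap
//     tokio::spawn(async move {
//         let stats = analyze_filter(&filter); // hypothetical heavy work
//         batcher.lock().await.record(stats);  // hypothetical; lock held briefly
//     });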
// to avoid blocking the search we are going to do the heavier computation in an async task + // and take the mutex in the same task + tokio::spawn(async move { + let filtered = filter.is_some() as usize; + let syntax = match filter.as_ref() { + Some(Value::String(_)) => "string".to_string(), + Some(Value::Array(values)) => { + if values.iter().map(|v| v.to_string()).any(|s| { + s.contains(['=', '<', '>', '!'].as_ref()) + || s.contains("_geoRadius") + || s.contains("TO") + }) { + "mixed".to_string() + } else { + "array".to_string() + } + } + _ => "none".to_string(), + }; + let stringified_filters = filter.map_or(String::new(), |v| v.to_string()); + let filter_with_geo_radius = stringified_filters.contains("_geoRadius("); + let filter_number_of_criteria = stringified_filters + .split("!=") + .map(|s| s.split("<=")) + .flatten() + .map(|s| s.split(">=")) + .flatten() + .map(|s| s.split(['=', '<', '>', '!'].as_ref())) + .flatten() + .map(|s| s.split("_geoRadius(")) + .flatten() + .map(|s| s.split("TO")) + .flatten() + .count() + - 1; + + println!("Batching a search"); + let mut search_batcher = getter(self).lock().await; + user_agent.into_iter().for_each(|ua| { + search_batcher.user_agents.insert(ua); + }); + search_batcher.total_received += 1; + + // sort + search_batcher.sort_with_geo_point |= sort_with_geo_point; + search_batcher.sort_sum_of_criteria_terms += sort_criteria_terms; + search_batcher.sort_total_number_of_criteria += sorted; + + // filter + search_batcher.filter_with_geo_radius |= filter_with_geo_radius; + search_batcher.filter_sum_of_criteria_terms += filter_number_of_criteria; + search_batcher.filter_total_number_of_criteria += filtered as usize; + *search_batcher.used_syntax.entry(syntax).or_insert(0) += 1; + + // q + search_batcher.sum_of_terms_count += nb_terms; + search_batcher.total_number_of_q += queried as usize; + + // pagination + search_batcher.max_limit = search_batcher.max_limit.max(max_limit); + search_batcher.max_offset = search_batcher.max_offset.max(max_offset); + }); + } } #[async_trait::async_trait] @@ -160,7 +254,7 @@ mod segment { .push(Track { user: self.user.clone(), event: event_name.clone(), - context: content_type.map(|user_agent| json!({ "user-agent": user_agent.split(";").map(|u| u.trim()).collect::>() })), + context: content_type.map(|user_agent| json!({ "user-agent": user_agent.split(";").map(str::trim).collect::>() })), properties: send, ..Default::default() }) @@ -168,6 +262,30 @@ mod segment { println!("ANALYTICS {} pushed", event_name); }); } + + fn start_get_search(&'static self, query: &SearchQuery, request: &HttpRequest) { + self.start_search(|s| &s.get_search_batcher, query, request) + } + + fn end_get_search(&'static self, process_time: usize) { + tokio::spawn(async move { + let mut search_batcher = self.get_search_batcher.lock().await; + search_batcher.total_succeeded += 1; + search_batcher.time_spent.push(process_time); + }); + } + + fn start_post_search(&'static self, query: &SearchQuery, request: &HttpRequest) { + self.start_search(|s| &s.post_search_batcher, query, request) + } + + fn end_post_search(&'static self, process_time: usize) { + tokio::spawn(async move { + let mut search_batcher = self.get_search_batcher.lock().await; + search_batcher.total_succeeded += 1; + search_batcher.time_spent.push(process_time); + }); + } } impl Display for SegmentAnalytics { @@ -175,6 +293,96 @@ mod segment { write!(f, "{}", self.user) } } + + #[derive(Default)] + pub struct SearchBatcher { + // context + user_agents: HashSet, + + // requests + 
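// A worked example of how the counters below evolve (a sketch): after two
// requests, `?q=hello world&sort=price:asc` and a bare `?q=test`, we get
//     total_received = 2, total_number_of_q = 2, sum_of_terms_count = 3,
//     sort_total_number_of_criteria = 1, sort_sum_of_criteria_terms = 1,
// so the periodic flush can report averages such as 3 / 2 = 1.5 terms per query
// instead of one event per request.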
total_received: usize, + total_succeeded: usize, + time_spent: Vec, + + // sort + sort_with_geo_point: bool, + // every time a request has a sort, this field must be incremented by the number of terms it contains + sort_sum_of_criteria_terms: usize, + // every time a request has a sort, this field must be incremented by one + sort_total_number_of_criteria: usize, + + // filter + filter_with_geo_radius: bool, + // every time a request has a filter, this field must be incremented by the number of terms it contains + filter_sum_of_criteria_terms: usize, + // every time a request has a filter, this field must be incremented by one + filter_total_number_of_criteria: usize, + used_syntax: HashMap, + + // q + // every time a request has a q field, this field must be incremented by the number of terms + sum_of_terms_count: usize, + // every time a request has a q field, this field must be incremented by one + total_number_of_q: usize, + + // pagination + max_limit: usize, + max_offset: usize, + } + + impl SearchBatcher { + pub fn extract_user_agents(request: &HttpRequest) -> Vec { + request + .headers() + .get(USER_AGENT) + .map(|header| header.to_str().ok()) + .flatten() + .unwrap_or("unknown") + .split(";") + .map(str::trim) + .map(ToString::to_string) + .collect() + } + + pub fn into_event(mut self, user: User, event_name: String) -> Track { + let context = Some(json!({ "user-agent": self.user_agents})); + let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.; + self.time_spent.drain(percentile_99th as usize..); + + let properties = json!({ + "requests": { + "99th_response_time": self.time_spent.len() as f64 / self.time_spent.iter().sum::() as f64, + "total_succeeded": self.total_succeeded, + "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics + "total_received": self.total_received, + }, + "sort": { + "with_geoPoint": self.sort_with_geo_point, + "avg_criteria_number": self.sort_total_number_of_criteria as f64 / self.sort_sum_of_criteria_terms as f64, + }, + "filter": { + "with_geoRadius": self.filter_with_geo_radius, + "avg_criteria_number": self.filter_total_number_of_criteria as f64 / self.filter_sum_of_criteria_terms as f64, + "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "q": { + "avg_terms_number": self.total_number_of_q as f64 / self.sum_of_terms_count as f64, + }, + "pagination": { + "max_limit": self.max_limit, + "max_offset": self.max_offset, + }, + }); + + Track { + user, + event: event_name, + context, + properties, + ..Default::default() + } + } + } } // if we are in debug mode OR the analytics feature is disabled @@ -199,8 +407,12 @@ impl MockAnalytics { #[async_trait::async_trait] impl Analytics for MockAnalytics { - /// This is a noop and should be optimized out + // These methods are no-ops and should be optimized out fn publish(&'static self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} + fn start_get_search(&'static self, _query: &SearchQuery, _request: &HttpRequest) {} + fn end_get_search(&'static self, _process_time: usize) {} + fn start_post_search(&'static self, _query: &SearchQuery, _request: &HttpRequest) {} + fn end_post_search(&'static self, _process_time: usize) {} } impl Display for MockAnalytics { @@ -211,5 +423,16 @@ impl Display for MockAnalytics { #[async_trait::async_trait] pub trait Analytics: Display + Sync + Send { + /// The method used to publish most analytics that do not need to be
batched every hour fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>); + + /// This method should be called to batch a get search request + fn start_get_search(&'static self, query: &SearchQuery, request: &HttpRequest); + /// This method should be called once a get search request has succeeded + fn end_get_search(&'static self, process_time: usize); + + /// This method should be called to batch a post search request + fn start_post_search(&'static self, query: &SearchQuery, request: &HttpRequest); + /// This method should be called once a post search request has succeeded + fn end_post_search(&'static self, process_time: usize); } From 35ffd0ec3a5d056fff63afac035b581199ead33e Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Oct 2021 19:17:16 +0200 Subject: [PATCH 20/68] integrate the search batcher in the tick --- meilisearch-http/src/analytics.rs | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 60f03e2d5..ec365039f 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -143,7 +143,17 @@ mod segment { }) .await; } - let _ = self.batcher.lock().await.flush().await; + let get_search = std::mem::take(&mut *self.get_search_batcher.lock().await) + .into_event(self.user.clone(), "Documents Searched GET".to_string()); + let post_search = std::mem::take(&mut *self.post_search_batcher.lock().await) + .into_event(self.user.clone(), "Documents Searched POST".to_string()); + // keep the lock on the batcher just for these three operations + { + let mut batcher = self.batcher.lock().await; + let _ = batcher.push(get_search).await; + let _ = batcher.push(post_search).await; + let _ = self.batcher.lock().await.flush().await; + } println!("ANALYTICS: sent the batch"); tokio::time::sleep(Duration::from_secs(60 * 5)).await; // 5 minutes } From 40eabd50d13bde733f267641de2966ec7fb5d951 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Oct 2021 19:23:11 +0200 Subject: [PATCH 21/68] integrate the search batcher in the search route --- meilisearch-http/src/routes/indexes/search.rs | 38 ++----------------- 1 file changed, 4 insertions(+), 34 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index c0d3f1462..e3e47886b 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -116,8 +116,7 @@ pub async fn search_with_url_query( debug!("called with params: {:?}", params); let query: SearchQuery = params.into_inner().into(); - let mut analytics_value = extract_analytics_from_query(&query); - analytics_value["http_method"] = json!("get"); + analytics.start_get_search(&query, &req); let search_result = meilisearch .search(path.into_inner().index_uid, query) .await?; @@ -127,12 +126,7 @@ pub async fn search_with_url_query( #[cfg(test)] assert!(!search_result.exhaustive_nb_hits); - analytics_value["response_time"] = json!(search_result.processing_time_ms as u64); - analytics.publish( - "Documents Searched".to_string(), - analytics_value, - Some(&req), - ); + analytics.end_get_search(search_result.processing_time_ms as usize); debug!("returns: {:?}", search_result); Ok(HttpResponse::Ok().json(search_result)) @@ -148,8 +142,7 @@ pub async fn search_with_post( let query = params.into_inner(); debug!("search called with params: {:?}", query); - let mut analytics_value = extract_analytics_from_query(&query); -
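// The handlers now only bracket the search; a sketch of the resulting pairing
// (the millisecond value is assumed for illustration):
//
//     analytics.start_post_search(&query, &req);           // counts the request
//     let result = meilisearch.search(uid, query).await?;  // the actual search
//     analytics.end_post_search(result.processing_time_ms as usize); // success only
//
// Because end_* runs only on the success path, total_received minus
// total_succeeded later yields the failure count at flush time.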
analytics_value["http_method"] = json!("post"); + analytics.start_post_search(&query, &req); let search_result = meilisearch .search(path.into_inner().index_uid, query) @@ -159,35 +152,12 @@ pub async fn search_with_post( #[cfg(test)] assert!(!search_result.exhaustive_nb_hits); - analytics_value["response_time"] = json!(search_result.processing_time_ms as u64); - analytics.publish( - "Documents Searched".to_string(), - analytics_value, - Some(&req), - ); + analytics.end_post_search(search_result.processing_time_ms as usize); debug!("returns: {:?}", search_result); Ok(HttpResponse::Ok().json(search_result)) } -fn extract_analytics_from_query(query: &SearchQuery) -> Value { - json!({ - "sort": { - "total": query.sort.as_ref().map(|sort| sort.len()), - "has_geoPoint": query.sort.as_ref().map(|sort| sort.iter().any(|sort| sort.starts_with("_geoPoint"))), - }, - "filter": { - "has_geoRadius": query.filter.as_ref().map(|filter| filter.to_string().contains("_geoRadius")), - // "syntax": 42, - }, - "pagination": { - "offset": query.offset, - "limit": query.limit, - }, - "terms_number": query.q.as_ref().map(|q| q.split_whitespace().count()).unwrap_or_default(), - }) -} - #[cfg(test)] mod test { use super::*; From 5395041dcb77983c9afe1df6c85e0c813a9499e8 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 20 Oct 2021 14:09:40 +0200 Subject: [PATCH 22/68] fix the stats and stop sending events when no request happened --- meilisearch-http/src/analytics.rs | 39 ++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index ec365039f..f000a54d9 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -143,19 +143,32 @@ mod segment { }) .await; } - let get_search = std::mem::take(&mut *self.get_search_batcher.lock().await) - .into_event(self.user.clone(), "Documents Searched GET".to_string()); - let post_search = std::mem::take(&mut *self.post_search_batcher.lock().await) - .into_event(self.user.clone(), "Documents Searched POST".to_string()); + println!("ANALYTICS: taking the lock on the search batcher"); + let get_search = std::mem::take(&mut *self.get_search_batcher.lock().await); + let get_search = (get_search.total_received != 0).then(|| { + get_search + .into_event(self.user.clone(), "Document Searched GET".to_string()) + }); + let post_search = std::mem::take(&mut *self.post_search_batcher.lock().await); + let post_search = (post_search.total_received != 0).then(|| { + post_search + .into_event(self.user.clone(), "Document Searched POST".to_string()) + }); // keep the lock on the batcher just for these three operations { + println!("ANALYTICS: taking the lock on the batcher"); let mut batcher = self.batcher.lock().await; - let _ = batcher.push(get_search).await; - let _ = batcher.push(post_search).await; - let _ = self.batcher.lock().await.flush().await; + if let Some(get_search) = get_search { + let _ = batcher.push(get_search).await; + } + if let Some(post_search) = post_search { + let _ = batcher.push(post_search).await; + } + println!("ANALYTICS: Sending the batch"); + let _ = batcher.flush().await; } println!("ANALYTICS: sent the batch"); - tokio::time::sleep(Duration::from_secs(60 * 5)).await; // 5 minutes + tokio::time::sleep(Duration::from_secs(60 * 2)).await; // 2 minutes } }); } @@ -291,7 +304,7 @@ mod segment { fn end_post_search(&'static self, process_time: usize) { tokio::spawn(async move { - let mut search_batcher = 
self.get_search_batcher.lock().await; + let mut search_batcher = self.post_search_batcher.lock().await; search_batcher.total_succeeded += 1; search_batcher.time_spent.push(process_time); }); @@ -361,22 +374,22 @@ mod segment { let properties = json!({ "requests": { - "99th_response_time": self.time_spent.len() as f64 / self.time_spent.iter().sum::() as f64, + "99th_response_time": format!("{:.2}", self.time_spent.iter().sum::() as f64 / self.time_spent.len() as f64), "total_succeeded": self.total_succeeded, "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics "total_received": self.total_received, }, "sort": { "with_geoPoint": self.sort_with_geo_point, - "avg_criteria_number": self.sort_total_number_of_criteria as f64 / self.sort_sum_of_criteria_terms as f64, + "avg_criteria_number": format!("{:.2}", self.sort_sum_of_criteria_terms as f64 / self.sort_total_number_of_criteria as f64), }, "filter": { "with_geoRadius": self.filter_with_geo_radius, - "avg_criteria_number": self.filter_total_number_of_criteria as f64 / self.filter_sum_of_criteria_terms as f64, + "avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64), "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), }, "q": { - "avg_terms_number": self.total_number_of_q as f64 / self.sum_of_terms_count as f64, + "avg_terms_number": format!("{:.2}", self.sum_of_terms_count as f64 / self.total_number_of_q as f64), }, "pagination": { "max_limit": self.max_limit, From cfcd3ae04813ee17a5de55d433f9d8cf7cf70f04 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 20 Oct 2021 14:24:34 +0200 Subject: [PATCH 23/68] move the version to context.app --- meilisearch-http/src/analytics.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index f000a54d9..e200141a0 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -60,7 +60,6 @@ mod segment { }) }); let infos = json!({ - "version": env!("CARGO_PKG_VERSION").to_string(), "env": opt.env.clone(), "has_snapshot": opt.schedule_snapshot, }); @@ -137,6 +136,11 @@ mod segment { .lock() .await .push(Identify { + context: Some(json!({ + "app": { + "version": env!("CARGO_PKG_VERSION").to_string(), + }, + })), user, traits, ..Default::default() From 1d73f484f0b63433093d72934a535709e34f6508 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 20 Oct 2021 14:29:08 +0200 Subject: [PATCH 24/68] update the primary key when creating a new index --- meilisearch-http/src/routes/indexes/mod.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs index 743da5b6a..81f937013 100644 --- a/meilisearch-http/src/routes/indexes/mod.rs +++ b/meilisearch-http/src/routes/indexes/mod.rs @@ -63,7 +63,7 @@ pub async fn create_index( analytics.publish( "Index Created".to_string(), - json!({ "with_primary_key": body.primary_key}), + json!({ "primary_key": body.primary_key}), Some(&req), ); let meta = meilisearch.create_index(body.uid, body.primary_key).await?; @@ -107,7 +107,7 @@ pub async fn update_index( let body = body.into_inner(); analytics.publish( "Index Updated".to_string(), - json!({ "with_primary_key": body.primary_key}), + json!({ "primary_key": body.primary_key}), Some(&req), ); let settings = IndexSettings { From 
392ee86714c77fde86309e401f084a24e030e975 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 25 Oct 2021 16:41:23 +0200 Subject: [PATCH 25/68] implement the documents batcher --- meilisearch-http/src/analytics.rs | 155 +++++++++++++++++- .../src/routes/indexes/documents.rs | 2 +- meilisearch-http/src/routes/indexes/search.rs | 2 +- meilisearch-http/src/routes/mod.rs | 2 +- 4 files changed, 155 insertions(+), 6 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index e200141a0..f467c800c 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -1,18 +1,20 @@ use actix_web::HttpRequest; use meilisearch_lib::index::SearchQuery; use serde_json::Value; -use std::collections::{HashMap, HashSet}; use std::fmt::Display; use std::fs::read_to_string; +use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::Opt; // if we are in release mode and the feature analytics was enabled #[cfg(all(not(debug_assertions), feature = "analytics"))] mod segment { use crate::analytics::Analytics; + use crate::routes::indexes::documents::UpdateDocumentsQuery; use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; + use http::header::CONTENT_TYPE; use meilisearch_lib::index::SearchQuery; use meilisearch_lib::index_controller::Stats; use meilisearch_lib::MeiliSearch; @@ -38,6 +40,8 @@ mod segment { batcher: Mutex, post_search_batcher: Mutex, get_search_batcher: Mutex, + documents_added_batcher: Mutex, + documents_updated_batcher: Mutex, } impl SegmentAnalytics { @@ -110,6 +114,8 @@ mod segment { batcher, post_search_batcher: Mutex::new(SearchBatcher::default()), get_search_batcher: Mutex::new(SearchBatcher::default()), + documents_added_batcher: Mutex::new(DocumentsBatcher::default()), + documents_updated_batcher: Mutex::new(DocumentsBatcher::default()), }); let segment = Box::leak(segment); @@ -158,6 +164,17 @@ mod segment { post_search .into_event(self.user.clone(), "Document Searched POST".to_string()) }); + let add_documents = + std::mem::take(&mut *self.documents_added_batcher.lock().await); + let add_documents = (add_documents.updated).then(|| { + add_documents.into_event(self.user.clone(), "Documents Added".to_string()) + }); + let update_documents = + std::mem::take(&mut *self.documents_updated_batcher.lock().await); + let update_documents = (update_documents.updated).then(|| { + update_documents + .into_event(self.user.clone(), "Documents Updated".to_string()) + }); // keep the lock on the batcher just for these three operations { println!("ANALYTICS: taking the lock on the batcher"); @@ -168,6 +185,12 @@ mod segment { if let Some(post_search) = post_search { let _ = batcher.push(post_search).await; } + if let Some(add_documents) = add_documents { + let _ = batcher.push(add_documents).await; + } + if let Some(update_documents) = update_documents { + let _ = batcher.push(update_documents).await; + } println!("ANALYTICS: Sending the batch"); let _ = batcher.flush().await; } @@ -199,8 +222,8 @@ mod segment { let max_limit = query.limit; let max_offset = query.offset.unwrap_or_default(); - // to avoid blocking the search we are going to do the heavier computation in an async task - // and take the mutex in the same task + // to avoid blocking the search we are going to do the heavier computation and take the + // batcher's mutex in an async task tokio::spawn(async move { let filtered = filter.is_some() as usize; let syntax = match filter.as_ref() { @@ -313,6 +336,70 @@ mod segment { 
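// A sketch of the aggregated event this documents batcher will produce, assuming
// two captured requests between flushes (one CSV without a primary key, one JSON
// with primary key "id"; the content-type values are illustrative):
//
//     "Documents Added": { "payload_type": ["text/csv", "application/json"],
//                          "primary_key": ["id"], "index_creation": false }
//
// The set-typed fields deduplicate repeats, so any number of requests still
// collapses into a single Track event per flush.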
search_batcher.time_spent.push(process_time); }); } + + fn add_documents( + &'static self, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, + request: &HttpRequest, + ) { + let user_agents = request + .headers() + .get(USER_AGENT) + .map(|header| header.to_str().unwrap_or("unknown").to_string()); + let primary_key = documents_query.primary_key.clone(); + let content_type = request + .headers() + .get(CONTENT_TYPE) + .map(|s| s.to_str().unwrap_or("unknown")) + .unwrap() + .to_string(); + + tokio::spawn(async move { + let mut lock = self.documents_added_batcher.lock().await; + for user_agent in user_agents { + lock.user_agents.insert(user_agent); + } + lock.content_types.insert(content_type); + if let Some(primary_key) = primary_key { + lock.primary_keys.insert(primary_key); + } + lock.index_creation |= index_creation; + // drop the lock here + }); + } + + fn update_documents( + &'static self, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, + request: &HttpRequest, + ) { + let user_agents = request + .headers() + .get(USER_AGENT) + .map(|header| header.to_str().unwrap_or("unknown").to_string()); + let primary_key = documents_query.primary_key.clone(); + let content_type = request + .headers() + .get(CONTENT_TYPE) + .map(|s| s.to_str().unwrap_or("unknown")) + .unwrap() + .to_string(); + + tokio::spawn(async move { + let mut lock = self.documents_updated_batcher.lock().await; + for user_agent in user_agents { + lock.user_agents.insert(user_agent); + } + lock.content_types.insert(content_type); + if let Some(primary_key) = primary_key { + lock.primary_keys.insert(primary_key); + } + lock.index_creation |= index_creation; + // drop the lock here + }); + } } impl Display for SegmentAnalytics { @@ -410,6 +497,39 @@ mod segment { } } + + #[derive(Default)] + pub struct DocumentsBatcher { + // set to true when at least one request was received + updated: bool, + + // context + user_agents: HashSet, + + content_types: HashSet, + primary_keys: HashSet, + index_creation: bool, + } + + impl DocumentsBatcher { + pub fn into_event(mut self, user: User, event_name: String) -> Track { + let context = Some(json!({ "user-agent": self.user_agents})); + + let properties = json!({ + "payload_type": self.content_types, + "primary_key": self.primary_keys, + "index_creation": self.index_creation, + }); + + Track { + user, + event: event_name, + context, + properties, + ..Default::default() + } + } + } } // if we are in debug mode OR the analytics feature is disabled @@ -440,6 +560,20 @@ impl Analytics for MockAnalytics { fn end_post_search(&'static self, _process_time: usize) {} + fn add_documents( + &'static self, + _documents_query: &UpdateDocumentsQuery, + _index_creation: bool, + _request: &HttpRequest, + ) { + } + fn update_documents( + &'static self, + _documents_query: &UpdateDocumentsQuery, + _index_creation: bool, + _request: &HttpRequest, + ) { + } } impl Display for MockAnalytics { @@ -462,4 +596,19 @@ pub trait Analytics: Display + Sync + Send { fn end_post_search(&'static self, process_time: usize); + + // this method should be called to batch a add documents request + fn add_documents( + &'static self, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, +
request: &HttpRequest, + ); + // this method should be called to batch a update documents request + fn update_documents( + &'static self, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, + request: &HttpRequest, + ); } diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index 8d3630713..c6b220b41 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs +++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -123,7 +123,7 @@ pub async fn get_all_documents( #[derive(Deserialize, Debug)] #[serde(rename_all = "camelCase", deny_unknown_fields)] pub struct UpdateDocumentsQuery { - primary_key: Option, + pub primary_key: Option, } pub async fn add_documents( diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs index e3e47886b..d5d2b9540 100644 --- a/meilisearch-http/src/routes/indexes/search.rs +++ b/meilisearch-http/src/routes/indexes/search.rs @@ -3,7 +3,7 @@ use log::debug; use meilisearch_lib::index::{default_crop_length, SearchQuery, DEFAULT_SEARCH_LIMIT}; use meilisearch_lib::MeiliSearch; use serde::Deserialize; -use serde_json::{json, Value}; +use serde_json::Value; use crate::analytics::Analytics; use crate::error::ResponseError; diff --git a/meilisearch-http/src/routes/mod.rs b/meilisearch-http/src/routes/mod.rs index 092bd1186..2c27b7b45 100644 --- a/meilisearch-http/src/routes/mod.rs +++ b/meilisearch-http/src/routes/mod.rs @@ -14,7 +14,7 @@ use crate::extractors::authentication::{policies::*, GuardedData}; use crate::ApiKeys; mod dump; -mod indexes; +pub mod indexes; pub fn configure(cfg: &mut web::ServiceConfig) { cfg.service(web::resource("/health").route(web::get().to(get_health))) From a9523146a38f5e20386574262bc02583c894f665 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 25 Oct 2021 16:52:15 +0200 Subject: [PATCH 26/68] simplify the into_events methods --- meilisearch-http/src/analytics.rs | 132 +++++++++++++++--------------- 1 file changed, 65 insertions(+), 67 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index f467c800c..00efd8a0a 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -153,28 +153,18 @@ mod segment { }) .await; } - println!("ANALYTICS: taking the lock on the search batcher"); - let get_search = std::mem::take(&mut *self.get_search_batcher.lock().await); - let get_search = (get_search.total_received != 0).then(|| { - get_search - .into_event(self.user.clone(), "Document Searched GET".to_string()) - }); - let post_search = std::mem::take(&mut *self.post_search_batcher.lock().await); - let post_search = (post_search.total_received != 0).then(|| { - post_search - .into_event(self.user.clone(), "Document Searched POST".to_string()) - }); + println!("ANALYTICS: taking the lock on the search batchers"); + let get_search = std::mem::take(&mut *self.get_search_batcher.lock().await) + .into_event(&self.user, "Document Searched GET"); + let post_search = std::mem::take(&mut *self.post_search_batcher.lock().await) + .into_event(&self.user, "Document Searched POST"); + println!("ANALYTICS: taking the lock on the documents batchers"); let add_documents = - std::mem::take(&mut *self.documents_added_batcher.lock().await); - let add_documents = (add_documents.updated).then(|| { - add_documents.into_event(self.user.clone(), "Documents Added".to_string()) - }); + std::mem::take(&mut *self.documents_added_batcher.lock().await) + .into_event(&self.user, 
"Documents Added"); let update_documents = - std::mem::take(&mut *self.documents_updated_batcher.lock().await); - let update_documents = (update_documents.updated).then(|| { - update_documents - .into_event(self.user.clone(), "Documents Updated".to_string()) - }); + std::mem::take(&mut *self.documents_updated_batcher.lock().await) + .into_event(&self.user, "Documents Updated"); // keep the lock on the batcher just for these three operations { println!("ANALYTICS: taking the lock on the batcher"); @@ -458,42 +448,46 @@ mod segment { .collect() } - pub fn into_event(mut self, user: User, event_name: String) -> Track { - let context = Some(json!({ "user-agent": self.user_agents})); - let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.; - self.time_spent.drain(percentile_99th as usize..); + pub fn into_event(mut self, user: &User, event_name: &str) -> Option { + if self.total_received == 0 { + None + } else { + let context = Some(json!({ "user-agent": self.user_agents})); + let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.; + self.time_spent.drain(percentile_99th as usize..); - let properties = json!({ - "requests": { - "99th_response_time": format!("{:.2}", self.time_spent.iter().sum::() as f64 / self.time_spent.len() as f64), - "total_succeeded": self.total_succeeded, - "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics - "total_received": self.total_received, - }, - "sort": { - "with_geoPoint": self.sort_with_geo_point, - "avg_criteria_number": format!("{:.2}", self.sort_sum_of_criteria_terms as f64 / self.sort_total_number_of_criteria as f64), - }, - "filter": { - "with_geoRadius": self.filter_with_geo_radius, - "avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64), - "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "q": { - "avg_terms_number": format!("{:.2}", self.sum_of_terms_count as f64 / self.total_number_of_q as f64), - }, - "pagination": { - "max_limit": self.max_limit, - "max_offset": self.max_offset, - }, - }); + let properties = json!({ + "requests": { + "99th_response_time": format!("{:.2}", self.time_spent.iter().sum::() as f64 / self.time_spent.len() as f64), + "total_succeeded": self.total_succeeded, + "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics + "total_received": self.total_received, + }, + "sort": { + "with_geoPoint": self.sort_with_geo_point, + "avg_criteria_number": format!("{:.2}", self.sort_sum_of_criteria_terms as f64 / self.sort_total_number_of_criteria as f64), + }, + "filter": { + "with_geoRadius": self.filter_with_geo_radius, + "avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64), + "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "q": { + "avg_terms_number": format!("{:.2}", self.sum_of_terms_count as f64 / self.total_number_of_q as f64), + }, + "pagination": { + "max_limit": self.max_limit, + "max_offset": self.max_offset, + }, + }); - Track { - user, - event: event_name, - context, - properties, - ..Default::default() + Some(Track { + user: user.clone(), + event: event_name.to_string(), + context, + properties, + ..Default::default() + }) } } } @@ -512,21 +506,25 @@ mod segment { } impl 
DocumentsBatcher { - pub fn into_event(mut self, user: User, event_name: String) -> Track { - let context = Some(json!({ "user-agent": self.user_agents})); + pub fn into_event(mut self, user: &User, event_name: &str) -> Option { + if self.updated { + None + } else { + let context = Some(json!({ "user-agent": self.user_agents})); - let properties = json!({ - "payload_type": self.content_types, - "primary_key": self.primary_keys, - "index_creation": self.index_creation, - }); + let properties = json!({ + "payload_type": self.content_types, + "primary_key": self.primary_keys, + "index_creation": self.index_creation, + }); - Track { - user, - event: event_name, - context, - properties, - ..Default::default() + Some(Track { + user: user.clone(), + event: event_name.to_string(), + context, + properties, + ..Default::default() + }) } } } From e64ba122e1345ce533dde91fccd5e04e53fe6beb Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 25 Oct 2021 17:08:28 +0200 Subject: [PATCH 27/68] factorize the code between the two documents batcher --- meilisearch-http/src/analytics.rs | 118 ++++++++++++++---------------- 1 file changed, 56 insertions(+), 62 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 00efd8a0a..6bbf81155 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -34,6 +34,19 @@ mod segment { const SEGMENT_API_KEY: &str = "vHi89WrNDckHSQssyUJqLvIyp2QFITSC"; + pub fn extract_user_agents(request: &HttpRequest) -> Vec { + request + .headers() + .get(USER_AGENT) + .map(|header| header.to_str().ok()) + .flatten() + .unwrap_or("unknown") + .split(";") + .map(str::trim) + .map(ToString::to_string) + .collect() + } + pub struct SegmentAnalytics { user: User, opt: Opt, @@ -196,7 +209,7 @@ mod segment { query: &SearchQuery, request: &HttpRequest, ) { - let user_agent = SearchBatcher::extract_user_agents(request); + let user_agent = extract_user_agents(request); let sorted = query.sort.is_some() as usize; @@ -275,6 +288,36 @@ mod segment { search_batcher.max_offset = search_batcher.max_offset.max(max_offset); }); } + + fn batch_documents( + &'static self, + batcher: &'static Mutex, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, + request: &HttpRequest, + ) { + let user_agents = extract_user_agents(request); + let primary_key = documents_query.primary_key.clone(); + let content_type = request + .headers() + .get(CONTENT_TYPE) + .map(|s| s.to_str().unwrap_or("unknown")) + .unwrap() + .to_string(); + + tokio::spawn(async move { + let mut lock = batcher.lock().await; + for user_agent in user_agents { + lock.user_agents.insert(user_agent); + } +
lock.content_types.insert(content_type); + if let Some(primary_key) = primary_key { + lock.primary_keys.insert(primary_key); + } + lock.index_creation |= index_creation; + // drop the lock here + }); + } } #[async_trait::async_trait] fn add_documents( &'static self, documents_query: &UpdateDocumentsQuery, index_creation: bool, request: &HttpRequest, ) { - let user_agents = request - .headers() - .get(USER_AGENT) - .map(|header| header.to_str().unwrap_or("unknown").to_string()); - let primary_key = documents_query.primary_key.clone(); - let content_type = request - .headers() - .get(CONTENT_TYPE) - .map(|s| s.to_str().unwrap_or("unknown")) - .unwrap() - .to_string(); - - tokio::spawn(async move { - let mut lock = self.documents_added_batcher.lock().await; - for user_agent in user_agents { - lock.user_agents.insert(user_agent); - } - lock.content_types.insert(content_type); - if let Some(primary_key) = primary_key { - lock.primary_keys.insert(primary_key); - } - lock.index_creation |= index_creation; - // drop the lock here - }); + self.batch_documents( + &self.documents_added_batcher, + documents_query, + index_creation, + request, + ) } fn update_documents( &'static self, documents_query: &UpdateDocumentsQuery, index_creation: bool, request: &HttpRequest, ) { - let user_agents = request - .headers() - .get(USER_AGENT) - .map(|header| header.to_str().unwrap_or("unknown").to_string()); - let primary_key = documents_query.primary_key.clone(); - let content_type = request - .headers() - .get(CONTENT_TYPE) - .map(|s| s.to_str().unwrap_or("unknown")) - .unwrap() - .to_string(); - - tokio::spawn(async move { - let mut lock = self.documents_updated_batcher.lock().await; - for user_agent in user_agents { - lock.user_agents.insert(user_agent); - } - lock.content_types.insert(content_type); - if let Some(primary_key) = primary_key { - lock.primary_keys.insert(primary_key); - } - lock.index_creation |= index_creation; - // drop the lock here - }); + self.batch_documents( + &self.documents_updated_batcher, + documents_query, + index_creation, + request, + ) } } impl SearchBatcher { - pub fn extract_user_agents(request: &HttpRequest) -> Vec { - request - .headers() - .get(USER_AGENT) - .map(|header| header.to_str().ok()) - .flatten() - .unwrap_or("unknown") - .split(";") - .map(str::trim) - .map(ToString::to_string) - .collect() - } - pub fn into_event(mut self, user: &User, event_name: &str) -> Option { From 6591acfdfaf1c1fabd7680f2cd600eaeeccb30fc Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 25 Oct 2021 17:12:14 +0200 Subject: [PATCH 28/68] rename the documents batchers --- meilisearch-http/src/analytics.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 6bbf81155..688521975 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -53,8 +53,8 @@ mod segment { batcher: Mutex, post_search_batcher: Mutex, get_search_batcher: Mutex, - documents_added_batcher: Mutex, - documents_updated_batcher: Mutex, + add_documents_batcher: Mutex, + update_documents_batcher: Mutex, } impl SegmentAnalytics { @@ -127,8 +127,8 @@ mod segment { batcher, post_search_batcher: Mutex::new(SearchBatcher::default()), get_search_batcher: Mutex::new(SearchBatcher::default()), - documents_added_batcher: Mutex::new(DocumentsBatcher::default()), - documents_updated_batcher: Mutex::new(DocumentsBatcher::default()), + add_documents_batcher: Mutex::new(DocumentsBatcher::default()), + update_documents_batcher: Mutex::new(DocumentsBatcher::default()), }); let segment = Box::leak(segment); @@ -173,14 +173,14 @@ mod segment { .into_event(&self.user, "Document Searched POST"); println!("ANALYTICS: taking the lock on the documents batchers"); let add_documents = - std::mem::take(&mut *self.documents_added_batcher.lock().await) + std::mem::take(&mut *self.add_documents_batcher.lock().await) .into_event(&self.user, "Documents Added"); let update_documents = - std::mem::take(&mut *self.documents_updated_batcher.lock().await) + std::mem::take(&mut *self.update_documents_batcher.lock().await) .into_event(&self.user, "Documents Updated"); // keep the lock on the batcher just for these three operations {
println!("ANALYTICS: taking the lock on the batchers"); let mut batcher = self.batcher.lock().await; if let Some(get_search) = get_search { let _ = batcher.push(get_search).await; @@ -377,7 +377,7 @@ mod segment { request: &HttpRequest, ) { self.batch_documents( - &self.documents_added_batcher, + &self.add_documents_batcher, documents_query, index_creation, request, @@ -391,7 +391,7 @@ mod segment { request: &HttpRequest, ) { self.batch_documents( - &self.documents_updated_batcher, + &self.update_documents_batcher, documents_query, index_creation, request, From f9b14ca149ad9e0da3e17321fe489a87b7b3656e Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 25 Oct 2021 17:14:07 +0200 Subject: [PATCH 29/68] simplify the search batcher --- meilisearch-http/src/analytics.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 688521975..04ac79211 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -205,7 +205,7 @@ mod segment { fn start_search( &'static self, - getter: impl Fn(&'static Self) -> &'static Mutex + Send + Sync + 'static, + batcher: &'static Mutex, query: &SearchQuery, request: &HttpRequest, ) { @@ -262,7 +262,7 @@ mod segment { - 1; println!("Batching a search"); - let mut search_batcher = getter(self).lock().await; + let mut search_batcher = batcher.lock().await; user_agent.into_iter().for_each(|ua| { search_batcher.user_agents.insert(ua); }); @@ -347,7 +347,7 @@ mod segment { } fn start_get_search(&'static self, query: &SearchQuery, request: &HttpRequest) { - self.start_search(|s| &s.get_search_batcher, query, request) + self.start_search(&self.get_search_batcher, query, request) } fn end_get_search(&'static self, process_time: usize) { @@ -359,7 +359,7 @@ mod segment { } fn start_post_search(&'static self, query: &SearchQuery, request: &HttpRequest) { - self.start_search(|s| &s.post_search_batcher, query, request) + self.start_search(&self.post_search_batcher, query, request) } fn end_post_search(&'static self, process_time: usize) { @@ -500,7 +500,7 @@ mod segment { } impl DocumentsBatcher { - pub fn into_event(mut self, user: &User, event_name: &str) -> Option { + pub fn into_event(self, user: &User, event_name: &str) -> Option { if self.updated { None } else { From 9be90011c674467c873fde0fdb686dfd60d7b47d Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 25 Oct 2021 19:23:47 +0200 Subject: [PATCH 30/68] save the user-id in the config dir of the OS --- Cargo.lock | 41 ++++++++++++++++++ meilisearch-http/Cargo.toml | 1 + meilisearch-http/src/analytics.rs | 71 ++++++++++++++++++++++--------- 3 files changed, 93 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 93a276a60..2ad7be305 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -849,6 +849,27 @@ dependencies = [ "generic-array 0.14.4", ] +[[package]] +name = "dirs-next" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cf36e65a80337bea855cd4ef9b8401ffce06a7baedf2e85ec467b1ac3f6e82b6" +dependencies = [ + "cfg-if 1.0.0", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "discard" version = "1.0.4" @@ -1653,6 +1674,7 @@ dependencies = [ "parking_lot", "paste", "pin-project", + 
"platform-dirs", "rand", "rayon", "regex", @@ -2172,6 +2194,15 @@ version = "0.3.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7c9b1041b4387893b91ee6746cddfc28516aff326a3519fb2adf820932c5e6cb" +[[package]] +name = "platform-dirs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e188d043c1a692985f78b5464853a263f1a27e5bd6322bad3a4078ee3c998a38" +dependencies = [ + "dirs-next", +] + [[package]] name = "ppv-lite86" version = "0.2.10" @@ -2353,6 +2384,16 @@ dependencies = [ "bitflags", ] +[[package]] +name = "redox_users" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64" +dependencies = [ + "getrandom", + "redox_syscall", +] + [[package]] name = "regex" version = "1.5.4" diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index ffc660c80..41bae43b8 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -51,6 +51,7 @@ mime = "0.3.16" num_cpus = "1.13.0" once_cell = "1.8.0" parking_lot = "0.11.2" +platform-dirs = "0.3.0" rand = "0.8.4" rayon = "1.5.1" regex = "1.5.4" diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 04ac79211..2ff739f65 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -1,11 +1,50 @@ -use actix_web::HttpRequest; -use meilisearch_lib::index::SearchQuery; -use serde_json::Value; -use std::fmt::Display; -use std::fs::read_to_string; - use crate::routes::indexes::documents::UpdateDocumentsQuery; use crate::Opt; +use actix_web::HttpRequest; +use meilisearch_lib::index::SearchQuery; +use once_cell::sync::Lazy; +use platform_dirs::AppDirs; +use serde_json::Value; +use std::fmt::Display; +use std::fs; +use std::path::PathBuf; + +/// The MeiliSearch config dir: +/// `~/.config/MeiliSearch` on *NIX or *BSD. +/// `~/Library/ApplicationSupport` on macOS. +/// `%APPDATA` (= `C:\Users%USERNAME%\AppData\Roaming`) on windows. +static MEILISEARCH_CONFIG_PATH: Lazy> = + Lazy::new(|| AppDirs::new(Some("MeiliSearch"), false).map(|appdir| appdir.config_dir)); + +fn config_user_id_path(opt: &Opt) -> Option { + opt.db_path + .canonicalize() + .ok() + .map(|path| path.join("user-id").display().to_string().replace("/", "-")) + .zip(MEILISEARCH_CONFIG_PATH.as_ref()) + .map(|(filename, config_path)| config_path.join(filename)) +} + +/// Look for the user-id in the `data.ms` or in `~/.config/MeiliSearch/path-to-db-user-id` +fn find_user_id(opt: &Opt) -> Option { + fs::read_to_string(opt.db_path.join("user-id")) + .ok() + .or_else(|| fs::read_to_string(&config_user_id_path(opt)?).ok()) +} + +#[cfg(all(not(debug_assertions), feature = "analytics"))] +/// Write the user-id in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-user-id`. Ignore the errors. 
+fn write_user_id(opt: &Opt, user_id: &str) {
+    let _ = fs::write(opt.db_path.join("user-id"), user_id.as_bytes());
+    if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH
+        .as_ref()
+        .zip(config_user_id_path(opt))
+    {
+        println!("{}", user_id_path.display());
+        let _ = fs::create_dir_all(&meilisearch_config_path);
+        let _ = fs::write(user_id_path, user_id.as_bytes());
+    }
+}
 
 // if we are in release mode and the feature analytics was enabled
 #[cfg(all(not(debug_assertions), feature = "analytics"))]
@@ -24,7 +63,6 @@ mod segment {
     use serde_json::{json, Value};
     use std::collections::{HashMap, HashSet};
     use std::fmt::Display;
-    use std::fs;
     use std::time::{Duration, Instant};
     use sysinfo::{DiskExt, System, SystemExt};
     use tokio::sync::Mutex;
@@ -100,17 +138,12 @@ mod segment {
         }
 
         pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self {
-            // see if there is already a user-id in the `data.ms`
-            let user_id = fs::read_to_string(opt.db_path.join("user-id"))
-                .or_else(|_| fs::read_to_string("/tmp/meilisearch-user-id"));
-            let first_time_run = user_id.is_err();
+            // see if there is already a user-id in the `data.ms` or in `/tmp/path-to-db-user-id`
+            let user_id = super::find_user_id(opt);
+            let first_time_run = user_id.is_none();
             // if not, generate a new user-id and save it to the fs
-            let user_id = user_id.unwrap_or_else(|_| Uuid::new_v4().to_string());
-            let _ = fs::write(opt.db_path.join("user-id"), user_id.as_bytes());
-            let _ = fs::write(
-                opt.db_path.join("/tmp/meilisearch-user-id"),
-                user_id.as_bytes(),
-            );
+            let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string());
+            super::write_user_id(opt, &user_id);
 
             let client = HttpClient::default();
             let user = User::UserId {
@@ -536,9 +569,7 @@ pub struct MockAnalytics {
 
 impl MockAnalytics {
     pub fn new(opt: &Opt) -> &'static Self {
-        let user = read_to_string(opt.db_path.join("user-id"))
-            .or_else(|_| read_to_string("/tmp/meilisearch-user-id"))
-            .unwrap_or_else(|_| "".to_string());
+        let user = find_user_id(opt).unwrap_or(String::new());
         let analytics = Box::new(Self { user });
         Box::leak(analytics)
     }

From ba14ea124318f05860644873e6326dd514252192 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Mon, 25 Oct 2021 19:28:30 +0200
Subject: [PATCH 31/68] plug the new batchers into the documents route

---
 .../src/routes/indexes/documents.rs | 26 +++++++------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs
index c6b220b41..932861f36 100644
--- a/meilisearch-http/src/routes/indexes/documents.rs
+++ b/meilisearch-http/src/routes/indexes/documents.rs
@@ -8,7 +8,7 @@ use meilisearch_lib::milli::update::IndexDocumentsMethod;
 use meilisearch_lib::MeiliSearch;
 use once_cell::sync::Lazy;
 use serde::Deserialize;
-use serde_json::{json, Value};
+use serde_json::Value;
 use tokio::sync::mpsc;
 
 use crate::analytics::Analytics;
@@ -141,14 +141,10 @@ pub async fn add_documents(
         .map(|s| s.to_str().unwrap_or("unknown"));
     let params = params.into_inner();
 
-    analytics.publish(
-        "Documents Added".to_string(),
-        json!({
-            "payload_type": content_type,
-            "with_primary_key": params.primary_key,
-            "index_creation": meilisearch.get_index(path.index_uid.clone()).await.is_ok(),
-        }),
-        Some(&req),
+    analytics.add_documents(
+        &params,
+        meilisearch.get_index(path.index_uid.clone()).await.is_ok(),
+        &req,
     );
 
     document_addition(
@@ -176,14 +172,10 @@ pub async fn update_documents(
         .get("Content-type")
         .map(|s| s.to_str().unwrap_or("unknown"));
 
-    analytics.publish(
-        "Documents Updated".to_string(),
-        json!({
-            "payload_type": content_type,
-            "with_primary_key": params.primary_key,
-            "index_creation": meilisearch.get_index(path.index_uid.clone()).await.is_ok(),
-        }),
-        Some(&req),
+    analytics.update_documents(
+        &params,
+        meilisearch.get_index(path.index_uid.clone()).await.is_ok(),
+        &req,
     );
 
     document_addition(

From 87a8bf5e96fba331dda74f9edd46e76e43685783 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Tue, 26 Oct 2021 12:34:00 +0200
Subject: [PATCH 32/68] write and load the user-id in the dumps

---
 Cargo.toml | 3 +++
 meilisearch-http/src/analytics.rs | 25 +++++++++----------
 meilisearch-lib/src/analytics.rs | 17 +++++++++++++
 .../src/index_controller/dump_actor/actor.rs | 4 +++
 .../dump_actor/handle_impl.rs | 2 ++
 .../index_controller/dump_actor/loaders/v3.rs | 2 ++
 .../src/index_controller/dump_actor/mod.rs | 7 ++++++
 meilisearch-lib/src/index_controller/mod.rs | 2 ++
 meilisearch-lib/src/lib.rs | 2 ++
 9 files changed, 51 insertions(+), 13 deletions(-)
 create mode 100644 meilisearch-lib/src/analytics.rs

diff --git a/Cargo.toml b/Cargo.toml
index 02e9813a4..44732839f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -6,5 +6,8 @@ members = [
 ]
 resolver = "2"
 
+[profile.release]
+debug = true
+
 [patch.crates-io]
 pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" }

diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs
index 2ff739f65..29613cda3 100644
--- a/meilisearch-http/src/analytics.rs
+++ b/meilisearch-http/src/analytics.rs
@@ -7,7 +7,7 @@ use platform_dirs::AppDirs;
 use serde_json::Value;
 use std::fmt::Display;
 use std::fs;
-use std::path::PathBuf;
+use std::path::{Path, PathBuf};
 
 /// The MeiliSearch config dir:
 /// `~/.config/MeiliSearch` on *NIX or *BSD.
 /// `~/Library/Application Support` on macOS.
 /// `%APPDATA%` (= `C:\Users\%USERNAME%\AppData\Roaming`) on Windows.
 static MEILISEARCH_CONFIG_PATH: Lazy<Option<PathBuf>> =
     Lazy::new(|| AppDirs::new(Some("MeiliSearch"), false).map(|appdir| appdir.config_dir));
 
-fn config_user_id_path(opt: &Opt) -> Option<PathBuf> {
-    opt.db_path
+fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
+    db_path
         .canonicalize()
         .ok()
         .map(|path| path.join("user-id").display().to_string().replace("/", "-"))
         .zip(MEILISEARCH_CONFIG_PATH.as_ref())
         .map(|(filename, config_path)| config_path.join(filename))
 }
 
@@ -26,19 +26,18 @@ fn config_user_id_path(opt: &Opt) -> Option<PathBuf> {
 /// Look for the user-id in the `data.ms` or in `~/.config/MeiliSearch/path-to-db-user-id`
-fn find_user_id(opt: &Opt) -> Option<String> {
-    fs::read_to_string(opt.db_path.join("user-id"))
+fn find_user_id(db_path: &Path) -> Option<String> {
+    fs::read_to_string(db_path.join("user-id"))
         .ok()
-        .or_else(|| fs::read_to_string(&config_user_id_path(opt)?).ok())
+        .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
 }
 
-#[cfg(all(not(debug_assertions), feature = "analytics"))]
 /// Write the user-id in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-user-id`. Ignore the errors.
-fn write_user_id(opt: &Opt, user_id: &str) {
-    let _ = fs::write(opt.db_path.join("user-id"), user_id.as_bytes());
+fn write_user_id(db_path: &Path, user_id: &str) {
+    let _ = fs::write(db_path.join("user-id"), user_id.as_bytes());
     if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH
         .as_ref()
-        .zip(config_user_id_path(opt))
+        .zip(config_user_id_path(db_path))
     {
         println!("{}", user_id_path.display());
         let _ = fs::create_dir_all(&meilisearch_config_path);
         let _ = fs::write(user_id_path, user_id.as_bytes());
     }
@@ -139,11 +138,11 @@ mod segment {
 
     pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self {
         // see if there is already a user-id in the `data.ms` or in `/tmp/path-to-db-user-id`
-        let user_id = super::find_user_id(opt);
+        let user_id = super::find_user_id(&opt.db_path);
         let first_time_run = user_id.is_none();
         // if not, generate a new user-id and save it to the fs
         let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string());
-        super::write_user_id(opt, &user_id);
+        super::write_user_id(&opt.db_path, &user_id);
 
         let client = HttpClient::default();
         let user = User::UserId {
@@ -569,7 +568,7 @@ pub struct MockAnalytics {
 impl MockAnalytics {
     pub fn new(opt: &Opt) -> &'static Self {
-        let user = find_user_id(opt).unwrap_or(String::new());
+        let user = find_user_id(&opt.db_path).unwrap_or(String::new());
         let analytics = Box::new(Self { user });
         Box::leak(analytics)
     }

diff --git a/meilisearch-lib/src/analytics.rs b/meilisearch-lib/src/analytics.rs
new file mode 100644
index 000000000..76e673e5a
--- /dev/null
+++ b/meilisearch-lib/src/analytics.rs
@@ -0,0 +1,17 @@
+use std::{fs, path::Path};
+
+/// To load a dump we get the user-id from the source directory;
+/// if there is one, we write it to the new destination, ignoring any errors
+pub fn load_dump(src: &Path, dst: &Path) {
+    if let Ok(user_id) = fs::read_to_string(src.join("user-id")) {
+        let _ = fs::write(dst.join("user-id"), &user_id);
+    }
+}
+
+/// To write a dump we get the user-id from the given source file;
+/// if there is one, we write it to the destination file, ignoring any errors
+pub fn write_dump(src: &Path, dst: &Path) {
+    if let Ok(user_id) = fs::read_to_string(src) {
+        let _ = fs::write(dst, &user_id);
+    }
+}

diff --git a/meilisearch-lib/src/index_controller/dump_actor/actor.rs b/meilisearch-lib/src/index_controller/dump_actor/actor.rs
index 9cdeacfaf..896f86e3b 100644
--- a/meilisearch-lib/src/index_controller/dump_actor/actor.rs
+++ b/meilisearch-lib/src/index_controller/dump_actor/actor.rs
@@ -22,6 +22,7 @@ pub struct DumpActor<U, I> {
     index_resolver: Arc<IndexResolver<U, I>>,
     update: UpdateSender,
     dump_path: PathBuf,
+    analytics_path: PathBuf,
     lock: Arc<Mutex<()>>,
     dump_infos: Arc<RwLock<HashMap<String, DumpInfo>>>,
     update_db_size: usize,
@@ -43,6 +44,7 @@ where
         index_resolver: Arc<IndexResolver<U, I>>,
         update: UpdateSender,
         dump_path: impl AsRef<Path>,
+        analytics_path: impl AsRef<Path>,
         index_db_size: usize,
         update_db_size: usize,
     ) -> Self {
@@ -53,6 +55,7 @@ where
             index_resolver,
             update,
             dump_path: dump_path.as_ref().into(),
+            analytics_path: analytics_path.as_ref().into(),
             dump_infos,
             lock,
             index_db_size,
@@ -119,6 +122,7 @@ where
         let task = DumpTask {
             path: self.dump_path.clone(),
+            analytics_path: self.analytics_path.clone(),
             index_resolver: self.index_resolver.clone(),
             update_sender: self.update.clone(),
             uid: uid.clone(),

diff --git a/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs
index 5acee2f81..ce7c36d13 100644
--- a/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs
+++ 
b/meilisearch-lib/src/index_controller/dump_actor/handle_impl.rs @@ -33,6 +33,7 @@ impl DumpActorHandle for DumpActorHandleImpl { impl DumpActorHandleImpl { pub fn new( path: impl AsRef, + analytics_path: impl AsRef, index_resolver: Arc, update: crate::index_controller::updates::UpdateSender, index_db_size: usize, @@ -44,6 +45,7 @@ impl DumpActorHandleImpl { index_resolver, update, path, + analytics_path, index_db_size, update_db_size, ); diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs index 480dd83d4..8a67edc67 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs @@ -2,6 +2,7 @@ use std::path::Path; use log::info; +use crate::analytics; use crate::index_controller::dump_actor::Metadata; use crate::index_controller::index_resolver::IndexResolver; use crate::index_controller::update_file_store::UpdateFileStore; @@ -24,6 +25,7 @@ pub fn load_dump( IndexResolver::load_dump(src.as_ref(), &dst, index_db_size, indexing_options)?; UpdateFileStore::load_dump(src.as_ref(), &dst)?; UpdateStore::load_dump(&src, &dst, update_db_size)?; + analytics::load_dump(src.as_ref(), dst.as_ref()); info!("Loading indexes."); diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index 8169a0092..c73d5bab7 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -17,6 +17,7 @@ use super::index_resolver::index_store::IndexStore; use super::index_resolver::uuid_store::UuidStore; use super::index_resolver::IndexResolver; use super::updates::UpdateSender; +use crate::analytics; use crate::compression::{from_tar_gz, to_tar_gz}; use crate::index_controller::dump_actor::error::DumpActorError; use crate::index_controller::dump_actor::loaders::{v2, v3}; @@ -223,6 +224,7 @@ pub fn load_dump( struct DumpTask { path: PathBuf, + analytics_path: PathBuf, index_resolver: Arc>, update_sender: UpdateSender, uid: String, @@ -247,6 +249,7 @@ where let meta_path = temp_dump_path.join(META_FILE_NAME); let mut meta_file = File::create(&meta_path)?; serde_json::to_writer(&mut meta_file, &meta)?; + analytics::write_dump(&self.analytics_path, &temp_dump_path.join("user-id")); create_dir_all(&temp_dump_path.join("indexes")).await?; let uuids = self.index_resolver.dump(temp_dump_path.clone()).await?; @@ -339,6 +342,8 @@ mod test { let task = DumpTask { path: tmp.path().to_owned(), + // this should do nothing + analytics_path: tmp.path().join("user-id"), index_resolver, update_sender, uid: String::from("test"), @@ -367,6 +372,8 @@ mod test { let task = DumpTask { path: tmp.path().to_owned(), + // this should do nothing + analytics_path: tmp.path().join("user-id"), index_resolver, update_sender, uid: String::from("test"), diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 7273a80db..4074de1ba 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -169,8 +169,10 @@ impl IndexControllerBuilder { let dump_path = self .dump_dst .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; + let analytics_path = db_path.as_ref().join("user-id"); let dump_handle = dump_actor::DumpActorHandleImpl::new( dump_path, + analytics_path, index_resolver.clone(), update_sender.clone(), index_size, diff --git 
a/meilisearch-lib/src/lib.rs b/meilisearch-lib/src/lib.rs
index c7ffca5d6..1dd74f37d 100644
--- a/meilisearch-lib/src/lib.rs
+++ b/meilisearch-lib/src/lib.rs
@@ -5,6 +5,8 @@ pub mod options;
 pub mod index;
 pub mod index_controller;
 
+mod analytics;
+
 pub use index_controller::updates::store::Update;
 pub use index_controller::MeiliSearch;

From c752c14c462724d516c476f4cf0e0ba428cf1b9b Mon Sep 17 00:00:00 2001
From: Tamo
Date: Tue, 26 Oct 2021 13:02:40 +0200
Subject: [PATCH 33/68] refactor the dump and snapshot

---
 meilisearch-lib/src/analytics.rs | 13 ++----------
 .../src/index_controller/dump_actor/actor.rs | 4 ++--
 .../index_controller/dump_actor/loaders/v3.rs | 2 +-
 .../src/index_controller/dump_actor/mod.rs | 20 +++++++++----------
 meilisearch-lib/src/index_controller/mod.rs | 1 +
 .../src/index_controller/snapshot.rs | 12 +++++++++++
 6 files changed, 28 insertions(+), 24 deletions(-)

diff --git a/meilisearch-lib/src/analytics.rs b/meilisearch-lib/src/analytics.rs
index 76e673e5a..9dd8d3219 100644
--- a/meilisearch-lib/src/analytics.rs
+++ b/meilisearch-lib/src/analytics.rs
@@ -1,17 +1,8 @@
 use std::{fs, path::Path};
 
-/// To load a dump we get the user-id from the source directory;
-/// if there is one, we write it to the new destination, ignoring any errors
-pub fn load_dump(src: &Path, dst: &Path) {
+/// Copy the `user-id` contained in one db to another. Ignore all errors.
+pub fn copy_user_id(src: &Path, dst: &Path) {
     if let Ok(user_id) = fs::read_to_string(src.join("user-id")) {
         let _ = fs::write(dst.join("user-id"), &user_id);
     }
 }
-
-/// To write a dump we get the user-id from the given source file;
-/// if there is one, we write it to the destination file, ignoring any errors
-pub fn write_dump(src: &Path, dst: &Path) {
-    if let Ok(user_id) = fs::read_to_string(src) {
-        let _ = fs::write(dst, &user_id);
-    }
-}

diff --git a/meilisearch-lib/src/index_controller/dump_actor/actor.rs b/meilisearch-lib/src/index_controller/dump_actor/actor.rs
index 896f86e3b..03c139c1d 100644
--- a/meilisearch-lib/src/index_controller/dump_actor/actor.rs
+++ b/meilisearch-lib/src/index_controller/dump_actor/actor.rs
@@ -121,8 +121,8 @@ where
         ret.send(Ok(info)).expect("Dump actor is dead");
 
         let task = DumpTask {
-            path: self.dump_path.clone(),
-            analytics_path: self.analytics_path.clone(),
+            dump_path: self.dump_path.clone(),
+            db_path: self.analytics_path.clone(),
             index_resolver: self.index_resolver.clone(),
             update_sender: self.update.clone(),
             uid: uid.clone(),

diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs
index 8a67edc67..1eea55451 100644
--- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs
+++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v3.rs
@@ -25,7 +25,7 @@ pub fn load_dump(
     IndexResolver::load_dump(src.as_ref(), &dst, index_db_size, indexing_options)?;
     UpdateFileStore::load_dump(src.as_ref(), &dst)?;
     UpdateStore::load_dump(&src, &dst, update_db_size)?;
-    analytics::load_dump(src.as_ref(), dst.as_ref());
+    analytics::copy_user_id(src.as_ref(), dst.as_ref());
 
     info!("Loading indexes.");

diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs
index c73d5bab7..844dbf768 100644
--- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs
+++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs
@@ -223,8 +223,8 @@ pub fn load_dump(
 }
 
 struct DumpTask {
-    path: PathBuf,
-    analytics_path: PathBuf,
+ dump_path: PathBuf, + db_path: PathBuf, index_resolver: Arc>, update_sender: UpdateSender, uid: String, @@ -240,7 +240,7 @@ where async fn run(self) -> Result<()> { trace!("Performing dump."); - create_dir_all(&self.path).await?; + create_dir_all(&self.dump_path).await?; let temp_dump_dir = tokio::task::spawn_blocking(tempfile::TempDir::new).await??; let temp_dump_path = temp_dump_dir.path().to_owned(); @@ -249,7 +249,7 @@ where let meta_path = temp_dump_path.join(META_FILE_NAME); let mut meta_file = File::create(&meta_path)?; serde_json::to_writer(&mut meta_file, &meta)?; - analytics::write_dump(&self.analytics_path, &temp_dump_path.join("user-id")); + analytics::copy_user_id(&self.db_path, &temp_dump_path); create_dir_all(&temp_dump_path.join("indexes")).await?; let uuids = self.index_resolver.dump(temp_dump_path.clone()).await?; @@ -257,11 +257,11 @@ where UpdateMsg::dump(&self.update_sender, uuids, temp_dump_path.clone()).await?; let dump_path = tokio::task::spawn_blocking(move || -> Result { - let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?; + let temp_dump_file = tempfile::NamedTempFile::new_in(&self.dump_path)?; to_tar_gz(temp_dump_path, temp_dump_file.path()) .map_err(|e| DumpActorError::Internal(e.into()))?; - let dump_path = self.path.join(self.uid).with_extension("dump"); + let dump_path = self.dump_path.join(self.uid).with_extension("dump"); temp_dump_file.persist(&dump_path)?; Ok(dump_path) @@ -341,9 +341,9 @@ mod test { create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap(); let task = DumpTask { - path: tmp.path().to_owned(), + dump_path: tmp.path().into(), // this should do nothing - analytics_path: tmp.path().join("user-id"), + db_path: tmp.path().into(), index_resolver, update_sender, uid: String::from("test"), @@ -371,9 +371,9 @@ mod test { create_update_handler(index_resolver.clone(), tmp.path(), 4096 * 100).unwrap(); let task = DumpTask { - path: tmp.path().to_owned(), + dump_path: tmp.path().into(), // this should do nothing - analytics_path: tmp.path().join("user-id"), + db_path: tmp.path().into(), index_resolver, update_sender, uid: String::from("test"), diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 4074de1ba..631bab9e9 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -189,6 +189,7 @@ impl IndexControllerBuilder { .ok_or_else(|| anyhow::anyhow!("Snapshot interval not provided."))?, self.snapshot_dir .ok_or_else(|| anyhow::anyhow!("Snapshot path not provided."))?, + db_path.as_ref().into(), db_path .as_ref() .file_name() diff --git a/meilisearch-lib/src/index_controller/snapshot.rs b/meilisearch-lib/src/index_controller/snapshot.rs index 07bf75199..e3fb7e66e 100644 --- a/meilisearch-lib/src/index_controller/snapshot.rs +++ b/meilisearch-lib/src/index_controller/snapshot.rs @@ -8,6 +8,7 @@ use tokio::fs; use tokio::task::spawn_blocking; use tokio::time::sleep; +use crate::analytics; use crate::compression::from_tar_gz; use crate::index_controller::updates::UpdateMsg; @@ -21,6 +22,7 @@ pub struct SnapshotService { update_sender: UpdateSender, snapshot_period: Duration, snapshot_path: PathBuf, + db_path: PathBuf, db_name: String, } @@ -34,6 +36,7 @@ where update_sender: UpdateSender, snapshot_period: Duration, snapshot_path: PathBuf, + db_path: PathBuf, db_name: String, ) -> Self { Self { @@ -41,6 +44,7 @@ where update_sender, snapshot_period, snapshot_path, + db_path, db_name, } } @@ -71,6 +75,8 @@ where 
.snapshot(temp_snapshot_path.clone()) .await?; + analytics::copy_user_id(&self.db_path, &temp_snapshot_path.clone()); + if indexes.is_empty() { return Ok(()); } @@ -211,6 +217,8 @@ mod test { update_sender, Duration::from_millis(100), snapshot_path.path().to_owned(), + // this should do nothing + snapshot_path.path().to_owned(), "data.ms".to_string(), ); @@ -243,6 +251,8 @@ mod test { update_sender, Duration::from_millis(100), snapshot_path.path().to_owned(), + // this should do nothing + snapshot_path.path().to_owned(), "data.ms".to_string(), ); @@ -292,6 +302,8 @@ mod test { update_sender, Duration::from_millis(100), snapshot_path.path().to_owned(), + // this should do nothing + snapshot_path.path().to_owned(), "data.ms".to_string(), ); From 10de92987a14f4152b1cca9a8a48e6c88a3150cf Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 26 Oct 2021 13:16:05 +0200 Subject: [PATCH 34/68] compile write_user_id only when the analytics are enabled --- meilisearch-http/src/analytics.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 29613cda3..01a993226 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -32,6 +32,7 @@ fn find_user_id(db_path: &Path) -> Option { .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok()) } +#[cfg(all(not(debug_assertions), feature = "analytics"))] /// Write the user-id in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-user-id`. Ignore the errors. fn write_user_id(db_path: &Path, user_id: &str) { let _ = fs::write(db_path.join("user-id"), user_id.as_bytes()); From 3144b572c425408bf10d58bd0425413ead39918e Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 26 Oct 2021 13:18:24 +0200 Subject: [PATCH 35/68] remove the debug mode in release --- Cargo.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 44732839f..02e9813a4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -6,8 +6,5 @@ members = [ ] resolver = "2" -[profile.release] -debug = true - [patch.crates-io] pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" } From 01737ef84771b6e8431ecb36b930dd2657fda1ae Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 26 Oct 2021 13:20:54 +0200 Subject: [PATCH 36/68] remove all the debug prints --- meilisearch-http/src/analytics.rs | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 01a993226..467d04c35 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -40,7 +40,6 @@ fn write_user_id(db_path: &Path, user_id: &str) { .as_ref() .zip(config_user_id_path(db_path)) { - println!("{}", user_id_path.display()); let _ = fs::create_dir_all(&meilisearch_config_path); let _ = fs::write(user_id_path, user_id.as_bytes()); } @@ -182,7 +181,6 @@ mod segment { if let Ok(stats) = meilisearch.get_all_stats().await { let traits = Self::compute_traits(&self.opt, stats); let user = self.user.clone(); - println!("ANALYTICS: Pushing our identify tick"); let _ = self .batcher .lock() @@ -199,12 +197,10 @@ mod segment { }) .await; } - println!("ANALYTICS: taking the lock on the search batchers"); let get_search = std::mem::take(&mut *self.get_search_batcher.lock().await) .into_event(&self.user, "Document Searched GET"); let post_search = std::mem::take(&mut *self.post_search_batcher.lock().await) .into_event(&self.user, "Document Searched POST"); - println!("ANALYTICS: taking 
the lock on the documents batchers"); let add_documents = std::mem::take(&mut *self.add_documents_batcher.lock().await) .into_event(&self.user, "Documents Added"); @@ -213,7 +209,6 @@ mod segment { .into_event(&self.user, "Documents Updated"); // keep the lock on the batcher just for these three operations { - println!("ANALYTICS: taking the lock on the batchers"); let mut batcher = self.batcher.lock().await; if let Some(get_search) = get_search { let _ = batcher.push(get_search).await; @@ -227,10 +222,8 @@ mod segment { if let Some(update_documents) = update_documents { let _ = batcher.push(update_documents).await; } - println!("ANALYTICS: Sending the batch"); let _ = batcher.flush().await; } - println!("ANALYTICS: sent the batch"); tokio::time::sleep(Duration::from_secs(60 * 2)).await; // 2 minutes } }); @@ -294,7 +287,6 @@ mod segment { .count() - 1; - println!("Batching a search"); let mut search_batcher = batcher.lock().await; user_agent.into_iter().for_each(|ua| { search_batcher.user_agents.insert(ua); @@ -362,7 +354,6 @@ mod segment { .map(|header| header.to_str().unwrap_or("unknown").to_string()); tokio::spawn(async move { - println!("ANALYTICS pushing {} in the batcher", event_name); let _ = self .batcher .lock() @@ -375,7 +366,6 @@ mod segment { ..Default::default() }) .await; - println!("ANALYTICS {} pushed", event_name); }); } From 9a62ac0c94603d5d93eb8640f738b999e9a92117 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 26 Oct 2021 13:24:53 +0200 Subject: [PATCH 37/68] send the analytics only once every hours --- meilisearch-http/src/analytics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 467d04c35..4f969ea9f 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -224,7 +224,7 @@ mod segment { } let _ = batcher.flush().await; } - tokio::time::sleep(Duration::from_secs(60 * 2)).await; // 2 minutes + tokio::time::sleep(Duration::from_secs(60 * 60)).await; // one hour } }); } From 5508c6c154bd48b6528c7206448d16f8f33df049 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 26 Oct 2021 13:35:29 +0200 Subject: [PATCH 38/68] a bit of styling --- meilisearch-http/src/analytics.rs | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 4f969ea9f..b75fb7e94 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -137,10 +137,8 @@ mod segment { } pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self { - // see if there is already a user-id in the `data.ms` or in `/tmp/path-to-db-user-id` let user_id = super::find_user_id(&opt.db_path); let first_time_run = user_id.is_none(); - // if not, generate a new user-id and save it to the fs let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string()); super::write_user_id(&opt.db_path, &user_id); @@ -168,10 +166,7 @@ mod segment { if first_time_run { segment.publish("Launched".to_string(), json!({}), None); } - - // start the runtime tick segment.tick(meilisearch.clone()); - segment } @@ -179,8 +174,6 @@ mod segment { tokio::spawn(async move { loop { if let Ok(stats) = meilisearch.get_all_stats().await { - let traits = Self::compute_traits(&self.opt, stats); - let user = self.user.clone(); let _ = self .batcher .lock() @@ -191,8 +184,8 @@ mod segment { "version": env!("CARGO_PKG_VERSION").to_string(), }, })), - user, - traits, + user: user.clone(), + traits: 
Self::compute_traits(&self.opt, stats), ..Default::default() }) .await; @@ -207,7 +200,7 @@ mod segment { let update_documents = std::mem::take(&mut *self.update_documents_batcher.lock().await) .into_event(&self.user, "Documents Updated"); - // keep the lock on the batcher just for these three operations + // keep the lock on the batcher just for these five operations { let mut batcher = self.batcher.lock().await; if let Some(get_search) = get_search { From 6b34318274d122a4da53c0b046546a4deff640f2 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 26 Oct 2021 13:43:49 +0200 Subject: [PATCH 39/68] makes clippy happy --- meilisearch-http/src/analytics.rs | 2 +- meilisearch-http/src/main.rs | 2 +- meilisearch-http/src/routes/indexes/settings.rs | 10 +++++----- meilisearch-http/src/routes/indexes/updates.rs | 2 ++ 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index b75fb7e94..a042a5e92 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -552,7 +552,7 @@ pub struct MockAnalytics { impl MockAnalytics { pub fn new(opt: &Opt) -> &'static Self { - let user = find_user_id(&opt.db_path).unwrap_or(String::new()); + let user = find_user_id(&opt.db_path).unwrap_or_default(); let analytics = Box::new(Self { user }); Box::leak(analytics) } diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 60535966d..85166ebe2 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -128,7 +128,7 @@ Anonymous telemetry: \"Enabled\"" } let analytics = analytics.to_string(); - if analytics != "" { + if !analytics.is_empty() { eprintln!("Unique User ID:\t\"{}\"", analytics); } diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index f70eb1222..caf78d3fc 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -102,7 +102,7 @@ make_setting_route!( "total": setting.as_ref().map(|filter| filter.len()), "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), }), - Some(&req), + Some(req), ); } ); @@ -122,7 +122,7 @@ make_setting_route!( "total": setting.as_ref().map(|sort| sort.len()), "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false), }), - Some(&req), + Some(req), ); } ); @@ -174,9 +174,9 @@ make_setting_route!( analytics.publish( "RankingRules Updated".to_string(), json!({ - "sort_position": setting.as_ref().map(|sort| sort.iter().filter(|s| s.contains(":")).count()), + "sort_position": setting.as_ref().map(|sort| sort.iter().filter(|s| s.contains(':')).count()), }), - Some(&req), + Some(req), ); } ); @@ -218,7 +218,7 @@ pub async fn update_all( "Settings Updated".to_string(), json!({ "ranking_rules": { - "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().filter(|s| s.contains(":")).count()), + "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().filter(|s| s.contains(':')).count()), }, "sortable_attributes": { "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), diff --git a/meilisearch-http/src/routes/indexes/updates.rs b/meilisearch-http/src/routes/indexes/updates.rs index 2923736b7..dc99ef55a 100644 --- a/meilisearch-http/src/routes/indexes/updates.rs +++ b/meilisearch-http/src/routes/indexes/updates.rs @@ -13,12 +13,14 @@ pub fn configure(cfg: &mut web::ServiceConfig) { 
.service(web::resource("{update_id}").route(web::get().to(get_update_status))); } +/* #[derive(Debug, Deserialize)] #[serde(rename_all = "camelCase", deny_unknown_fields)] struct UpdateIndexRequest { uid: Option, primary_key: Option, } +*/ #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] From 76a4f86e0c07568b9f7575c737934beeaa58ffc7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 26 Oct 2021 13:57:41 +0200 Subject: [PATCH 40/68] rename user-id to instance-uid --- meilisearch-http/src/analytics.rs | 15 ++++++++++----- meilisearch-lib/src/analytics.rs | 6 +++--- meilisearch-lib/src/index_controller/mod.rs | 2 +- 3 files changed, 14 insertions(+), 9 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index a042a5e92..ddbd842e4 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -20,22 +20,27 @@ fn config_user_id_path(db_path: &Path) -> Option { db_path .canonicalize() .ok() - .map(|path| path.join("user-id").display().to_string().replace("/", "-")) + .map(|path| { + path.join("instance-uid") + .display() + .to_string() + .replace("/", "-") + }) .zip(MEILISEARCH_CONFIG_PATH.as_ref()) .map(|(filename, config_path)| config_path.join(filename)) } -/// Look for the user-id in the `data.ms` or in `~/.config/MeiliSearch/path-to-db-user-id` +/// Look for the instance-uid in the `data.ms` or in `~/.config/MeiliSearch/path-to-db-instance-uid` fn find_user_id(db_path: &Path) -> Option { - fs::read_to_string(db_path.join("user-id")) + fs::read_to_string(db_path.join("instance-uid")) .ok() .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok()) } #[cfg(all(not(debug_assertions), feature = "analytics"))] -/// Write the user-id in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-user-id`. Ignore the errors. +/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. fn write_user_id(db_path: &Path, user_id: &str) { - let _ = fs::write(db_path.join("user-id"), user_id.as_bytes()); + let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes()); if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH .as_ref() .zip(config_user_id_path(db_path)) diff --git a/meilisearch-lib/src/analytics.rs b/meilisearch-lib/src/analytics.rs index 9dd8d3219..adfddf998 100644 --- a/meilisearch-lib/src/analytics.rs +++ b/meilisearch-lib/src/analytics.rs @@ -1,8 +1,8 @@ use std::{fs, path::Path}; -/// Copy the `user-id` contained in one db to another. Ignore all errors. +/// Copy the `instance-uid` contained in one db to another. Ignore all errors. 
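+/// Called when creating a dump or a snapshot as well as when loading a dump,
+/// so a restored instance keeps its original instance-uid.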
pub fn copy_user_id(src: &Path, dst: &Path) { - if let Ok(user_id) = fs::read_to_string(src.join("user-id")) { - let _ = fs::write(dst.join("user-id"), &user_id); + if let Ok(user_id) = fs::read_to_string(src.join("instance-uid")) { + let _ = fs::write(dst.join("instance-uid"), &user_id); } } diff --git a/meilisearch-lib/src/index_controller/mod.rs b/meilisearch-lib/src/index_controller/mod.rs index 631bab9e9..f2571de77 100644 --- a/meilisearch-lib/src/index_controller/mod.rs +++ b/meilisearch-lib/src/index_controller/mod.rs @@ -169,7 +169,7 @@ impl IndexControllerBuilder { let dump_path = self .dump_dst .ok_or_else(|| anyhow::anyhow!("Missing dump directory path"))?; - let analytics_path = db_path.as_ref().join("user-id"); + let analytics_path = db_path.as_ref().join("instance-uid"); let dump_handle = dump_actor::DumpActorHandleImpl::new( dump_path, analytics_path, From ddab9eafa1d40b7b573349a82b42c2b869dfae70 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 26 Oct 2021 16:04:21 +0200 Subject: [PATCH 41/68] fix a typo --- meilisearch-http/src/analytics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index ddbd842e4..776329867 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -189,7 +189,7 @@ mod segment { "version": env!("CARGO_PKG_VERSION").to_string(), }, })), - user: user.clone(), + user: self.user.clone(), traits: Self::compute_traits(&self.opt, stats), ..Default::default() }) From 31c7a0105b7089149940d5f26c274d1e207d3245 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 14:14:05 +0200 Subject: [PATCH 42/68] fix a bug on the batch documents function --- meilisearch-http/src/analytics.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 776329867..3184602d2 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -338,6 +338,7 @@ mod segment { lock.primary_keys.insert(primary_key); } lock.index_creation |= index_creation; + lock.updated = true; // drop the lock here }); } @@ -522,7 +523,7 @@ mod segment { impl DocumentsBatcher { pub fn into_event(self, user: &User, event_name: &str) -> Option { - if self.updated { + if !self.updated { None } else { let context = Some(json!({ "user-agent": self.user_agents})); From 37ca50832c79055ebe10d155fba58c13c86ebb71 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 14:27:29 +0200 Subject: [PATCH 43/68] fix the sort position --- meilisearch-http/src/routes/indexes/settings.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index caf78d3fc..327752da6 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -172,12 +172,12 @@ make_setting_route!( use serde_json::json; analytics.publish( - "RankingRules Updated".to_string(), - json!({ - "sort_position": setting.as_ref().map(|sort| sort.iter().filter(|s| s.contains(':')).count()), - }), - Some(req), - ); + "RankingRules Updated".to_string(), + json!({ + "sort_position": setting.as_ref().map(|sort| sort.iter().position(|s| s == "sort")), + }), + Some(req), + ); } ); @@ -218,7 +218,7 @@ pub async fn update_all( "Settings Updated".to_string(), json!({ "ranking_rules": { - "sort_position": settings.ranking_rules.as_ref().set().map(|sort| 
sort.iter().filter(|s| s.contains(':')).count()), + "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")), }, "sortable_attributes": { "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), From ed750e8792d2ec6f893209d6ef0ab602ddd14c39 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 14:32:15 +0200 Subject: [PATCH 44/68] fix start_since_day --- meilisearch-http/src/analytics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 3184602d2..0c576a5a2 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -135,7 +135,7 @@ mod segment { "database_size": stats.database_size, "indexes_number": stats.indexes.len(), "documents_number": number_of_documents, - "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / 60 * 60 * 24, // one day + "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day }, "infos": infos, }) From de35a9a6057d261245eb80ee11bf5df568a9d967 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 16:20:27 +0200 Subject: [PATCH 45/68] use an official release of segment --- Cargo.lock | 3 ++- meilisearch-http/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2ad7be305..ec7dc6a60 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2584,7 +2584,8 @@ dependencies = [ [[package]] name = "segment" version = "0.1.1" -source = "git+https://github.com/meilisearch/segment#042a8631361f02ba84e8bb06f9120e93bf1922f2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fe736985d17620db13ef750190a3172ff4ab0cec12e4507aa80b329e1a191c3" dependencies = [ "async-trait", "chrono", diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 41bae43b8..1e2fe9d9b 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -56,7 +56,7 @@ rand = "0.8.4" rayon = "1.5.1" regex = "1.5.4" rustls = "0.19.1" -segment = { git = "https://github.com/meilisearch/segment", optional = true } +segment = { version = "0.1.1", optional = true } serde = { version = "1.0.130", features = ["derive"] } serde_json = { version = "1.0.67", features = ["preserve_order"] } sha2 = "0.9.6" From 9abd2aa9d7a920619f57d26ecdc348f5c8627558 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 16:34:35 +0200 Subject: [PATCH 46/68] make the analytics interval a const --- meilisearch-http/src/analytics.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 0c576a5a2..6626c84a5 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -55,6 +55,7 @@ fn write_user_id(db_path: &Path, user_id: &str) { mod segment { use crate::analytics::Analytics; use crate::routes::indexes::documents::UpdateDocumentsQuery; + use crate::Opt; use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; use http::header::CONTENT_TYPE; @@ -72,8 +73,6 @@ mod segment { use tokio::sync::Mutex; use uuid::Uuid; - use crate::Opt; - const SEGMENT_API_KEY: &str = "vHi89WrNDckHSQssyUJqLvIyp2QFITSC"; pub fn extract_user_agents(request: &HttpRequest) -> Vec { @@ -222,7 +221,8 @@ mod segment { } let _ = batcher.flush().await; } - tokio::time::sleep(Duration::from_secs(60 * 60)).await; // one hour + const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour + 
tokio::time::sleep(INTERVAL).await; } }); } From bba64b32ca7e58c41f70a009f7cbb2435ce612cd Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 16:38:58 +0200 Subject: [PATCH 47/68] async_traits is not needed anymore --- meilisearch-http/src/analytics.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 6626c84a5..1270feb7c 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -344,7 +344,6 @@ mod segment { } } - #[async_trait::async_trait] impl super::Analytics for SegmentAnalytics { fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>) { let content_type = request @@ -564,7 +563,6 @@ impl MockAnalytics { } } -#[async_trait::async_trait] impl Analytics for MockAnalytics { // These methods are noop and should be optimized out fn publish(&'static self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} @@ -594,7 +592,6 @@ impl Display for MockAnalytics { } } -#[async_trait::async_trait] pub trait Analytics: Display + Sync + Send { /// The method used to publish most analytics that do not need to be batched every hours fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>); From a1ab02f9fb26a849dedb6e9b29a76da307b22719 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 16:45:14 +0200 Subject: [PATCH 48/68] remove some commented code --- meilisearch-http/src/routes/indexes/updates.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/updates.rs b/meilisearch-http/src/routes/indexes/updates.rs index dc99ef55a..5902874ac 100644 --- a/meilisearch-http/src/routes/indexes/updates.rs +++ b/meilisearch-http/src/routes/indexes/updates.rs @@ -13,15 +13,6 @@ pub fn configure(cfg: &mut web::ServiceConfig) { .service(web::resource("{update_id}").route(web::get().to(get_update_status))); } -/* -#[derive(Debug, Deserialize)] -#[serde(rename_all = "camelCase", deny_unknown_fields)] -struct UpdateIndexRequest { - uid: Option, - primary_key: Option, -} -*/ - #[derive(Debug, Serialize)] #[serde(rename_all = "camelCase")] pub struct UpdateIndexResponse { From c4737749ab622a441f5d02ef51a6b235344dd063 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 17:01:08 +0200 Subject: [PATCH 49/68] bump segment to be able to display a user --- Cargo.lock | 4 ++-- meilisearch-http/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec7dc6a60..4fd864352 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2583,9 +2583,9 @@ dependencies = [ [[package]] name = "segment" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fe736985d17620db13ef750190a3172ff4ab0cec12e4507aa80b329e1a191c3" +checksum = "9bdcc286fff0e7c5ccd46c06a301c7a8a848b06acedc6983707bd311eb358002" dependencies = [ "async-trait", "chrono", diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 1e2fe9d9b..6e70c0a1d 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -56,7 +56,7 @@ rand = "0.8.4" rayon = "1.5.1" regex = "1.5.4" rustls = "0.19.1" -segment = { version = "0.1.1", optional = true } +segment = { version = "0.1.2", optional = true } serde = { version = "1.0.130", features = ["derive"] } serde_json = { version = "1.0.67", features = ["preserve_order"] } sha2 = "0.9.6" From d8b0d6884083512b2a8226239a10124a2661d3cc Mon Sep 17 00:00:00 
2001 From: Tamo Date: Wed, 27 Oct 2021 17:08:00 +0200 Subject: [PATCH 50/68] use a regex to count the number of filters instead of split + flatten --- meilisearch-http/src/analytics.rs | 30 ++++++++++-------------------- 1 file changed, 10 insertions(+), 20 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 1270feb7c..f3b0367f7 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -63,6 +63,7 @@ mod segment { use meilisearch_lib::index_controller::Stats; use meilisearch_lib::MeiliSearch; use once_cell::sync::Lazy; + use regex::Regex; use segment::message::{Identify, Track, User}; use segment::{AutoBatcher, Batcher, HttpClient}; use serde_json::{json, Value}; @@ -82,7 +83,7 @@ mod segment { .map(|header| header.to_str().ok()) .flatten() .unwrap_or("unknown") - .split(";") + .split(';') .map(str::trim) .map(ToString::to_string) .collect() @@ -252,15 +253,17 @@ mod segment { // to avoid blocking the search we are going to do the heavier computation and take the // batcher's mutex in an async task tokio::spawn(async move { + const RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); + let filtered = filter.is_some() as usize; let syntax = match filter.as_ref() { Some(Value::String(_)) => "string".to_string(), Some(Value::Array(values)) => { - if values.iter().map(|v| v.to_string()).any(|s| { - s.contains(['=', '<', '>', '!'].as_ref()) - || s.contains("_geoRadius") - || s.contains("TO") - }) { + if values + .iter() + .map(|v| v.to_string()) + .any(|s| RE.is_match(&s)) + { "mixed".to_string() } else { "array".to_string() @@ -270,20 +273,7 @@ mod segment { }; let stringified_filters = filter.map_or(String::new(), |v| v.to_string()); let filter_with_geo_radius = stringified_filters.contains("_geoRadius("); - let filter_number_of_criteria = stringified_filters - .split("!=") - .map(|s| s.split("<=")) - .flatten() - .map(|s| s.split(">=")) - .flatten() - .map(|s| s.split(['=', '<', '>', '!'].as_ref())) - .flatten() - .map(|s| s.split("_geoRadius(")) - .flatten() - .map(|s| s.split("TO")) - .flatten() - .count() - - 1; + let filter_number_of_criteria = RE.split(&stringified_filters).count(); let mut search_batcher = batcher.lock().await; user_agent.into_iter().for_each(|ua| { From b250392e8da758e920ac2213079590a7bb974e4e Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 17:52:53 +0200 Subject: [PATCH 51/68] remove the first - in the path to the db instance in the instance-id --- meilisearch-http/src/analytics.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index f3b0367f7..175f53c24 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -27,7 +27,7 @@ fn config_user_id_path(db_path: &Path) -> Option { .replace("/", "-") }) .zip(MEILISEARCH_CONFIG_PATH.as_ref()) - .map(|(filename, config_path)| config_path.join(filename)) + .map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-'))) } /// Look for the instance-uid in the `data.ms` or in `~/.config/MeiliSearch/path-to-db-instance-uid` From 72e3adc55e76cac9e72f1ebaba1ecd3c685f15b0 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 17:55:38 +0200 Subject: [PATCH 52/68] display an instance-id instead of a user-id --- meilisearch-http/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index 85166ebe2..c6b8be58d 100644 
--- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -129,7 +129,7 @@ Anonymous telemetry: \"Enabled\"" let analytics = analytics.to_string(); if !analytics.is_empty() { - eprintln!("Unique User ID:\t\"{}\"", analytics); + eprintln!("Instance UID:\t\"{}\"", analytics); } eprintln!(); From ea5ae2bae5e33a0555a01e9ece918c2ae9c35253 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 17:57:19 +0200 Subject: [PATCH 53/68] sort the imports --- meilisearch-http/src/analytics.rs | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs index 175f53c24..9e316f23d 100644 --- a/meilisearch-http/src/analytics.rs +++ b/meilisearch-http/src/analytics.rs @@ -1,13 +1,15 @@ -use crate::routes::indexes::documents::UpdateDocumentsQuery; -use crate::Opt; +use std::fmt::Display; +use std::fs; +use std::path::{Path, PathBuf}; + use actix_web::HttpRequest; use meilisearch_lib::index::SearchQuery; use once_cell::sync::Lazy; use platform_dirs::AppDirs; use serde_json::Value; -use std::fmt::Display; -use std::fs; -use std::path::{Path, PathBuf}; + +use crate::routes::indexes::documents::UpdateDocumentsQuery; +use crate::Opt; /// The MeiliSearch config dir: /// `~/.config/MeiliSearch` on *NIX or *BSD. @@ -53,9 +55,10 @@ fn write_user_id(db_path: &Path, user_id: &str) { // if we are in release mode and the feature analytics was enabled #[cfg(all(not(debug_assertions), feature = "analytics"))] mod segment { - use crate::analytics::Analytics; - use crate::routes::indexes::documents::UpdateDocumentsQuery; - use crate::Opt; + use std::collections::{HashMap, HashSet}; + use std::fmt::Display; + use std::time::{Duration, Instant}; + use actix_web::http::header::USER_AGENT; use actix_web::HttpRequest; use http::header::CONTENT_TYPE; @@ -67,13 +70,14 @@ mod segment { use segment::message::{Identify, Track, User}; use segment::{AutoBatcher, Batcher, HttpClient}; use serde_json::{json, Value}; - use std::collections::{HashMap, HashSet}; - use std::fmt::Display; - use std::time::{Duration, Instant}; use sysinfo::{DiskExt, System, SystemExt}; use tokio::sync::Mutex; use uuid::Uuid; + use crate::analytics::Analytics; + use crate::routes::indexes::documents::UpdateDocumentsQuery; + use crate::Opt; + const SEGMENT_API_KEY: &str = "vHi89WrNDckHSQssyUJqLvIyp2QFITSC"; pub fn extract_user_agents(request: &HttpRequest) -> Vec { From 3ad8311bdd50f13483ceafe80cd3737149328661 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 27 Oct 2021 18:16:13 +0200 Subject: [PATCH 54/68] split the analytics in a module --- meilisearch-http/src/analytics.rs | 617 ------------------ .../src/analytics/mock_analytics.rs | 50 ++ meilisearch-http/src/analytics/mod.rs | 86 +++ .../src/analytics/segment_analytics.rs | 497 ++++++++++++++ 4 files changed, 633 insertions(+), 617 deletions(-) delete mode 100644 meilisearch-http/src/analytics.rs create mode 100644 meilisearch-http/src/analytics/mock_analytics.rs create mode 100644 meilisearch-http/src/analytics/mod.rs create mode 100644 meilisearch-http/src/analytics/segment_analytics.rs diff --git a/meilisearch-http/src/analytics.rs b/meilisearch-http/src/analytics.rs deleted file mode 100644 index 9e316f23d..000000000 --- a/meilisearch-http/src/analytics.rs +++ /dev/null @@ -1,617 +0,0 @@ -use std::fmt::Display; -use std::fs; -use std::path::{Path, PathBuf}; - -use actix_web::HttpRequest; -use meilisearch_lib::index::SearchQuery; -use once_cell::sync::Lazy; -use platform_dirs::AppDirs; 
-use serde_json::Value; - -use crate::routes::indexes::documents::UpdateDocumentsQuery; -use crate::Opt; - -/// The MeiliSearch config dir: -/// `~/.config/MeiliSearch` on *NIX or *BSD. -/// `~/Library/ApplicationSupport` on macOS. -/// `%APPDATA` (= `C:\Users%USERNAME%\AppData\Roaming`) on windows. -static MEILISEARCH_CONFIG_PATH: Lazy> = - Lazy::new(|| AppDirs::new(Some("MeiliSearch"), false).map(|appdir| appdir.config_dir)); - -fn config_user_id_path(db_path: &Path) -> Option { - db_path - .canonicalize() - .ok() - .map(|path| { - path.join("instance-uid") - .display() - .to_string() - .replace("/", "-") - }) - .zip(MEILISEARCH_CONFIG_PATH.as_ref()) - .map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-'))) -} - -/// Look for the instance-uid in the `data.ms` or in `~/.config/MeiliSearch/path-to-db-instance-uid` -fn find_user_id(db_path: &Path) -> Option { - fs::read_to_string(db_path.join("instance-uid")) - .ok() - .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok()) -} - -#[cfg(all(not(debug_assertions), feature = "analytics"))] -/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. -fn write_user_id(db_path: &Path, user_id: &str) { - let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes()); - if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH - .as_ref() - .zip(config_user_id_path(db_path)) - { - let _ = fs::create_dir_all(&meilisearch_config_path); - let _ = fs::write(user_id_path, user_id.as_bytes()); - } -} - -// if we are in release mode and the feature analytics was enabled -#[cfg(all(not(debug_assertions), feature = "analytics"))] -mod segment { - use std::collections::{HashMap, HashSet}; - use std::fmt::Display; - use std::time::{Duration, Instant}; - - use actix_web::http::header::USER_AGENT; - use actix_web::HttpRequest; - use http::header::CONTENT_TYPE; - use meilisearch_lib::index::SearchQuery; - use meilisearch_lib::index_controller::Stats; - use meilisearch_lib::MeiliSearch; - use once_cell::sync::Lazy; - use regex::Regex; - use segment::message::{Identify, Track, User}; - use segment::{AutoBatcher, Batcher, HttpClient}; - use serde_json::{json, Value}; - use sysinfo::{DiskExt, System, SystemExt}; - use tokio::sync::Mutex; - use uuid::Uuid; - - use crate::analytics::Analytics; - use crate::routes::indexes::documents::UpdateDocumentsQuery; - use crate::Opt; - - const SEGMENT_API_KEY: &str = "vHi89WrNDckHSQssyUJqLvIyp2QFITSC"; - - pub fn extract_user_agents(request: &HttpRequest) -> Vec { - request - .headers() - .get(USER_AGENT) - .map(|header| header.to_str().ok()) - .flatten() - .unwrap_or("unknown") - .split(';') - .map(str::trim) - .map(ToString::to_string) - .collect() - } - - pub struct SegmentAnalytics { - user: User, - opt: Opt, - batcher: Mutex, - post_search_batcher: Mutex, - get_search_batcher: Mutex, - add_documents_batcher: Mutex, - update_documents_batcher: Mutex, - } - - impl SegmentAnalytics { - fn compute_traits(opt: &Opt, stats: Stats) -> Value { - static FIRST_START_TIMESTAMP: Lazy = Lazy::new(Instant::now); - const SYSTEM: Lazy = Lazy::new(|| { - let mut sys = System::new_all(); - sys.refresh_all(); - let kernel_version = sys - .kernel_version() - .map(|k| k.split_once("-").map(|(k, _)| k.to_string())) - .flatten(); - json!({ - "distribution": sys.name(), - "kernel_version": kernel_version, - "cores": sys.processors().len(), - "ram_size": sys.total_memory(), - "disk_size": sys.disks().iter().map(|disk| 
disk.available_space()).max(), - "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), - }) - }); - let infos = json!({ - "env": opt.env.clone(), - "has_snapshot": opt.schedule_snapshot, - }); - - let number_of_documents = stats - .indexes - .values() - .map(|index| index.number_of_documents) - .collect::>(); - - json!({ - "system": *SYSTEM, - "stats": { - "database_size": stats.database_size, - "indexes_number": stats.indexes.len(), - "documents_number": number_of_documents, - "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day - }, - "infos": infos, - }) - } - - pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self { - let user_id = super::find_user_id(&opt.db_path); - let first_time_run = user_id.is_none(); - let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string()); - super::write_user_id(&opt.db_path, &user_id); - - let client = HttpClient::default(); - let user = User::UserId { - user_id: user_id.clone(), - }; - let batcher = Mutex::new(AutoBatcher::new( - client, - Batcher::new(None), - SEGMENT_API_KEY.to_string(), - )); - let segment = Box::new(Self { - user, - opt: opt.clone(), - batcher, - post_search_batcher: Mutex::new(SearchBatcher::default()), - get_search_batcher: Mutex::new(SearchBatcher::default()), - add_documents_batcher: Mutex::new(DocumentsBatcher::default()), - update_documents_batcher: Mutex::new(DocumentsBatcher::default()), - }); - let segment = Box::leak(segment); - - // batch the launched for the first time track event - if first_time_run { - segment.publish("Launched".to_string(), json!({}), None); - } - segment.tick(meilisearch.clone()); - segment - } - - fn tick(&'static self, meilisearch: MeiliSearch) { - tokio::spawn(async move { - loop { - if let Ok(stats) = meilisearch.get_all_stats().await { - let _ = self - .batcher - .lock() - .await - .push(Identify { - context: Some(json!({ - "app": { - "version": env!("CARGO_PKG_VERSION").to_string(), - }, - })), - user: self.user.clone(), - traits: Self::compute_traits(&self.opt, stats), - ..Default::default() - }) - .await; - } - let get_search = std::mem::take(&mut *self.get_search_batcher.lock().await) - .into_event(&self.user, "Document Searched GET"); - let post_search = std::mem::take(&mut *self.post_search_batcher.lock().await) - .into_event(&self.user, "Document Searched POST"); - let add_documents = - std::mem::take(&mut *self.add_documents_batcher.lock().await) - .into_event(&self.user, "Documents Added"); - let update_documents = - std::mem::take(&mut *self.update_documents_batcher.lock().await) - .into_event(&self.user, "Documents Updated"); - // keep the lock on the batcher just for these five operations - { - let mut batcher = self.batcher.lock().await; - if let Some(get_search) = get_search { - let _ = batcher.push(get_search).await; - } - if let Some(post_search) = post_search { - let _ = batcher.push(post_search).await; - } - if let Some(add_documents) = add_documents { - let _ = batcher.push(add_documents).await; - } - if let Some(update_documents) = update_documents { - let _ = batcher.push(update_documents).await; - } - let _ = batcher.flush().await; - } - const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour - tokio::time::sleep(INTERVAL).await; - } - }); - } - - fn start_search( - &'static self, - batcher: &'static Mutex, - query: &SearchQuery, - request: &HttpRequest, - ) { - let user_agent = extract_user_agents(request); - let sorted = query.sort.is_some() as usize; - let sort_with_geo_point = query - 
.sort - .as_ref() - .map_or(false, |s| s.iter().any(|s| s.contains("_geoPoint("))); - let sort_criteria_terms = query.sort.as_ref().map_or(0, |s| s.len()); - - // since there is quite a bit of computation made on the filter we are going to do that in the async task - let filter = query.filter.clone(); - let queried = query.q.is_some(); - let nb_terms = query.q.as_ref().map_or(0, |s| s.split_whitespace().count()); - - let max_limit = query.limit; - let max_offset = query.offset.unwrap_or_default(); - - // to avoid blocking the search we are going to do the heavier computation and take the - // batcher's mutex in an async task - tokio::spawn(async move { - const RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); - - let filtered = filter.is_some() as usize; - let syntax = match filter.as_ref() { - Some(Value::String(_)) => "string".to_string(), - Some(Value::Array(values)) => { - if values - .iter() - .map(|v| v.to_string()) - .any(|s| RE.is_match(&s)) - { - "mixed".to_string() - } else { - "array".to_string() - } - } - _ => "none".to_string(), - }; - let stringified_filters = filter.map_or(String::new(), |v| v.to_string()); - let filter_with_geo_radius = stringified_filters.contains("_geoRadius("); - let filter_number_of_criteria = RE.split(&stringified_filters).count(); - - let mut search_batcher = batcher.lock().await; - user_agent.into_iter().for_each(|ua| { - search_batcher.user_agents.insert(ua); - }); - search_batcher.total_received += 1; - - // sort - search_batcher.sort_with_geo_point |= sort_with_geo_point; - search_batcher.sort_sum_of_criteria_terms += sort_criteria_terms; - search_batcher.sort_total_number_of_criteria += sorted; - - // filter - search_batcher.filter_with_geo_radius |= filter_with_geo_radius; - search_batcher.filter_sum_of_criteria_terms += filter_number_of_criteria; - search_batcher.filter_total_number_of_criteria += filtered as usize; - *search_batcher.used_syntax.entry(syntax).or_insert(0) += 1; - - // q - search_batcher.sum_of_terms_count += nb_terms; - search_batcher.total_number_of_q += queried as usize; - - // pagination - search_batcher.max_limit = search_batcher.max_limit.max(max_limit); - search_batcher.max_offset = search_batcher.max_offset.max(max_offset); - }); - } - - fn batch_documents( - &'static self, - batcher: &'static Mutex, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - let user_agents = extract_user_agents(request); - let primary_key = documents_query.primary_key.clone(); - let content_type = request - .headers() - .get(CONTENT_TYPE) - .map(|s| s.to_str().unwrap_or("unkown")) - .unwrap() - .to_string(); - - tokio::spawn(async move { - let mut lock = batcher.lock().await; - for user_agent in user_agents { - lock.user_agents.insert(user_agent); - } - lock.content_types.insert(content_type); - if let Some(primary_key) = primary_key { - lock.primary_keys.insert(primary_key); - } - lock.index_creation |= index_creation; - lock.updated = true; - // drop the lock here - }); - } - } - - impl super::Analytics for SegmentAnalytics { - fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>) { - let content_type = request - .map(|req| req.headers().get(USER_AGENT)) - .flatten() - .map(|header| header.to_str().unwrap_or("unknown").to_string()); - - tokio::spawn(async move { - let _ = self - .batcher - .lock() - .await - .push(Track { - user: self.user.clone(), - event: event_name.clone(), - context: content_type.map(|user_agent| json!({ "user-agent": 
user_agent.split(";").map(str::trim).collect::>() })), - properties: send, - ..Default::default() - }) - .await; - }); - } - - fn start_get_search(&'static self, query: &SearchQuery, request: &HttpRequest) { - self.start_search(&self.get_search_batcher, query, request) - } - - fn end_get_search(&'static self, process_time: usize) { - tokio::spawn(async move { - let mut search_batcher = self.get_search_batcher.lock().await; - search_batcher.total_succeeded += 1; - search_batcher.time_spent.push(process_time); - }); - } - - fn start_post_search(&'static self, query: &SearchQuery, request: &HttpRequest) { - self.start_search(&self.post_search_batcher, query, request) - } - - fn end_post_search(&'static self, process_time: usize) { - tokio::spawn(async move { - let mut search_batcher = self.post_search_batcher.lock().await; - search_batcher.total_succeeded += 1; - search_batcher.time_spent.push(process_time); - }); - } - - fn add_documents( - &'static self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - self.batch_documents( - &self.add_documents_batcher, - documents_query, - index_creation, - request, - ) - } - - fn update_documents( - &'static self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - self.batch_documents( - &self.update_documents_batcher, - documents_query, - index_creation, - request, - ) - } - } - - impl Display for SegmentAnalytics { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.user) - } - } - - #[derive(Default)] - pub struct SearchBatcher { - // context - user_agents: HashSet, - - // requests - total_received: usize, - total_succeeded: usize, - time_spent: Vec, - - // sort - sort_with_geo_point: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains - sort_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one - sort_total_number_of_criteria: usize, - - // filter - filter_with_geo_radius: bool, - // everytime a request has a filter, this field must be incremented by the number of terms it contains - filter_sum_of_criteria_terms: usize, - // everytime a request has a filter, this field must be incremented by one - filter_total_number_of_criteria: usize, - used_syntax: HashMap, - - // q - // everytime a request has a q field, this field must be incremented by the number of terms - sum_of_terms_count: usize, - // everytime a request has a q field, this field must be incremented by one - total_number_of_q: usize, - - // pagination - max_limit: usize, - max_offset: usize, - } - - impl SearchBatcher { - pub fn into_event(mut self, user: &User, event_name: &str) -> Option { - if self.total_received == 0 { - None - } else { - let context = Some(json!({ "user-agent": self.user_agents})); - let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) 
+ 1.; - self.time_spent.drain(percentile_99th as usize..); - - let properties = json!({ - "requests": { - "99th_response_time": format!("{:.2}", self.time_spent.iter().sum::() as f64 / self.time_spent.len() as f64), - "total_succeeded": self.total_succeeded, - "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics - "total_received": self.total_received, - }, - "sort": { - "with_geoPoint": self.sort_with_geo_point, - "avg_criteria_number": format!("{:.2}", self.sort_sum_of_criteria_terms as f64 / self.sort_total_number_of_criteria as f64), - }, - "filter": { - "with_geoRadius": self.filter_with_geo_radius, - "avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64), - "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), - }, - "q": { - "avg_terms_number": format!("{:.2}", self.sum_of_terms_count as f64 / self.total_number_of_q as f64), - }, - "pagination": { - "max_limit": self.max_limit, - "max_offset": self.max_offset, - }, - }); - - Some(Track { - user: user.clone(), - event: event_name.to_string(), - context, - properties, - ..Default::default() - }) - } - } - } - - #[derive(Default)] - pub struct DocumentsBatcher { - // set to true when at least one request was received - updated: bool, - - // context - user_agents: HashSet, - - content_types: HashSet, - primary_keys: HashSet, - index_creation: bool, - } - - impl DocumentsBatcher { - pub fn into_event(self, user: &User, event_name: &str) -> Option { - if !self.updated { - None - } else { - let context = Some(json!({ "user-agent": self.user_agents})); - - let properties = json!({ - "payload_type": self.content_types, - "primary_key": self.primary_keys, - "index_creation": self.index_creation, - }); - - Some(Track { - user: user.clone(), - event: event_name.to_string(), - context, - properties, - ..Default::default() - }) - } - } - } -} - -// if we are in debug mode OR the analytics feature is disabled -#[cfg(any(debug_assertions, not(feature = "analytics")))] -pub type SegmentAnalytics = MockAnalytics; -#[cfg(all(not(debug_assertions), feature = "analytics"))] -pub type SegmentAnalytics = segment::SegmentAnalytics; - -pub struct MockAnalytics { - user: String, -} - -impl MockAnalytics { - pub fn new(opt: &Opt) -> &'static Self { - let user = find_user_id(&opt.db_path).unwrap_or_default(); - let analytics = Box::new(Self { user }); - Box::leak(analytics) - } -} - -impl Analytics for MockAnalytics { - // These methods are noop and should be optimized out - fn publish(&'static self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} - fn start_get_search(&'static self, _query: &SearchQuery, _request: &HttpRequest) {} - fn end_get_search(&'static self, _process_time: usize) {} - fn start_post_search(&'static self, _query: &SearchQuery, _request: &HttpRequest) {} - fn end_post_search(&'static self, _process_time: usize) {} - fn add_documents( - &'static self, - _documents_query: &UpdateDocumentsQuery, - _index_creation: bool, - _request: &HttpRequest, - ) { - } - fn update_documents( - &'static self, - _documents_query: &UpdateDocumentsQuery, - _index_creation: bool, - _request: &HttpRequest, - ) { - } -} - -impl Display for MockAnalytics { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.user) - } -} - -pub trait Analytics: Display + Sync + Send { - /// The method used to publish most 
analytics that do not need to be batched every hours - fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>); - - /// This method should be called to batch a get search request - fn start_get_search(&'static self, query: &SearchQuery, request: &HttpRequest); - /// This method should be called once a get search request has succeeded - fn end_get_search(&'static self, process_time: usize); - - /// This method should be called to batch a get search request - fn start_post_search(&'static self, query: &SearchQuery, request: &HttpRequest); - /// This method should be called once a post search request has succeeded - fn end_post_search(&'static self, process_time: usize); - - // this method should be called to batch a add documents request - fn add_documents( - &'static self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ); - // this method should be called to batch a update documents request - fn update_documents( - &'static self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ); -} diff --git a/meilisearch-http/src/analytics/mock_analytics.rs b/meilisearch-http/src/analytics/mock_analytics.rs new file mode 100644 index 000000000..64cd5673f --- /dev/null +++ b/meilisearch-http/src/analytics/mock_analytics.rs @@ -0,0 +1,50 @@ +use std::fmt::Display; + +use actix_web::HttpRequest; +use meilisearch_lib::index::SearchQuery; +use serde_json::Value; + +use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt}; + +use super::{find_user_id, Analytics}; + +pub struct MockAnalytics { + user: String, +} + +impl MockAnalytics { + pub fn new(opt: &Opt) -> &'static Self { + let user = find_user_id(&opt.db_path).unwrap_or_default(); + let analytics = Box::new(Self { user }); + Box::leak(analytics) + } +} + +impl Analytics for MockAnalytics { + // These methods are noop and should be optimized out + fn publish(&'static self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} + fn start_get_search(&'static self, _query: &SearchQuery, _request: &HttpRequest) {} + fn end_get_search(&'static self, _process_time: usize) {} + fn start_post_search(&'static self, _query: &SearchQuery, _request: &HttpRequest) {} + fn end_post_search(&'static self, _process_time: usize) {} + fn add_documents( + &'static self, + _documents_query: &UpdateDocumentsQuery, + _index_creation: bool, + _request: &HttpRequest, + ) { + } + fn update_documents( + &'static self, + _documents_query: &UpdateDocumentsQuery, + _index_creation: bool, + _request: &HttpRequest, + ) { + } +} + +impl Display for MockAnalytics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.user) + } +} diff --git a/meilisearch-http/src/analytics/mod.rs b/meilisearch-http/src/analytics/mod.rs new file mode 100644 index 000000000..48ba77ddb --- /dev/null +++ b/meilisearch-http/src/analytics/mod.rs @@ -0,0 +1,86 @@ +mod mock_analytics; +// if we are in release mode and the feature analytics was enabled +#[cfg(all(not(debug_assertions), feature = "analytics"))] +mod segment_analytics; + +use std::fmt::Display; +use std::fs; +use std::path::{Path, PathBuf}; + +use actix_web::HttpRequest; +use meilisearch_lib::index::SearchQuery; +use once_cell::sync::Lazy; +use platform_dirs::AppDirs; +use serde_json::Value; + +use crate::routes::indexes::documents::UpdateDocumentsQuery; + +pub use mock_analytics::MockAnalytics; + +// if we are in debug mode OR the analytics feature is disabled 
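+// (e.g. a plain `cargo build`, or a build without the default `analytics` feature)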
+// the `SegmentAnalytics` points to the mock instead of the real analytics
+#[cfg(any(debug_assertions, not(feature = "analytics")))]
+pub type SegmentAnalytics = MockAnalytics;
+
+// if we are in release mode and the feature analytics was enabled
+// we use the real analytics
+#[cfg(all(not(debug_assertions), feature = "analytics"))]
+pub type SegmentAnalytics = segment_analytics::SegmentAnalytics;
+
+/// The MeiliSearch config dir:
+/// `~/.config/MeiliSearch` on *NIX or *BSD.
+/// `~/Library/Application Support` on macOS.
+/// `%APPDATA%` (= `C:\Users\%USERNAME%\AppData\Roaming`) on Windows.
+static MEILISEARCH_CONFIG_PATH: Lazy<Option<PathBuf>> =
+    Lazy::new(|| AppDirs::new(Some("MeiliSearch"), false).map(|appdir| appdir.config_dir));
+
+fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
+    db_path
+        .canonicalize()
+        .ok()
+        .map(|path| {
+            path.join("instance-uid")
+                .display()
+                .to_string()
+                .replace("/", "-")
+        })
+        .zip(MEILISEARCH_CONFIG_PATH.as_ref())
+        .map(|(filename, config_path)| config_path.join(filename.trim_start_matches('-')))
+}
+
+/// Look for the instance-uid in the `data.ms` or in `~/.config/MeiliSearch/path-to-db-instance-uid`
+fn find_user_id(db_path: &Path) -> Option<String> {
+    fs::read_to_string(db_path.join("instance-uid"))
+        .ok()
+        .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
+}
+
+pub trait Analytics: Display + Sync + Send {
+    /// The method used to publish most analytics that do not need to be batched every hour
+    fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>);
+
+    /// This method should be called to batch a get search request
+    fn start_get_search(&'static self, query: &SearchQuery, request: &HttpRequest);
+    /// This method should be called once a get search request has succeeded
+    fn end_get_search(&'static self, process_time: usize);
+
+    /// This method should be called to batch a post search request
+    fn start_post_search(&'static self, query: &SearchQuery, request: &HttpRequest);
+    /// This method should be called once a post search request has succeeded
+    fn end_post_search(&'static self, process_time: usize);
+
+    // this method should be called to batch an add documents request
+    fn add_documents(
+        &'static self,
+        documents_query: &UpdateDocumentsQuery,
+        index_creation: bool,
+        request: &HttpRequest,
+    );
+    // this method should be called to batch an update documents request
+    fn update_documents(
+        &'static self,
+        documents_query: &UpdateDocumentsQuery,
+        index_creation: bool,
+        request: &HttpRequest,
+    );
+}

diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs
new file mode 100644
index 000000000..065fff175
--- /dev/null
+++ b/meilisearch-http/src/analytics/segment_analytics.rs
@@ -0,0 +1,497 @@
+use std::collections::{HashMap, HashSet};
+use std::fmt::Display;
+use std::fs;
+use std::path::Path;
+use std::time::{Duration, Instant};
+
+use actix_web::http::header::USER_AGENT;
+use actix_web::HttpRequest;
+use http::header::CONTENT_TYPE;
+use meilisearch_lib::index::SearchQuery;
+use meilisearch_lib::index_controller::Stats;
+use meilisearch_lib::MeiliSearch;
+use once_cell::sync::Lazy;
+use regex::Regex;
+use segment::message::{Identify, Track, User};
+use segment::{AutoBatcher, Batcher, HttpClient};
+use serde_json::{json, Value};
+use sysinfo::{DiskExt, System, SystemExt};
+use tokio::sync::Mutex;
+use uuid::Uuid;
+
+use crate::analytics::Analytics;
+use crate::routes::indexes::documents::UpdateDocumentsQuery;
+use crate::Opt;
+
+use
super::{config_user_id_path, MEILISEARCH_CONFIG_PATH}; + +/// Write the instance-uid in the `data.ms` and in `~/.config/MeiliSearch/path-to-db-instance-uid`. Ignore the errors. +fn write_user_id(db_path: &Path, user_id: &str) { + let _ = fs::write(db_path.join("instance-uid"), user_id.as_bytes()); + if let Some((meilisearch_config_path, user_id_path)) = MEILISEARCH_CONFIG_PATH + .as_ref() + .zip(config_user_id_path(db_path)) + { + let _ = fs::create_dir_all(&meilisearch_config_path); + let _ = fs::write(user_id_path, user_id.as_bytes()); + } +} + +const SEGMENT_API_KEY: &str = "vHi89WrNDckHSQssyUJqLvIyp2QFITSC"; + +pub fn extract_user_agents(request: &HttpRequest) -> Vec { + request + .headers() + .get(USER_AGENT) + .map(|header| header.to_str().ok()) + .flatten() + .unwrap_or("unknown") + .split(';') + .map(str::trim) + .map(ToString::to_string) + .collect() +} + +pub struct SegmentAnalytics { + user: User, + opt: Opt, + batcher: Mutex, + post_search_batcher: Mutex, + get_search_batcher: Mutex, + add_documents_batcher: Mutex, + update_documents_batcher: Mutex, +} + +impl SegmentAnalytics { + fn compute_traits(opt: &Opt, stats: Stats) -> Value { + static FIRST_START_TIMESTAMP: Lazy = Lazy::new(Instant::now); + const SYSTEM: Lazy = Lazy::new(|| { + let mut sys = System::new_all(); + sys.refresh_all(); + let kernel_version = sys + .kernel_version() + .map(|k| k.split_once("-").map(|(k, _)| k.to_string())) + .flatten(); + json!({ + "distribution": sys.name(), + "kernel_version": kernel_version, + "cores": sys.processors().len(), + "ram_size": sys.total_memory(), + "disk_size": sys.disks().iter().map(|disk| disk.available_space()).max(), + "server_provider": std::env::var("MEILI_SERVER_PROVIDER").ok(), + }) + }); + let infos = json!({ + "env": opt.env.clone(), + "has_snapshot": opt.schedule_snapshot, + }); + + let number_of_documents = stats + .indexes + .values() + .map(|index| index.number_of_documents) + .collect::>(); + + json!({ + "system": *SYSTEM, + "stats": { + "database_size": stats.database_size, + "indexes_number": stats.indexes.len(), + "documents_number": number_of_documents, + "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day + }, + "infos": infos, + }) + } + + pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self { + let user_id = super::find_user_id(&opt.db_path); + let first_time_run = user_id.is_none(); + let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string()); + write_user_id(&opt.db_path, &user_id); + + let client = HttpClient::default(); + let user = User::UserId { + user_id: user_id.clone(), + }; + let batcher = Mutex::new(AutoBatcher::new( + client, + Batcher::new(None), + SEGMENT_API_KEY.to_string(), + )); + let segment = Box::new(Self { + user, + opt: opt.clone(), + batcher, + post_search_batcher: Mutex::new(SearchBatcher::default()), + get_search_batcher: Mutex::new(SearchBatcher::default()), + add_documents_batcher: Mutex::new(DocumentsBatcher::default()), + update_documents_batcher: Mutex::new(DocumentsBatcher::default()), + }); + let segment = Box::leak(segment); + + // batch the launched for the first time track event + if first_time_run { + segment.publish("Launched".to_string(), json!({}), None); + } + segment.tick(meilisearch.clone()); + segment + } + + fn tick(&'static self, meilisearch: MeiliSearch) { + tokio::spawn(async move { + loop { + if let Ok(stats) = meilisearch.get_all_stats().await { + let _ = self + .batcher + .lock() + .await + .push(Identify { + context: Some(json!({ + "app": 
{ + "version": env!("CARGO_PKG_VERSION").to_string(), + }, + })), + user: self.user.clone(), + traits: Self::compute_traits(&self.opt, stats), + ..Default::default() + }) + .await; + } + let get_search = std::mem::take(&mut *self.get_search_batcher.lock().await) + .into_event(&self.user, "Document Searched GET"); + let post_search = std::mem::take(&mut *self.post_search_batcher.lock().await) + .into_event(&self.user, "Document Searched POST"); + let add_documents = std::mem::take(&mut *self.add_documents_batcher.lock().await) + .into_event(&self.user, "Documents Added"); + let update_documents = + std::mem::take(&mut *self.update_documents_batcher.lock().await) + .into_event(&self.user, "Documents Updated"); + // keep the lock on the batcher just for these five operations + { + let mut batcher = self.batcher.lock().await; + if let Some(get_search) = get_search { + let _ = batcher.push(get_search).await; + } + if let Some(post_search) = post_search { + let _ = batcher.push(post_search).await; + } + if let Some(add_documents) = add_documents { + let _ = batcher.push(add_documents).await; + } + if let Some(update_documents) = update_documents { + let _ = batcher.push(update_documents).await; + } + let _ = batcher.flush().await; + } + const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour + tokio::time::sleep(INTERVAL).await; + } + }); + } + + fn start_search( + &'static self, + batcher: &'static Mutex, + query: &SearchQuery, + request: &HttpRequest, + ) { + let user_agent = extract_user_agents(request); + let sorted = query.sort.is_some() as usize; + let sort_with_geo_point = query + .sort + .as_ref() + .map_or(false, |s| s.iter().any(|s| s.contains("_geoPoint("))); + let sort_criteria_terms = query.sort.as_ref().map_or(0, |s| s.len()); + + // since there is quite a bit of computation made on the filter we are going to do that in the async task + let filter = query.filter.clone(); + let queried = query.q.is_some(); + let nb_terms = query.q.as_ref().map_or(0, |s| s.split_whitespace().count()); + + let max_limit = query.limit; + let max_offset = query.offset.unwrap_or_default(); + + // to avoid blocking the search we are going to do the heavier computation and take the + // batcher's mutex in an async task + tokio::spawn(async move { + const RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); + + let filtered = filter.is_some() as usize; + let syntax = match filter.as_ref() { + Some(Value::String(_)) => "string".to_string(), + Some(Value::Array(values)) => { + if values + .iter() + .map(|v| v.to_string()) + .any(|s| RE.is_match(&s)) + { + "mixed".to_string() + } else { + "array".to_string() + } + } + _ => "none".to_string(), + }; + let stringified_filters = filter.map_or(String::new(), |v| v.to_string()); + let filter_with_geo_radius = stringified_filters.contains("_geoRadius("); + let filter_number_of_criteria = RE.split(&stringified_filters).count(); + + let mut search_batcher = batcher.lock().await; + user_agent.into_iter().for_each(|ua| { + search_batcher.user_agents.insert(ua); + }); + search_batcher.total_received += 1; + + // sort + search_batcher.sort_with_geo_point |= sort_with_geo_point; + search_batcher.sort_sum_of_criteria_terms += sort_criteria_terms; + search_batcher.sort_total_number_of_criteria += sorted; + + // filter + search_batcher.filter_with_geo_radius |= filter_with_geo_radius; + search_batcher.filter_sum_of_criteria_terms += filter_number_of_criteria; + search_batcher.filter_total_number_of_criteria += filtered as usize; + 
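+            // tally which filter syntax this request used (string, array or mixed);
+            // `into_event` later reports the most frequent one as `most_used_syntax`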
*search_batcher.used_syntax.entry(syntax).or_insert(0) += 1; + + // q + search_batcher.sum_of_terms_count += nb_terms; + search_batcher.total_number_of_q += queried as usize; + + // pagination + search_batcher.max_limit = search_batcher.max_limit.max(max_limit); + search_batcher.max_offset = search_batcher.max_offset.max(max_offset); + }); + } + + fn batch_documents( + &'static self, + batcher: &'static Mutex, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, + request: &HttpRequest, + ) { + let user_agents = extract_user_agents(request); + let primary_key = documents_query.primary_key.clone(); + let content_type = request + .headers() + .get(CONTENT_TYPE) + .map(|s| s.to_str().unwrap_or("unkown")) + .unwrap() + .to_string(); + + tokio::spawn(async move { + let mut lock = batcher.lock().await; + for user_agent in user_agents { + lock.user_agents.insert(user_agent); + } + lock.content_types.insert(content_type); + if let Some(primary_key) = primary_key { + lock.primary_keys.insert(primary_key); + } + lock.index_creation |= index_creation; + lock.updated = true; + // drop the lock here + }); + } +} + +impl super::Analytics for SegmentAnalytics { + fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>) { + let content_type = request + .map(|req| req.headers().get(USER_AGENT)) + .flatten() + .map(|header| header.to_str().unwrap_or("unknown").to_string()); + + tokio::spawn(async move { + let _ = self + .batcher + .lock() + .await + .push(Track { + user: self.user.clone(), + event: event_name.clone(), + context: content_type.map(|user_agent| json!({ "user-agent": user_agent.split(";").map(str::trim).collect::>() })), + properties: send, + ..Default::default() + }) + .await; + }); + } + + fn start_get_search(&'static self, query: &SearchQuery, request: &HttpRequest) { + self.start_search(&self.get_search_batcher, query, request) + } + + fn end_get_search(&'static self, process_time: usize) { + tokio::spawn(async move { + let mut search_batcher = self.get_search_batcher.lock().await; + search_batcher.total_succeeded += 1; + search_batcher.time_spent.push(process_time); + }); + } + + fn start_post_search(&'static self, query: &SearchQuery, request: &HttpRequest) { + self.start_search(&self.post_search_batcher, query, request) + } + + fn end_post_search(&'static self, process_time: usize) { + tokio::spawn(async move { + let mut search_batcher = self.post_search_batcher.lock().await; + search_batcher.total_succeeded += 1; + search_batcher.time_spent.push(process_time); + }); + } + + fn add_documents( + &'static self, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, + request: &HttpRequest, + ) { + self.batch_documents( + &self.add_documents_batcher, + documents_query, + index_creation, + request, + ) + } + + fn update_documents( + &'static self, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, + request: &HttpRequest, + ) { + self.batch_documents( + &self.update_documents_batcher, + documents_query, + index_creation, + request, + ) + } +} + +impl Display for SegmentAnalytics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.user) + } +} + +#[derive(Default)] +pub struct SearchBatcher { + // context + user_agents: HashSet, + + // requests + total_received: usize, + total_succeeded: usize, + time_spent: Vec, + + // sort + sort_with_geo_point: bool, + // everytime a request has a filter, this field must be incremented by the number of terms it contains + 
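+    // (summed here and divided by `sort_total_number_of_criteria` in `into_event` to report `sort.avg_criteria_number`)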
sort_sum_of_criteria_terms: usize, + // everytime a request has a filter, this field must be incremented by one + sort_total_number_of_criteria: usize, + + // filter + filter_with_geo_radius: bool, + // everytime a request has a filter, this field must be incremented by the number of terms it contains + filter_sum_of_criteria_terms: usize, + // everytime a request has a filter, this field must be incremented by one + filter_total_number_of_criteria: usize, + used_syntax: HashMap, + + // q + // everytime a request has a q field, this field must be incremented by the number of terms + sum_of_terms_count: usize, + // everytime a request has a q field, this field must be incremented by one + total_number_of_q: usize, + + // pagination + max_limit: usize, + max_offset: usize, +} + +impl SearchBatcher { + pub fn into_event(mut self, user: &User, event_name: &str) -> Option { + if self.total_received == 0 { + None + } else { + let context = Some(json!({ "user-agent": self.user_agents})); + let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.; + self.time_spent.drain(percentile_99th as usize..); + + let properties = json!({ + "requests": { + "99th_response_time": format!("{:.2}", self.time_spent.iter().sum::() as f64 / self.time_spent.len() as f64), + "total_succeeded": self.total_succeeded, + "total_failed": self.total_received.saturating_sub(self.total_succeeded), // just to be sure we never panics + "total_received": self.total_received, + }, + "sort": { + "with_geoPoint": self.sort_with_geo_point, + "avg_criteria_number": format!("{:.2}", self.sort_sum_of_criteria_terms as f64 / self.sort_total_number_of_criteria as f64), + }, + "filter": { + "with_geoRadius": self.filter_with_geo_radius, + "avg_criteria_number": format!("{:.2}", self.filter_sum_of_criteria_terms as f64 / self.filter_total_number_of_criteria as f64), + "most_used_syntax": self.used_syntax.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), + }, + "q": { + "avg_terms_number": format!("{:.2}", self.sum_of_terms_count as f64 / self.total_number_of_q as f64), + }, + "pagination": { + "max_limit": self.max_limit, + "max_offset": self.max_offset, + }, + }); + + Some(Track { + user: user.clone(), + event: event_name.to_string(), + context, + properties, + ..Default::default() + }) + } + } +} + +#[derive(Default)] +pub struct DocumentsBatcher { + // set to true when at least one request was received + updated: bool, + + // context + user_agents: HashSet, + + content_types: HashSet, + primary_keys: HashSet, + index_creation: bool, +} + +impl DocumentsBatcher { + pub fn into_event(self, user: &User, event_name: &str) -> Option { + if !self.updated { + None + } else { + let context = Some(json!({ "user-agent": self.user_agents})); + + let properties = json!({ + "payload_type": self.content_types, + "primary_key": self.primary_keys, + "index_creation": self.index_creation, + }); + + Some(Track { + user: user.clone(), + event: event_name.to_string(), + context, + properties, + ..Default::default() + }) + } + } +} From 351ad32d772231a3fcc27318992522ffd84bfb63 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 28 Oct 2021 12:29:32 +0200 Subject: [PATCH 55/68] fix the index_creation boolean --- meilisearch-http/src/routes/indexes/documents.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs index 932861f36..d0e81e3da 100644 --- a/meilisearch-http/src/routes/indexes/documents.rs 
+++ b/meilisearch-http/src/routes/indexes/documents.rs @@ -143,7 +143,7 @@ pub async fn add_documents( analytics.add_documents( ¶ms, - meilisearch.get_index(path.index_uid.clone()).await.is_ok(), + meilisearch.get_index(path.index_uid.clone()).await.is_err(), &req, ); @@ -174,7 +174,7 @@ pub async fn update_documents( analytics.update_documents( ¶ms, - meilisearch.get_index(path.index_uid.clone()).await.is_ok(), + meilisearch.get_index(path.index_uid.clone()).await.is_err(), &req, ); From c5164c01c068a8f1e922f26837fa4b3fb3be94f3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 28 Oct 2021 12:34:39 +0200 Subject: [PATCH 56/68] set the total of sortable attributes and filterable-attributes to 0 when not set --- meilisearch-http/src/routes/indexes/settings.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 327752da6..65b1a9a4b 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -99,7 +99,7 @@ make_setting_route!( analytics.publish( "FilterableAttributes Updated".to_string(), json!({ - "total": setting.as_ref().map(|filter| filter.len()), + "total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0), "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), }), Some(req), @@ -119,7 +119,7 @@ make_setting_route!( analytics.publish( "SortableAttributes Updated".to_string(), json!({ - "total": setting.as_ref().map(|sort| sort.len()), + "total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0), "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false), }), Some(req), @@ -221,11 +221,11 @@ pub async fn update_all( "sort_position": settings.ranking_rules.as_ref().set().map(|sort| sort.iter().position(|s| s == "sort")), }, "sortable_attributes": { - "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()), + "total": settings.sortable_attributes.as_ref().set().map(|sort| sort.len()).unwrap_or(0), "has_geo": settings.sortable_attributes.as_ref().set().map(|sort| sort.iter().any(|s| s == "_geo")).unwrap_or(false), }, "filterable_attributes": { - "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()), + "total": settings.filterable_attributes.as_ref().set().map(|filter| filter.len()).unwrap_or(0), "has_geo": settings.filterable_attributes.as_ref().set().map(|filter| filter.iter().any(|s| s == "_geo")).unwrap_or(false), }, }), From 7c39fab4532fadc5b4d05a78f88d2eb3ea2d7639 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 28 Oct 2021 12:52:53 +0200 Subject: [PATCH 57/68] move the user-agent out of the context in every request --- .../src/analytics/segment_analytics.rs | 37 +++++++++---------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index 065fff175..150793df7 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -297,25 +297,27 @@ impl SegmentAnalytics { } impl super::Analytics for SegmentAnalytics { - fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>) { - let content_type = request + fn publish(&'static self, event_name: String, mut send: Value, request: Option<&HttpRequest>) { + let user_agent = request .map(|req| req.headers().get(USER_AGENT)) .flatten() - .map(|header| 
header.to_str().unwrap_or("unknown").to_string());
+            .map(|header| header.to_str().unwrap_or("unknown"))
+            .map(|s| s.split(';').map(str::trim).collect::<Vec<&str>>());
+
+        send["user-agent"] = json!(user_agent);
 
         tokio::spawn(async move {
             let _ = self
-                .batcher
-                .lock()
-                .await
-                .push(Track {
-                    user: self.user.clone(),
-                    event: event_name.clone(),
-                    context: content_type.map(|user_agent| json!({ "user-agent": user_agent.split(";").map(str::trim).collect::<Vec<&str>>() })),
-                    properties: send,
-                    ..Default::default()
-                })
-                .await;
+                .batcher
+                .lock()
+                .await
+                .push(Track {
+                    user: self.user.clone(),
+                    event: event_name.clone(),
+                    properties: send,
+                    ..Default::default()
+                })
+                .await;
         });
     }
@@ -419,11 +421,11 @@ impl SearchBatcher {
         if self.total_received == 0 {
             None
         } else {
-            let context = Some(json!({ "user-agent": self.user_agents}));
             let percentile_99th = 0.99 * (self.total_succeeded as f64 - 1.) + 1.;
             self.time_spent.drain(percentile_99th as usize..);
 
             let properties = json!({
+                "user-agent": self.user_agents,
                 "requests": {
                     "99th_response_time": format!("{:.2}", self.time_spent.iter().sum::<usize>() as f64 / self.time_spent.len() as f64),
                     "total_succeeded": self.total_succeeded,
@@ -451,7 +453,6 @@ impl SearchBatcher {
             Some(Track {
                 user: user.clone(),
                 event: event_name.to_string(),
-                context,
                 properties,
                 ..Default::default()
             })
@@ -477,9 +478,8 @@ impl DocumentsBatcher {
         if !self.updated {
             None
         } else {
-            let context = Some(json!({ "user-agent": self.user_agents}));
-
             let properties = json!({
+                "user-agent": self.user_agents,
                 "payload_type": self.content_types,
                 "primary_key": self.primary_keys,
                 "index_creation": self.index_creation,
@@ -488,7 +488,6 @@ impl DocumentsBatcher {
             Some(Track {
                 user: user.clone(),
                 event: event_name.to_string(),
-                context,
                 properties,
                 ..Default::default()
             })

From fc2f23d36c92fef9884a99ca635f8db422b53ea5 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Thu, 28 Oct 2021 12:54:57 +0200
Subject: [PATCH 58/68] move the start_since_days to the root of the identify

---
 meilisearch-http/src/analytics/segment_analytics.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs
index 150793df7..c5f8a64af 100644
--- a/meilisearch-http/src/analytics/segment_analytics.rs
+++ b/meilisearch-http/src/analytics/segment_analytics.rs
@@ -93,12 +93,12 @@ impl SegmentAnalytics {
             .collect::<Vec<u64>>();
 
         json!({
+            "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day
             "system": *SYSTEM,
             "stats": {
                 "database_size": stats.database_size,
                 "indexes_number": stats.indexes.len(),
                 "documents_number": number_of_documents,
-                "start_since_days": FIRST_START_TIMESTAMP.elapsed().as_secs() / (60 * 60 * 24), // one day
             },
             "infos": infos,
         })

From 6ef73eb2262e12ad84ea7df26735954969afb342 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Thu, 28 Oct 2021 13:02:48 +0200
Subject: [PATCH 59/68] fix all the single settings routes and add the
 searchable attributes Updated event

---
 .../src/routes/indexes/settings.rs | 28 +++++++++++++++----
 1 file changed, 23 insertions(+), 5 deletions(-)

diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs
index 65b1a9a4b..4cb939f79 100644
--- a/meilisearch-http/src/routes/indexes/settings.rs
+++ b/meilisearch-http/src/routes/indexes/settings.rs
@@ -99,8 +99,10 @@ make_setting_route!(
         analytics.publish(
             "FilterableAttributes Updated".to_string(),
             json!({
-                "total": setting.as_ref().map(|filter|
filter.len()).unwrap_or(0), - "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), + "filterable_attributes": { + "total": setting.as_ref().map(|filter| filter.len()).unwrap_or(0), + "has_geo": setting.as_ref().map(|filter| filter.contains("_geo")).unwrap_or(false), + } }), Some(req), ); @@ -119,8 +121,10 @@ make_setting_route!( analytics.publish( "SortableAttributes Updated".to_string(), json!({ - "total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0), - "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false), + "sortable_attributes": { + "total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0), + "has_geo": setting.as_ref().map(|sort| sort.contains("_geo")).unwrap_or(false), + }, }), Some(req), ); @@ -138,7 +142,21 @@ make_setting_route!( "/searchable-attributes", Vec, searchable_attributes, - "searchableAttributes" + "searchableAttributes", + analytics, + |setting: &Option>, req: &HttpRequest| { + use serde_json::json; + + analytics.publish( + "SearchableAttributes Updated".to_string(), + json!({ + "searchable_attributes": { + "total": setting.as_ref().map(|sort| sort.len()).unwrap_or(0), + }, + }), + Some(req), + ); + } ); make_setting_route!( From efd0ea9e1e4c94577983505f7377e08d5c788821 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 28 Oct 2021 13:05:58 +0200 Subject: [PATCH 60/68] makes clippy happier --- meilisearch-http/src/analytics/segment_analytics.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs index c5f8a64af..4d975dba1 100644 --- a/meilisearch-http/src/analytics/segment_analytics.rs +++ b/meilisearch-http/src/analytics/segment_analytics.rs @@ -65,7 +65,7 @@ pub struct SegmentAnalytics { impl SegmentAnalytics { fn compute_traits(opt: &Opt, stats: Stats) -> Value { static FIRST_START_TIMESTAMP: Lazy = Lazy::new(Instant::now); - const SYSTEM: Lazy = Lazy::new(|| { + static SYSTEM: Lazy = Lazy::new(|| { let mut sys = System::new_all(); sys.refresh_all(); let kernel_version = sys @@ -111,9 +111,7 @@ impl SegmentAnalytics { write_user_id(&opt.db_path, &user_id); let client = HttpClient::default(); - let user = User::UserId { - user_id: user_id.clone(), - }; + let user = User::UserId { user_id }; let batcher = Mutex::new(AutoBatcher::new( client, Batcher::new(None), @@ -215,7 +213,7 @@ impl SegmentAnalytics { // to avoid blocking the search we are going to do the heavier computation and take the // batcher's mutex in an async task tokio::spawn(async move { - const RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); + static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); let filtered = filter.is_some() as usize; let syntax = match filter.as_ref() { From 68fe93b7dbd404c115574d6baf776b796093ee11 Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Thu, 28 Oct 2021 13:25:26 +0200 Subject: [PATCH 61/68] add ranking_rules marker before sort_position --- meilisearch-http/src/routes/indexes/settings.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs index 4cb939f79..bccb1640c 100644 --- a/meilisearch-http/src/routes/indexes/settings.rs +++ b/meilisearch-http/src/routes/indexes/settings.rs @@ -192,7 +192,9 @@ make_setting_route!( analytics.publish( "RankingRules Updated".to_string(), json!({ - "sort_position": setting.as_ref().map(|sort| 
sort.iter().position(|s| s == "sort")), + "ranking_rules": { + "sort_position": setting.as_ref().map(|sort| sort.iter().position(|s| s == "sort")), + } }), Some(req), ); From 7934e3956be06509f85cf6aec3473d27347191ff Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 28 Oct 2021 16:28:41 +0200 Subject: [PATCH 62/68] replace all mutexes by channel --- .../src/analytics/mock_analytics.rs | 21 +- meilisearch-http/src/analytics/mod.rs | 21 +- .../src/analytics/segment_analytics.rs | 570 +++++++++--------- meilisearch-http/src/routes/indexes/search.rs | 12 +- 4 files changed, 329 insertions(+), 295 deletions(-) diff --git a/meilisearch-http/src/analytics/mock_analytics.rs b/meilisearch-http/src/analytics/mock_analytics.rs index 64cd5673f..dfc4c788f 100644 --- a/meilisearch-http/src/analytics/mock_analytics.rs +++ b/meilisearch-http/src/analytics/mock_analytics.rs @@ -1,7 +1,6 @@ -use std::fmt::Display; +use std::{any::Any, fmt::Display}; use actix_web::HttpRequest; -use meilisearch_lib::index::SearchQuery; use serde_json::Value; use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt}; @@ -12,6 +11,18 @@ pub struct MockAnalytics { user: String, } +#[derive(Default)] +pub struct SearchAggregator {} + +#[allow(dead_code)] +impl SearchAggregator { + pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self { + Self::default() + } + + pub fn finish(&mut self, _: &dyn Any) {} +} + impl MockAnalytics { pub fn new(opt: &Opt) -> &'static Self { let user = find_user_id(&opt.db_path).unwrap_or_default(); @@ -23,10 +34,8 @@ impl MockAnalytics { impl Analytics for MockAnalytics { // These methods are noop and should be optimized out fn publish(&'static self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {} - fn start_get_search(&'static self, _query: &SearchQuery, _request: &HttpRequest) {} - fn end_get_search(&'static self, _process_time: usize) {} - fn start_post_search(&'static self, _query: &SearchQuery, _request: &HttpRequest) {} - fn end_post_search(&'static self, _process_time: usize) {} + fn get_search(&'static self, _aggregate: super::SearchAggregator) {} + fn post_search(&'static self, _aggregate: super::SearchAggregator) {} fn add_documents( &'static self, _documents_query: &UpdateDocumentsQuery, diff --git a/meilisearch-http/src/analytics/mod.rs b/meilisearch-http/src/analytics/mod.rs index 48ba77ddb..9f76766db 100644 --- a/meilisearch-http/src/analytics/mod.rs +++ b/meilisearch-http/src/analytics/mod.rs @@ -8,7 +8,6 @@ use std::fs; use std::path::{Path, PathBuf}; use actix_web::HttpRequest; -use meilisearch_lib::index::SearchQuery; use once_cell::sync::Lazy; use platform_dirs::AppDirs; use serde_json::Value; @@ -20,12 +19,16 @@ pub use mock_analytics::MockAnalytics; // if we are in debug mode OR the analytics feature is disabled // the `SegmentAnalytics` point to the mock instead of the real analytics #[cfg(any(debug_assertions, not(feature = "analytics")))] -pub type SegmentAnalytics = MockAnalytics; +pub type SegmentAnalytics = mock_analytics::MockAnalytics; +#[cfg(any(debug_assertions, not(feature = "analytics")))] +pub type SearchAggregator = mock_analytics::SearchAggregator; // if we are in release mode and the feature analytics was enabled // we use the real analytics #[cfg(all(not(debug_assertions), feature = "analytics"))] pub type SegmentAnalytics = segment_analytics::SegmentAnalytics; +#[cfg(all(not(debug_assertions), feature = "analytics"))] +pub type SearchAggregator = segment_analytics::SearchAggregator; /// The MeiliSearch config dir: /// 
`~/.config/MeiliSearch` on *NIX or *BSD.
@@ -59,17 +62,13 @@ pub trait Analytics: Display + Sync + Send {
     /// The method used to publish most analytics that do not need to be batched every hour
     fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>);
 
-    /// This method should be called to batch a get search request
-    fn start_get_search(&'static self, query: &SearchQuery, request: &HttpRequest);
-    /// This method should be called once a get search request has succeeded
-    fn end_get_search(&'static self, process_time: usize);
+    /// This method should be called to aggregate a get search
+    fn get_search(&'static self, aggregate: SearchAggregator);
 
-    /// This method should be called to batch a post search request
-    fn start_post_search(&'static self, query: &SearchQuery, request: &HttpRequest);
-    /// This method should be called once a post search request has succeeded
-    fn end_post_search(&'static self, process_time: usize);
+    /// This method should be called to aggregate a post search
+    fn post_search(&'static self, aggregate: SearchAggregator);
 
-    // this method should be called to batch an add documents request
+    // this method should be called to aggregate an add documents request
     fn add_documents(
         &'static self,
         documents_query: &UpdateDocumentsQuery,

diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs
index 4d975dba1..f962fe6b9 100644
--- a/meilisearch-http/src/analytics/segment_analytics.rs
+++ b/meilisearch-http/src/analytics/segment_analytics.rs
@@ -7,7 +7,7 @@ use std::time::{Duration, Instant};
 use actix_web::http::header::USER_AGENT;
 use actix_web::HttpRequest;
 use http::header::CONTENT_TYPE;
-use meilisearch_lib::index::SearchQuery;
+use meilisearch_lib::index::{SearchQuery, SearchResult};
 use meilisearch_lib::index_controller::Stats;
 use meilisearch_lib::MeiliSearch;
 use once_cell::sync::Lazy;
@@ -16,7 +16,8 @@ use segment::message::{Identify, Track, User};
 use segment::{AutoBatcher, Batcher, HttpClient};
 use serde_json::{json, Value};
 use sysinfo::{DiskExt, System, SystemExt};
-use tokio::sync::Mutex;
+use tokio::select;
+use tokio::sync::mpsc::{self, Receiver, Sender};
 use uuid::Uuid;
 
 use crate::analytics::Analytics;
@@ -52,17 +53,125 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec<String> {
         .collect()
 }
 
+pub enum Message {
+    BatchMessage(Track),
+    AggregateGetSearch(SearchAggregator),
+    AggregatePostSearch(SearchAggregator),
+    AggregateAddDocuments(DocumentsAggregator),
+    AggregateUpdateDocuments(DocumentsAggregator),
+}
+
 pub struct SegmentAnalytics {
     user: User,
-    opt: Opt,
-    batcher: Mutex<AutoBatcher>,
-    post_search_batcher: Mutex<SearchBatcher>,
-    get_search_batcher: Mutex<SearchBatcher>,
-    add_documents_batcher: Mutex<DocumentsBatcher>,
-    update_documents_batcher: Mutex<DocumentsBatcher>,
+    sender: Sender<Message>,
 }
 
 impl SegmentAnalytics {
+    pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self {
+        let user_id = super::find_user_id(&opt.db_path);
+        let first_time_run = user_id.is_none();
+        let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string());
+        write_user_id(&opt.db_path, &user_id);
+
+        let client = HttpClient::default();
+        let user = User::UserId { user_id };
+        let batcher = AutoBatcher::new(client, Batcher::new(None), SEGMENT_API_KEY.to_string());
+
+        let (sender, inbox) = mpsc::channel(100); // how many analytics events we can buffer
+
+        let segment = Box::new(Segment {
+            inbox,
+            user: user.clone(),
+            opt: opt.clone(),
+            batcher,
+            post_search_aggregator: SearchAggregator::default(),
+            get_search_aggregator:
SearchAggregator::default(), + add_documents_aggregator: DocumentsAggregator::default(), + update_documents_aggregator: DocumentsAggregator::default(), + }); + tokio::spawn(segment.run(meilisearch.clone())); + + let ret = Box::new(Self { user, sender }); + let ret = Box::leak(ret); + // batch the launched for the first time track event + if first_time_run { + ret.publish("Launched".to_string(), json!({}), None); + } + + ret + } +} + +impl Display for SegmentAnalytics { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.user) + } +} + +impl super::Analytics for SegmentAnalytics { + fn publish(&'static self, event_name: String, mut send: Value, request: Option<&HttpRequest>) { + let user_agent = request + .map(|req| req.headers().get(USER_AGENT)) + .flatten() + .map(|header| header.to_str().unwrap_or("unknown")) + .map(|s| s.split(';').map(str::trim).collect::>()); + + send["user-agent"] = json!(user_agent); + let event = Track { + user: self.user.clone(), + event: event_name.clone(), + properties: send, + ..Default::default() + }; + let _ = self.sender.try_send(Message::BatchMessage(event.into())); + } + fn get_search(&'static self, aggregate: SearchAggregator) { + let _ = self.sender.try_send(Message::AggregateGetSearch(aggregate)); + } + + fn post_search(&'static self, aggregate: SearchAggregator) { + let _ = self + .sender + .try_send(Message::AggregatePostSearch(aggregate)); + } + + fn add_documents( + &'static self, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, + request: &HttpRequest, + ) { + let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); + let _ = self + .sender + .try_send(Message::AggregateAddDocuments(aggregate)); + } + + fn update_documents( + &'static self, + documents_query: &UpdateDocumentsQuery, + index_creation: bool, + request: &HttpRequest, + ) { + let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request); + let _ = self + .sender + .try_send(Message::AggregateUpdateDocuments(aggregate)); + } +} + +pub struct Segment { + inbox: Receiver, + user: User, + opt: Opt, + batcher: AutoBatcher, + get_search_aggregator: SearchAggregator, + post_search_aggregator: SearchAggregator, + add_documents_aggregator: DocumentsAggregator, + update_documents_aggregator: DocumentsAggregator, +} + +impl Segment { fn compute_traits(opt: &Opt, stats: Stats) -> Value { static FIRST_START_TIMESTAMP: Lazy = Lazy::new(Instant::now); static SYSTEM: Lazy = Lazy::new(|| { @@ -104,282 +213,74 @@ impl SegmentAnalytics { }) } - pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self { - let user_id = super::find_user_id(&opt.db_path); - let first_time_run = user_id.is_none(); - let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string()); - write_user_id(&opt.db_path, &user_id); - - let client = HttpClient::default(); - let user = User::UserId { user_id }; - let batcher = Mutex::new(AutoBatcher::new( - client, - Batcher::new(None), - SEGMENT_API_KEY.to_string(), - )); - let segment = Box::new(Self { - user, - opt: opt.clone(), - batcher, - post_search_batcher: Mutex::new(SearchBatcher::default()), - get_search_batcher: Mutex::new(SearchBatcher::default()), - add_documents_batcher: Mutex::new(DocumentsBatcher::default()), - update_documents_batcher: Mutex::new(DocumentsBatcher::default()), - }); - let segment = Box::leak(segment); - - // batch the launched for the first time track event - if first_time_run { - 
segment.publish("Launched".to_string(), json!({}), None); + async fn run(mut self, meilisearch: MeiliSearch) { + println!("CALLED"); + const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour + loop { + let mut interval = tokio::time::interval(INTERVAL); + select! { + _ = interval.tick() => { + println!("TRIGGERED"); + self.tick(meilisearch.clone()).await; + }, + msg = self.inbox.recv() => { + match msg { + Some(Message::BatchMessage(msg)) => drop(self.batcher.push(msg).await), + Some(Message::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg), + Some(Message::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg), + Some(Message::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg), + Some(Message::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg), + None => (), + } + } + } } - segment.tick(meilisearch.clone()); - segment } - fn tick(&'static self, meilisearch: MeiliSearch) { - tokio::spawn(async move { - loop { - if let Ok(stats) = meilisearch.get_all_stats().await { - let _ = self - .batcher - .lock() - .await - .push(Identify { - context: Some(json!({ - "app": { - "version": env!("CARGO_PKG_VERSION").to_string(), - }, - })), - user: self.user.clone(), - traits: Self::compute_traits(&self.opt, stats), - ..Default::default() - }) - .await; - } - let get_search = std::mem::take(&mut *self.get_search_batcher.lock().await) - .into_event(&self.user, "Document Searched GET"); - let post_search = std::mem::take(&mut *self.post_search_batcher.lock().await) - .into_event(&self.user, "Document Searched POST"); - let add_documents = std::mem::take(&mut *self.add_documents_batcher.lock().await) - .into_event(&self.user, "Documents Added"); - let update_documents = - std::mem::take(&mut *self.update_documents_batcher.lock().await) - .into_event(&self.user, "Documents Updated"); - // keep the lock on the batcher just for these five operations - { - let mut batcher = self.batcher.lock().await; - if let Some(get_search) = get_search { - let _ = batcher.push(get_search).await; - } - if let Some(post_search) = post_search { - let _ = batcher.push(post_search).await; - } - if let Some(add_documents) = add_documents { - let _ = batcher.push(add_documents).await; - } - if let Some(update_documents) = update_documents { - let _ = batcher.push(update_documents).await; - } - let _ = batcher.flush().await; - } - const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour - tokio::time::sleep(INTERVAL).await; - } - }); - } - - fn start_search( - &'static self, - batcher: &'static Mutex, - query: &SearchQuery, - request: &HttpRequest, - ) { - let user_agent = extract_user_agents(request); - let sorted = query.sort.is_some() as usize; - let sort_with_geo_point = query - .sort - .as_ref() - .map_or(false, |s| s.iter().any(|s| s.contains("_geoPoint("))); - let sort_criteria_terms = query.sort.as_ref().map_or(0, |s| s.len()); - - // since there is quite a bit of computation made on the filter we are going to do that in the async task - let filter = query.filter.clone(); - let queried = query.q.is_some(); - let nb_terms = query.q.as_ref().map_or(0, |s| s.split_whitespace().count()); - - let max_limit = query.limit; - let max_offset = query.offset.unwrap_or_default(); - - // to avoid blocking the search we are going to do the heavier computation and take the - // batcher's mutex in an async task - tokio::spawn(async move { - static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); - - 
let filtered = filter.is_some() as usize; - let syntax = match filter.as_ref() { - Some(Value::String(_)) => "string".to_string(), - Some(Value::Array(values)) => { - if values - .iter() - .map(|v| v.to_string()) - .any(|s| RE.is_match(&s)) - { - "mixed".to_string() - } else { - "array".to_string() - } - } - _ => "none".to_string(), - }; - let stringified_filters = filter.map_or(String::new(), |v| v.to_string()); - let filter_with_geo_radius = stringified_filters.contains("_geoRadius("); - let filter_number_of_criteria = RE.split(&stringified_filters).count(); - - let mut search_batcher = batcher.lock().await; - user_agent.into_iter().for_each(|ua| { - search_batcher.user_agents.insert(ua); - }); - search_batcher.total_received += 1; - - // sort - search_batcher.sort_with_geo_point |= sort_with_geo_point; - search_batcher.sort_sum_of_criteria_terms += sort_criteria_terms; - search_batcher.sort_total_number_of_criteria += sorted; - - // filter - search_batcher.filter_with_geo_radius |= filter_with_geo_radius; - search_batcher.filter_sum_of_criteria_terms += filter_number_of_criteria; - search_batcher.filter_total_number_of_criteria += filtered as usize; - *search_batcher.used_syntax.entry(syntax).or_insert(0) += 1; - - // q - search_batcher.sum_of_terms_count += nb_terms; - search_batcher.total_number_of_q += queried as usize; - - // pagination - search_batcher.max_limit = search_batcher.max_limit.max(max_limit); - search_batcher.max_offset = search_batcher.max_offset.max(max_offset); - }); - } - - fn batch_documents( - &'static self, - batcher: &'static Mutex, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - let user_agents = extract_user_agents(request); - let primary_key = documents_query.primary_key.clone(); - let content_type = request - .headers() - .get(CONTENT_TYPE) - .map(|s| s.to_str().unwrap_or("unkown")) - .unwrap() - .to_string(); - - tokio::spawn(async move { - let mut lock = batcher.lock().await; - for user_agent in user_agents { - lock.user_agents.insert(user_agent); - } - lock.content_types.insert(content_type); - if let Some(primary_key) = primary_key { - lock.primary_keys.insert(primary_key); - } - lock.index_creation |= index_creation; - lock.updated = true; - // drop the lock here - }); - } -} - -impl super::Analytics for SegmentAnalytics { - fn publish(&'static self, event_name: String, mut send: Value, request: Option<&HttpRequest>) { - let user_agent = request - .map(|req| req.headers().get(USER_AGENT)) - .flatten() - .map(|header| header.to_str().unwrap_or("unknown")) - .map(|s| s.split(';').map(str::trim).collect::>()); - - send["user-agent"] = json!(user_agent); - - tokio::spawn(async move { + async fn tick(&mut self, meilisearch: MeiliSearch) { + println!("SENDING A TICK"); + if let Ok(stats) = meilisearch.get_all_stats().await { let _ = self .batcher - .lock() - .await - .push(Track { + .push(Identify { + context: Some(json!({ + "app": { + "version": env!("CARGO_PKG_VERSION").to_string(), + }, + })), user: self.user.clone(), - event: event_name.clone(), - properties: send, + traits: Self::compute_traits(&self.opt, stats), ..Default::default() }) .await; - }); - } + } + let get_search = std::mem::take(&mut self.get_search_aggregator) + .into_event(&self.user, "Document Searched GET"); + let post_search = std::mem::take(&mut self.post_search_aggregator) + .into_event(&self.user, "Document Searched POST"); + let add_documents = std::mem::take(&mut self.add_documents_aggregator) + .into_event(&self.user, "Documents 
Added"); + let update_documents = std::mem::take(&mut self.update_documents_aggregator) + .into_event(&self.user, "Documents Updated"); - fn start_get_search(&'static self, query: &SearchQuery, request: &HttpRequest) { - self.start_search(&self.get_search_batcher, query, request) - } - - fn end_get_search(&'static self, process_time: usize) { - tokio::spawn(async move { - let mut search_batcher = self.get_search_batcher.lock().await; - search_batcher.total_succeeded += 1; - search_batcher.time_spent.push(process_time); - }); - } - - fn start_post_search(&'static self, query: &SearchQuery, request: &HttpRequest) { - self.start_search(&self.post_search_batcher, query, request) - } - - fn end_post_search(&'static self, process_time: usize) { - tokio::spawn(async move { - let mut search_batcher = self.post_search_batcher.lock().await; - search_batcher.total_succeeded += 1; - search_batcher.time_spent.push(process_time); - }); - } - - fn add_documents( - &'static self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - self.batch_documents( - &self.add_documents_batcher, - documents_query, - index_creation, - request, - ) - } - - fn update_documents( - &'static self, - documents_query: &UpdateDocumentsQuery, - index_creation: bool, - request: &HttpRequest, - ) { - self.batch_documents( - &self.update_documents_batcher, - documents_query, - index_creation, - request, - ) - } -} - -impl Display for SegmentAnalytics { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.user) + if let Some(get_search) = get_search { + let _ = self.batcher.push(get_search).await; + } + if let Some(post_search) = post_search { + let _ = self.batcher.push(post_search).await; + } + if let Some(add_documents) = add_documents { + let _ = self.batcher.push(add_documents).await; + } + if let Some(update_documents) = update_documents { + let _ = self.batcher.push(update_documents).await; + } + let _ = self.batcher.flush().await; } } #[derive(Default)] -pub struct SearchBatcher { +pub struct SearchAggregator { // context user_agents: HashSet, @@ -414,7 +315,90 @@ pub struct SearchBatcher { max_offset: usize, } -impl SearchBatcher { +impl SearchAggregator { + pub fn from_query(query: &SearchQuery, request: &HttpRequest) -> Self { + let mut ret = Self::default(); + ret.total_received = 1; + ret.user_agents = extract_user_agents(request).into_iter().collect(); + + if let Some(ref sort) = query.sort { + ret.sort_total_number_of_criteria = 1; + ret.sort_with_geo_point = sort.iter().any(|s| s.contains("_geoPoint(")); + ret.sort_sum_of_criteria_terms = sort.len(); + } + + if let Some(ref filter) = query.filter { + static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); + ret.filter_total_number_of_criteria = 1; + + let syntax = match filter { + Value::String(_) => "string".to_string(), + Value::Array(values) => { + if values + .iter() + .map(|v| v.to_string()) + .any(|s| RE.is_match(&s)) + { + "mixed".to_string() + } else { + "array".to_string() + } + } + _ => "none".to_string(), + }; + // convert the string to a HashMap + ret.used_syntax.insert(syntax, 1); + + let stringified_filters = filter.to_string(); + ret.filter_with_geo_radius = stringified_filters.contains("_geoRadius("); + ret.filter_sum_of_criteria_terms = RE.split(&stringified_filters).count(); + } + + if let Some(ref q) = query.q { + ret.total_number_of_q = 1; + ret.sum_of_terms_count = q.split_whitespace().count(); + } + + ret.max_limit = query.limit; + ret.max_offset = 
+
+        ret
+    }
+
+    pub fn finish(&mut self, result: &SearchResult) {
+        self.total_succeeded += 1;
+        self.time_spent.push(result.processing_time_ms as usize);
+    }
+
+    /// Aggregate one [SearchAggregator] into another.
+    pub fn aggregate(&mut self, mut other: Self) {
+        // context
+        for user_agent in other.user_agents.into_iter() {
+            self.user_agents.insert(user_agent);
+        }
+        // request
+        self.total_received += other.total_received;
+        self.total_succeeded += other.total_succeeded;
+        self.time_spent.append(&mut other.time_spent);
+        // sort
+        self.sort_with_geo_point |= other.sort_with_geo_point;
+        self.sort_sum_of_criteria_terms += other.sort_sum_of_criteria_terms;
+        self.sort_total_number_of_criteria += other.sort_total_number_of_criteria;
+        // filter
+        self.filter_with_geo_radius |= other.filter_with_geo_radius;
+        self.filter_sum_of_criteria_terms += other.filter_sum_of_criteria_terms;
+        self.filter_total_number_of_criteria += other.filter_total_number_of_criteria;
+        for (key, value) in other.used_syntax.into_iter() {
+            *self.used_syntax.entry(key).or_insert(0) += value;
+        }
+        // q
+        self.sum_of_terms_count += other.sum_of_terms_count;
+        self.total_number_of_q += other.total_number_of_q;
+        // pagination
+        self.max_limit = self.max_limit.max(other.max_limit);
+        self.max_offset = self.max_offset.max(other.max_offset);
+    }
+
     pub fn into_event(mut self, user: &User, event_name: &str) -> Option<Track> {
         if self.total_received == 0 {
             None
@@ -459,7 +443,7 @@ impl SearchBatcher {
 }
 
 #[derive(Default)]
-pub struct DocumentsBatcher {
+pub struct DocumentsAggregator {
     // set to true when at least one request was received
     updated: bool,
 
@@ -471,7 +455,47 @@ pub struct DocumentsBatcher {
     index_creation: bool,
 }
 
-impl DocumentsBatcher {
+impl DocumentsAggregator {
+    pub fn from_query(
+        documents_query: &UpdateDocumentsQuery,
+        index_creation: bool,
+        request: &HttpRequest,
+    ) -> Self {
+        let mut ret = Self::default();
+
+        ret.updated = true;
+        ret.user_agents = extract_user_agents(request).into_iter().collect();
+        if let Some(primary_key) = documents_query.primary_key.clone() {
+            ret.primary_keys.insert(primary_key);
+        }
+        let content_type = request
+            .headers()
+            .get(CONTENT_TYPE)
+            .map(|s| s.to_str().unwrap_or("unknown"))
+            .unwrap()
+            .to_string();
+        ret.content_types.insert(content_type);
+        ret.index_creation = index_creation;
+
+        ret
+    }
+
+    /// Aggregate one [DocumentsAggregator] into another.
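+    /// The boolean flags are OR-ed together and the sets are unioned, so any
+    /// number of per-request aggregators collapse into a single batched event.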
+    pub fn aggregate(&mut self, other: Self) {
+        self.updated |= other.updated;
+        // we can't create a union because there is no `into_union` method
+        for user_agent in other.user_agents.into_iter() {
+            self.user_agents.insert(user_agent);
+        }
+        for primary_key in other.primary_keys.into_iter() {
+            self.primary_keys.insert(primary_key);
+        }
+        for content_type in other.content_types.into_iter() {
+            self.content_types.insert(content_type);
+        }
+        self.index_creation |= other.index_creation;
+    }
+
     pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
         if !self.updated {
             None
diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs
index d5d2b9540..f82f231f3 100644
--- a/meilisearch-http/src/routes/indexes/search.rs
+++ b/meilisearch-http/src/routes/indexes/search.rs
@@ -5,7 +5,7 @@ use meilisearch_lib::MeiliSearch;
 use serde::Deserialize;
 use serde_json::Value;
 
-use crate::analytics::Analytics;
+use crate::analytics::{Analytics, SearchAggregator};
 use crate::error::ResponseError;
 use crate::extractors::authentication::{policies::*, GuardedData};
 use crate::routes::IndexParam;
@@ -116,7 +116,7 @@ pub async fn search_with_url_query(
     debug!("called with params: {:?}", params);
     let query: SearchQuery = params.into_inner().into();
 
-    analytics.start_get_search(&query, &req);
+    let mut aggregate = SearchAggregator::from_query(&query, &req);
 
     let search_result = meilisearch
         .search(path.into_inner().index_uid, query)
@@ -126,7 +126,8 @@ pub async fn search_with_url_query(
     #[cfg(test)]
     assert!(!search_result.exhaustive_nb_hits);
 
-    analytics.end_post_search(search_result.processing_time_ms as usize);
+    aggregate.finish(&search_result);
+    analytics.get_search(aggregate);
 
     debug!("returns: {:?}", search_result);
     Ok(HttpResponse::Ok().json(search_result))
@@ -142,7 +143,7 @@ pub async fn search_with_post(
     let query = params.into_inner();
     debug!("search called with params: {:?}", query);
 
-    analytics.start_post_search(&query, &req);
+    let mut aggregate = SearchAggregator::from_query(&query, &req);
 
     let search_result = meilisearch
         .search(path.into_inner().index_uid, query)
@@ -152,7 +153,8 @@ pub async fn search_with_post(
     #[cfg(test)]
     assert!(!search_result.exhaustive_nb_hits);
 
-    analytics.end_post_search(search_result.processing_time_ms as usize);
+    aggregate.finish(&search_result);
+    analytics.post_search(aggregate);
 
     debug!("returns: {:?}", search_result);
     Ok(HttpResponse::Ok().json(search_result))

From ba69ad672a5adc9ca67c9e2c9a6a7ea5262c3cc6 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Thu, 28 Oct 2021 16:47:56 +0200
Subject: [PATCH 63/68] fix the timing issue

---
 meilisearch-http/src/analytics/segment_analytics.rs | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs
index f962fe6b9..9933a5deb 100644
--- a/meilisearch-http/src/analytics/segment_analytics.rs
+++ b/meilisearch-http/src/analytics/segment_analytics.rs
@@ -214,13 +214,12 @@ impl Segment {
     }
 
     async fn run(mut self, meilisearch: MeiliSearch) {
-        println!("CALLED");
         const INTERVAL: Duration = Duration::from_secs(60 * 60); // one hour
+        let mut interval = tokio::time::interval(INTERVAL);
+
         loop {
-            let mut interval = tokio::time::interval(INTERVAL);
             select! {
                 _ = interval.tick() => {
-                    println!("TRIGGERED");
                     self.tick(meilisearch.clone()).await;
                 },
                 msg = self.inbox.recv() => {
@@ -238,7 +237,6 @@ impl Segment {
     }
 
     async fn tick(&mut self, meilisearch: MeiliSearch) {
-        println!("SENDING A TICK");
         if let Ok(stats) = meilisearch.get_all_stats().await {
             let _ = self
                 .batcher

From 66d87761b7fbc0afb19c65a81987e6a4a6ef3ac3 Mon Sep 17 00:00:00 2001
From: Tamo
Date: Thu, 28 Oct 2021 18:39:50 +0200
Subject: [PATCH 64/68] align the parameters in the launch resume

---
 meilisearch-http/src/main.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs
index c6b8be58d..c73f5501f 100644
--- a/meilisearch-http/src/main.rs
+++ b/meilisearch-http/src/main.rs
@@ -122,14 +122,14 @@ Thank you for using MeiliSearch!
 
 We collect anonymized analytics to improve our product and your experience. To learn more, including how to turn off analytics, visit our dedicated documentation page: https://docs.meilisearch.com/learn/what_is_meilisearch/telemetry.html
 
-Anonymous telemetry: \"Enabled\""
+Anonymous telemetry:\t\"Enabled\""
     );
     }
 }
 
     let analytics = analytics.to_string();
     if !analytics.is_empty() {
-        eprintln!("Instance UID:\t\"{}\"", analytics);
+        eprintln!("Instance UID:\t\t\"{}\"", analytics);
     }
 
     eprintln!();

From d65f055030d3d4ed2e7fb4c8e9df800dee14efe9 Mon Sep 17 00:00:00 2001
From: marin postma
Date: Fri, 29 Oct 2021 15:58:06 +0200
Subject: [PATCH 65/68] pass analytics into Arc instead of static ref

---
 .../src/analytics/mock_analytics.rs           | 27 +++------
 meilisearch-http/src/analytics/mod.rs         | 13 ++--
 .../src/analytics/segment_analytics.rs        | 59 +++++++++----------
 meilisearch-http/src/lib.rs                   |  5 +-
 meilisearch-http/src/main.rs                  | 22 +++----
 5 files changed, 57 insertions(+), 69 deletions(-)

diff --git a/meilisearch-http/src/analytics/mock_analytics.rs b/meilisearch-http/src/analytics/mock_analytics.rs
index dfc4c788f..486a10ac0 100644
--- a/meilisearch-http/src/analytics/mock_analytics.rs
+++ b/meilisearch-http/src/analytics/mock_analytics.rs
@@ -1,4 +1,4 @@
-use std::{any::Any, fmt::Display};
+use std::{any::Any, sync::Arc};
 
 use actix_web::HttpRequest;
 use serde_json::Value;
@@ -7,9 +7,7 @@ use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt};
 
 use super::{find_user_id, Analytics};
 
-pub struct MockAnalytics {
-    user: String,
-}
+pub struct MockAnalytics;
 
 #[derive(Default)]
 pub struct SearchAggregator {}
@@ -24,36 +22,29 @@ impl SearchAggregator {
 }
 
 impl MockAnalytics {
-    pub fn new(opt: &Opt) -> &'static Self {
+    pub fn new(opt: &Opt) -> (Arc<dyn Analytics>, String) {
         let user = find_user_id(&opt.db_path).unwrap_or_default();
-        let analytics = Box::new(Self { user });
-        Box::leak(analytics)
+        (Arc::new(Self), user)
     }
 }
 
 impl Analytics for MockAnalytics {
     // These methods are no-ops and should be optimized out
-    fn publish(&'static self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
-    fn get_search(&'static self, _aggregate: super::SearchAggregator) {}
-    fn post_search(&'static self, _aggregate: super::SearchAggregator) {}
+    fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
+    fn get_search(&self, _aggregate: super::SearchAggregator) {}
+    fn post_search(&self, _aggregate: super::SearchAggregator) {}
     fn add_documents(
-        &'static self,
+        &self,
         _documents_query: &UpdateDocumentsQuery,
         _index_creation: bool,
         _request: &HttpRequest,
     ) {
     }
     fn update_documents(
-        &'static self,
+        &self,
         _documents_query: &UpdateDocumentsQuery,
         _index_creation: bool,
         _request: &HttpRequest,
     ) {
     }
 }
-
-impl Display for MockAnalytics {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.user)
-    }
-}
diff --git a/meilisearch-http/src/analytics/mod.rs b/meilisearch-http/src/analytics/mod.rs
index 9f76766db..f777a293e 100644
--- a/meilisearch-http/src/analytics/mod.rs
+++ b/meilisearch-http/src/analytics/mod.rs
@@ -3,7 +3,6 @@ mod mock_analytics;
 #[cfg(all(not(debug_assertions), feature = "analytics"))]
 mod segment_analytics;
 
-use std::fmt::Display;
 use std::fs;
 use std::path::{Path, PathBuf};
 
@@ -58,26 +57,26 @@ fn find_user_id(db_path: &Path) -> Option<String> {
         .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
 }
 
-pub trait Analytics: Display + Sync + Send {
+pub trait Analytics: Sync + Send {
     /// The method used to publish most analytics that do not need to be batched every hour
-    fn publish(&'static self, event_name: String, send: Value, request: Option<&HttpRequest>);
+    fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);
 
     /// This method should be called to aggregate a get search
-    fn get_search(&'static self, aggregate: SearchAggregator);
+    fn get_search(&self, aggregate: SearchAggregator);
 
     /// This method should be called to aggregate a post search
-    fn post_search(&'static self, aggregate: SearchAggregator);
+    fn post_search(&self, aggregate: SearchAggregator);
 
     // this method should be called to aggregate an add documents request
     fn add_documents(
-        &'static self,
+        &self,
         documents_query: &UpdateDocumentsQuery,
         index_creation: bool,
         request: &HttpRequest,
     );
 
     // this method should be called to batch an update documents request
     fn update_documents(
-        &'static self,
+        &self,
         documents_query: &UpdateDocumentsQuery,
         index_creation: bool,
         request: &HttpRequest,
     );
 }
diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs
index 9933a5deb..f34bf4378 100644
--- a/meilisearch-http/src/analytics/segment_analytics.rs
+++ b/meilisearch-http/src/analytics/segment_analytics.rs
@@ -1,7 +1,7 @@
 use std::collections::{HashMap, HashSet};
-use std::fmt::Display;
 use std::fs;
 use std::path::Path;
+use std::sync::Arc;
 use std::time::{Duration, Instant};
 
 use actix_web::http::header::USER_AGENT;
@@ -53,7 +53,7 @@ pub fn extract_user_agents(request: &HttpRequest) -> Vec<String> {
         .collect()
 }
 
-pub enum Message {
+pub enum AnalyticsMsg {
     BatchMessage(Track),
     AggregateGetSearch(SearchAggregator),
     AggregatePostSearch(SearchAggregator),
@@ -62,12 +62,12 @@ pub enum Message {
 }
 
 pub struct SegmentAnalytics {
+    sender: Sender<AnalyticsMsg>,
     user: User,
-    sender: Sender<Message>,
 }
 
 impl SegmentAnalytics {
-    pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> &'static Self {
+    pub async fn new(opt: &Opt, meilisearch: &MeiliSearch) -> (Arc<dyn Analytics>, String) {
         let user_id = super::find_user_id(&opt.db_path);
         let first_time_run = user_id.is_none();
         let user_id = user_id.unwrap_or_else(|| Uuid::new_v4().to_string());
@@ -91,25 +91,21 @@ impl SegmentAnalytics {
         });
         tokio::spawn(segment.run(meilisearch.clone()));
 
-        let ret = Box::new(Self { user, sender });
-        let ret = Box::leak(ret);
+        let this = Self {
+            sender,
+            user: user.clone(),
+        };
 
         // batch the `Launched` track event on the first run
         if first_time_run {
-            ret.publish("Launched".to_string(), json!({}), None);
+            this.publish("Launched".to_string(), json!({}), None);
         }
 
-        ret
-    }
-}
-
-impl Display for SegmentAnalytics {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{}", self.user)
+        (Arc::new(this), user.to_string())
     }
 }
 
 impl super::Analytics for SegmentAnalytics {
-    fn publish(&'static self, event_name: String, mut send: Value, request: Option<&HttpRequest>) {
+    fn publish(&self, event_name: String, mut send: Value, request: Option<&HttpRequest>) {
         let user_agent = request
             .map(|req| req.headers().get(USER_AGENT))
             .flatten()
@@ -123,20 +119,21 @@ impl super::Analytics for SegmentAnalytics {
             properties: send,
             ..Default::default()
         };
-        let _ = self.sender.try_send(Message::BatchMessage(event.into()));
-    }
 
-    fn get_search(&'static self, aggregate: SearchAggregator) {
-        let _ = self.sender.try_send(Message::AggregateGetSearch(aggregate));
+        let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event.into()));
     }
 
-    fn post_search(&'static self, aggregate: SearchAggregator) {
+    fn get_search(&self, aggregate: SearchAggregator) {
+        let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate));
+    }
+
+    fn post_search(&self, aggregate: SearchAggregator) {
         let _ = self
             .sender
-            .try_send(Message::AggregatePostSearch(aggregate));
+            .try_send(AnalyticsMsg::AggregatePostSearch(aggregate));
     }
 
     fn add_documents(
-        &'static self,
+        &self,
         documents_query: &UpdateDocumentsQuery,
         index_creation: bool,
         request: &HttpRequest,
@@ -144,11 +141,11 @@ impl super::Analytics for SegmentAnalytics {
         let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
         let _ = self
             .sender
-            .try_send(Message::AggregateAddDocuments(aggregate));
+            .try_send(AnalyticsMsg::AggregateAddDocuments(aggregate));
     }
 
     fn update_documents(
-        &'static self,
+        &self,
         documents_query: &UpdateDocumentsQuery,
         index_creation: bool,
         request: &HttpRequest,
@@ -156,12 +153,12 @@ impl super::Analytics for SegmentAnalytics {
         let aggregate = DocumentsAggregator::from_query(documents_query, index_creation, request);
         let _ = self
             .sender
-            .try_send(Message::AggregateUpdateDocuments(aggregate));
+            .try_send(AnalyticsMsg::AggregateUpdateDocuments(aggregate));
     }
 }
 
 pub struct Segment {
-    inbox: Receiver<Message>,
+    inbox: Receiver<AnalyticsMsg>,
     user: User,
     opt: Opt,
     batcher: AutoBatcher,
@@ -224,11 +221,11 @@ impl Segment {
             },
             msg = self.inbox.recv() => {
                 match msg {
-                    Some(Message::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
-                    Some(Message::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
-                    Some(Message::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
-                    Some(Message::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
-                    Some(Message::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
+                    Some(AnalyticsMsg::BatchMessage(msg)) => drop(self.batcher.push(msg).await),
+                    Some(AnalyticsMsg::AggregateGetSearch(agreg)) => self.get_search_aggregator.aggregate(agreg),
+                    Some(AnalyticsMsg::AggregatePostSearch(agreg)) => self.post_search_aggregator.aggregate(agreg),
+                    Some(AnalyticsMsg::AggregateAddDocuments(agreg)) => self.add_documents_aggregator.aggregate(agreg),
+                    Some(AnalyticsMsg::AggregateUpdateDocuments(agreg)) => self.update_documents_aggregator.aggregate(agreg),
                     None => (),
                 }
             }
diff --git a/meilisearch-http/src/lib.rs b/meilisearch-http/src/lib.rs
index ead678693..05d95c19f 100644
--- a/meilisearch-http/src/lib.rs
+++ b/meilisearch-http/src/lib.rs
@@ -7,6 +7,7 @@ pub mod analytics;
 pub mod helpers;
 pub mod option;
 pub mod routes;
+use std::sync::Arc;
 use std::time::Duration;
 
 use crate::error::MeilisearchHttpError;
@@ -78,12 +79,12 @@ pub fn configure_data(
     config: &mut web::ServiceConfig,
     data: MeiliSearch,
     opt: &Opt,
-    analytics: &'static dyn Analytics,
+    analytics: Arc<dyn Analytics>,
 ) {
     let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
     config
         .app_data(data)
-        .app_data(web::Data::new(analytics))
+        .app_data(web::Data::from(analytics))
         .app_data(
             web::JsonConfig::default()
                 .content_type(|mime| mime == mime::APPLICATION_JSON)
diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs
index c73f5501f..152c4bbcc 100644
--- a/meilisearch-http/src/main.rs
+++ b/meilisearch-http/src/main.rs
@@ -1,4 +1,5 @@
 use std::env;
+use std::sync::Arc;
 
 use actix_web::HttpServer;
 use meilisearch_http::analytics;
@@ -46,15 +47,15 @@ async fn main() -> anyhow::Result<()> {
     let meilisearch = setup_meilisearch(&opt)?;
 
     #[cfg(all(not(debug_assertions), feature = "analytics"))]
-    let analytics = if !opt.no_analytics {
-        analytics::SegmentAnalytics::new(&opt, &meilisearch).await as &'static dyn Analytics
+    let (analytics, user) = if !opt.no_analytics {
+        analytics::SegmentAnalytics::new(&opt, &meilisearch).await
     } else {
-        analytics::MockAnalytics::new(&opt) as &'static dyn Analytics
+        analytics::MockAnalytics::new(&opt)
     };
     #[cfg(any(debug_assertions, not(feature = "analytics")))]
-    let analytics = analytics::MockAnalytics::new(&opt);
+    let (analytics, user) = analytics::MockAnalytics::new(&opt);
 
-    print_launch_resume(&opt, analytics);
+    print_launch_resume(&opt, &user);
 
     run_http(meilisearch, opt, analytics).await?;
 
@@ -64,12 +65,12 @@ async fn main() -> anyhow::Result<()> {
 async fn run_http(
     data: MeiliSearch,
     opt: Opt,
-    analytics: &'static dyn Analytics,
+    analytics: Arc<dyn Analytics>,
 ) -> anyhow::Result<()> {
     let _enable_dashboard = &opt.env == "development";
     let opt_clone = opt.clone();
     let http_server =
-        HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone, analytics))
+        HttpServer::new(move || create_app!(data, _enable_dashboard, opt_clone, analytics.clone()))
         // Disable signals allows the server to terminate immediately when a user enter CTRL-C
         .disable_signals();
 
@@ -84,7 +85,7 @@ async fn run_http(
     Ok(())
 }
 
-pub fn print_launch_resume(opt: &Opt, analytics: &'static dyn Analytics) {
+pub fn print_launch_resume(opt: &Opt, user: &str) {
     let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
     let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
 
@@ -127,9 +128,8 @@ Anonymous telemetry:\t\"Enabled\""
         }
     }
 
-    let analytics = analytics.to_string();
-    if !analytics.is_empty() {
-        eprintln!("Instance UID:\t\t\"{}\"", analytics);
+    if !user.is_empty() {
+        eprintln!("Instance UID:\t\t\"{}\"", user);
     }
 
     eprintln!();

From 2665c0099d2c14691c20de5b6000b63babb056f8 Mon Sep 17 00:00:00 2001
From: marin postma
Date: Fri, 29 Oct 2021 16:10:58 +0200
Subject: [PATCH 66/68] clippy + fmt

---
 meilisearch-http/src/analytics/mock_analytics.rs    |  1 +
 meilisearch-http/src/analytics/segment_analytics.rs |  8 ++++++--
 meilisearch-http/src/main.rs                        |  2 +-
 meilisearch-http/src/routes/dump.rs                 |  2 +-
 meilisearch-http/src/routes/indexes/documents.rs    |  4 ++--
 meilisearch-http/src/routes/indexes/mod.rs          |  4 ++--
 meilisearch-http/src/routes/indexes/search.rs       |  4 ++--
 meilisearch-http/src/routes/indexes/settings.rs     |  4 ++--
 meilisearch-http/tests/common/service.rs            | 10 +++++-----
 meilisearch-http/tests/content_type.rs              |  2 +-
 meilisearch-http/tests/documents/add_documents.rs   |  6 +++---
 11 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/meilisearch-http/src/analytics/mock_analytics.rs b/meilisearch-http/src/analytics/mock_analytics.rs
index 486a10ac0..eb26add26 100644
--- a/meilisearch-http/src/analytics/mock_analytics.rs
+++ b/meilisearch-http/src/analytics/mock_analytics.rs
@@ -22,6 +22,7 @@ impl SearchAggregator {
 }
 
 impl MockAnalytics {
+    #[allow(clippy::new_ret_no_self)]
     pub fn new(opt: &Opt) -> (Arc<dyn Analytics>, String) {
         let user = find_user_id(&opt.db_path).unwrap_or_default();
         (Arc::new(Self), user)
diff --git a/meilisearch-http/src/analytics/segment_analytics.rs b/meilisearch-http/src/analytics/segment_analytics.rs
index f34bf4378..8fbea3eb8 100644
--- a/meilisearch-http/src/analytics/segment_analytics.rs
+++ b/meilisearch-http/src/analytics/segment_analytics.rs
@@ -119,11 +119,15 @@ impl super::Analytics for SegmentAnalytics {
             properties: send,
             ..Default::default()
         };
-        let _ = self.sender.try_send(AnalyticsMsg::BatchMessage(event.into()));
+        let _ = self
+            .sender
+            .try_send(AnalyticsMsg::BatchMessage(event.into()));
     }
 
     fn get_search(&self, aggregate: SearchAggregator) {
-        let _ = self.sender.try_send(AnalyticsMsg::AggregateGetSearch(aggregate));
+        let _ = self
+            .sender
+            .try_send(AnalyticsMsg::AggregateGetSearch(aggregate));
     }
 
     fn post_search(&self, aggregate: SearchAggregator) {
diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs
index 152c4bbcc..f831c0149 100644
--- a/meilisearch-http/src/main.rs
+++ b/meilisearch-http/src/main.rs
@@ -50,7 +50,7 @@ async fn main() -> anyhow::Result<()> {
     let (analytics, user) = if !opt.no_analytics {
         analytics::SegmentAnalytics::new(&opt, &meilisearch).await
     } else {
-        analytics::MockAnalytics::new(&opt)
+        analytics::create_(&opt)
     };
     #[cfg(any(debug_assertions, not(feature = "analytics")))]
     let (analytics, user) = analytics::MockAnalytics::new(&opt);
diff --git a/meilisearch-http/src/routes/dump.rs b/meilisearch-http/src/routes/dump.rs
index 849b8c654..00ff23584 100644
--- a/meilisearch-http/src/routes/dump.rs
+++ b/meilisearch-http/src/routes/dump.rs
@@ -16,7 +16,7 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 pub async fn create_dump(
     meilisearch: GuardedData<Private, MeiliSearch>,
     req: HttpRequest,
-    analytics: web::Data<&'static dyn Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
 
diff --git a/meilisearch-http/src/routes/indexes/documents.rs b/meilisearch-http/src/routes/indexes/documents.rs
index d0e81e3da..22a8d5b60 100644
--- a/meilisearch-http/src/routes/indexes/documents.rs
+++ b/meilisearch-http/src/routes/indexes/documents.rs
@@ -132,7 +132,7 @@ pub async fn add_documents(
     params: web::Query<UpdateDocumentsQuery>,
     body: Payload,
     req: HttpRequest,
-    analytics: web::Data<&'static dyn Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", params);
     let content_type = req
@@ -164,7 +164,7 @@ pub async fn update_documents(
     params: web::Query<UpdateDocumentsQuery>,
     body: Payload,
     req: HttpRequest,
-    analytics: web::Data<&'static dyn Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", params);
     let content_type = req
diff --git a/meilisearch-http/src/routes/indexes/mod.rs b/meilisearch-http/src/routes/indexes/mod.rs
index 81f937013..4a4fcd250 100644
--- a/meilisearch-http/src/routes/indexes/mod.rs
+++ b/meilisearch-http/src/routes/indexes/mod.rs
@@ -57,7 +57,7 @@ pub async fn create_index(
     meilisearch: GuardedData<Private, MeiliSearch>,
     body: web::Json<IndexCreateRequest>,
     req: HttpRequest,
-    analytics: web::Data<&'static dyn Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let body = body.into_inner();
 
@@ -101,7 +101,7 @@ pub async fn update_index(
     path: web::Path<IndexParam>,
     body: web::Json<UpdateIndexRequest>,
     req: HttpRequest,
-    analytics: web::Data<&'static dyn Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", body);
     let body = body.into_inner();
diff --git a/meilisearch-http/src/routes/indexes/search.rs b/meilisearch-http/src/routes/indexes/search.rs
index f82f231f3..5f7a91673 100644
--- a/meilisearch-http/src/routes/indexes/search.rs
+++ b/meilisearch-http/src/routes/indexes/search.rs
@@ -111,7 +111,7 @@ pub async fn search_with_url_query(
     path: web::Path<IndexParam>,
     params: web::Query<SearchQuery>,
     req: HttpRequest,
-    analytics: web::Data<&'static dyn Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     debug!("called with params: {:?}", params);
     let query: SearchQuery = params.into_inner().into();
@@ -138,7 +138,7 @@ pub async fn search_with_post(
     path: web::Path<IndexParam>,
     params: web::Json<SearchQuery>,
     req: HttpRequest,
-    analytics: web::Data<&'static dyn Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let query = params.into_inner();
     debug!("search called with params: {:?}", query);
diff --git a/meilisearch-http/src/routes/indexes/settings.rs b/meilisearch-http/src/routes/indexes/settings.rs
index bccb1640c..8f9b9f14b 100644
--- a/meilisearch-http/src/routes/indexes/settings.rs
+++ b/meilisearch-http/src/routes/indexes/settings.rs
@@ -43,7 +43,7 @@ macro_rules! make_setting_route {
                 index_uid: actix_web::web::Path<String>,
                 body: actix_web::web::Json<Option<$type>>,
                 req: HttpRequest,
-                $analytics_var: web::Data<&'static dyn Analytics>,
+                $analytics_var: web::Data<dyn Analytics>,
             ) -> std::result::Result<HttpResponse, ResponseError> {
                 let body = body.into_inner();
 
@@ -230,7 +230,7 @@ pub async fn update_all(
     index_uid: web::Path<String>,
     body: web::Json<Settings<Unchecked>>,
     req: HttpRequest,
-    analytics: web::Data<&'static dyn Analytics>,
+    analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     let settings = body.into_inner();
 
diff --git a/meilisearch-http/tests/common/service.rs b/meilisearch-http/tests/common/service.rs
index 299769107..a15525fee 100644
--- a/meilisearch-http/tests/common/service.rs
+++ b/meilisearch-http/tests/common/service.rs
@@ -15,7 +15,7 @@ impl Service {
             &self.meilisearch,
             true,
             &self.options,
-            analytics::MockAnalytics::new(&self.options)
+            analytics::MockAnalytics::new(&self.options).0
         ))
         .await;
 
@@ -41,7 +41,7 @@ impl Service {
             &self.meilisearch,
             true,
             &self.options,
-            analytics::MockAnalytics::new(&self.options)
+            analytics::MockAnalytics::new(&self.options).0
         ))
         .await;
 
@@ -63,7 +63,7 @@ impl Service {
             &self.meilisearch,
             true,
             &self.options,
-            analytics::MockAnalytics::new(&self.options)
+            analytics::MockAnalytics::new(&self.options).0
         ))
         .await;
 
@@ -81,7 +81,7 @@ impl Service {
             &self.meilisearch,
             true,
             &self.options,
-            analytics::MockAnalytics::new(&self.options)
+            analytics::MockAnalytics::new(&self.options).0
         ))
         .await;
 
@@ -102,7 +102,7 @@ impl Service {
             &self.meilisearch,
             true,
             &self.options,
-            analytics::MockAnalytics::new(&self.options)
+            analytics::MockAnalytics::new(&self.options).0
         ))
         .await;
 
diff --git a/meilisearch-http/tests/content_type.rs b/meilisearch-http/tests/content_type.rs
index 79c497bb7..481b12bf1 100644
--- a/meilisearch-http/tests/content_type.rs
+++ b/meilisearch-http/tests/content_type.rs
@@ -41,7 +41,7 @@ async fn error_json_bad_content_type() {
         &server.service.meilisearch,
         true,
         &server.service.options,
-        analytics::MockAnalytics::new(&server.service.options)
+        analytics::MockAnalytics::new(&server.service.options).0
     ))
     .await;
     for route in routes {
diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs
index 9e55ef2bd..e9fdf7233 100644
---
a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -20,7 +20,7 @@ async fn add_documents_test_json_content_types() { &server.service.meilisearch, true, &server.service.options, - analytics::MockAnalytics::new(&server.service.options) + analytics::MockAnalytics::new(&server.service.options).0 )) .await; // post @@ -65,7 +65,7 @@ async fn error_add_documents_test_bad_content_types() { &server.service.meilisearch, true, &server.service.options, - analytics::MockAnalytics::new(&server.service.options) + analytics::MockAnalytics::new(&server.service.options).0 )) .await; // post @@ -132,7 +132,7 @@ async fn error_add_documents_test_no_content_type() { &server.service.meilisearch, true, &server.service.options, - analytics::MockAnalytics::new(&server.service.options) + analytics::MockAnalytics::new(&server.service.options).0 )) .await; // post From bd49d1c4b55aa251f2b1d0a7c5bf48914ed25b15 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 29 Oct 2021 16:46:00 +0200 Subject: [PATCH 67/68] fix one small bug --- meilisearch-http/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilisearch-http/src/main.rs b/meilisearch-http/src/main.rs index f831c0149..152c4bbcc 100644 --- a/meilisearch-http/src/main.rs +++ b/meilisearch-http/src/main.rs @@ -50,7 +50,7 @@ async fn main() -> anyhow::Result<()> { let (analytics, user) = if !opt.no_analytics { analytics::SegmentAnalytics::new(&opt, &meilisearch).await } else { - analytics::create_(&opt) + analytics::MockAnalytics::new(&opt) }; #[cfg(any(debug_assertions, not(feature = "analytics")))] let (analytics, user) = analytics::MockAnalytics::new(&opt); From 519093ea65c9a39a49f56cac8439a68b29a18f7e Mon Sep 17 00:00:00 2001 From: marin postma Date: Fri, 29 Oct 2021 17:32:05 +0200 Subject: [PATCH 68/68] fix bad rebase --- .../tests/documents/add_documents.rs | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/meilisearch-http/tests/documents/add_documents.rs b/meilisearch-http/tests/documents/add_documents.rs index e9fdf7233..e1747d779 100644 --- a/meilisearch-http/tests/documents/add_documents.rs +++ b/meilisearch-http/tests/documents/add_documents.rs @@ -190,7 +190,8 @@ async fn error_add_malformed_csv_documents() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + analytics::MockAnalytics::new(&server.service.options).0 )) .await; // post @@ -250,7 +251,8 @@ async fn error_add_malformed_json_documents() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + analytics::MockAnalytics::new(&server.service.options).0 )) .await; // post @@ -310,7 +312,8 @@ async fn error_add_malformed_ndjson_documents() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + analytics::MockAnalytics::new(&server.service.options).0 )) .await; // post @@ -370,7 +373,8 @@ async fn error_add_missing_payload_csv_documents() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + analytics::MockAnalytics::new(&server.service.options).0 )) .await; // post @@ -420,7 +424,8 @@ async fn error_add_missing_payload_json_documents() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + 
analytics::MockAnalytics::new(&server.service.options).0 )) .await; // post @@ -470,7 +475,8 @@ async fn error_add_missing_payload_ndjson_documents() { let app = test::init_service(create_app!( &server.service.meilisearch, true, - &server.service.options + &server.service.options, + analytics::MockAnalytics::new(&server.service.options).0 )) .await; // post
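
As an aside on the pattern this series converges on: both `MockAnalytics::new` and `SegmentAnalytics::new` now return an `(Arc<dyn Analytics>, String)` pair, and actix-web's `web::Data::from` accepts that `Arc` directly. The standalone sketch below illustrates the `Arc<dyn Trait>` sharing that replaces the old `Box::leak`/`&'static` scheme; the `NoopAnalytics` type and the thread wiring are illustrative assumptions, not code from these patches:

    use std::sync::Arc;
    use std::thread;

    // Same shape as the patched trait: object-safe and `Sync + Send`, taking
    // `&self` instead of `&'static self` so the instance no longer has to be
    // leaked to obtain a 'static lifetime.
    trait Analytics: Sync + Send {
        fn publish(&self, event_name: String);
    }

    // Illustrative stand-in for MockAnalytics / SegmentAnalytics.
    struct NoopAnalytics;

    impl Analytics for NoopAnalytics {
        fn publish(&self, _event_name: String) {}
    }

    fn main() {
        // `Arc<NoopAnalytics>` coerces to `Arc<dyn Analytics>`; each clone only
        // bumps a reference count, so HTTP workers and a background task can
        // share the same instance without a static borrow.
        let analytics: Arc<dyn Analytics> = Arc::new(NoopAnalytics);

        let for_worker = Arc::clone(&analytics);
        let worker = thread::spawn(move || {
            for_worker.publish("Launched".to_string());
        });

        analytics.publish("Dump Created".to_string());
        worker.join().unwrap();
    }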