write the dump export

Tamo authored on 2022-10-13 15:02:59 +02:00; committed by Clément Renault
parent 7ce336306d
commit 9323f9f1c4
signature: no known key found for this signature in database (GPG key ID: 92ADA4E935E71FA4)
25 changed files with 686 additions and 184 deletions

meilisearch-http/src/analytics/mock_analytics.rs

@@ -1,16 +1,19 @@
 use std::{any::Any, sync::Arc};

 use actix_web::HttpRequest;
+use meilisearch_types::InstanceUid;
 use serde_json::Value;

 use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt};
 use super::{find_user_id, Analytics};

-pub struct MockAnalytics;
+pub struct MockAnalytics {
+    instance_uid: Option<InstanceUid>,
+}

 #[derive(Default)]
-pub struct SearchAggregator {}
+pub struct SearchAggregator;

 #[allow(dead_code)]
 impl SearchAggregator {
@@ -23,13 +26,17 @@ impl SearchAggregator {
 impl MockAnalytics {
     #[allow(clippy::new_ret_no_self)]
-    pub fn new(opt: &Opt) -> (Arc<dyn Analytics>, String) {
-        let user = find_user_id(&opt.db_path).unwrap_or_default();
-        (Arc::new(Self), user)
+    pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
+        let instance_uid = find_user_id(&opt.db_path);
+        Arc::new(Self { instance_uid })
     }
 }

 impl Analytics for MockAnalytics {
+    fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
+        self.instance_uid.as_ref()
+    }
+
     // These methods are noop and should be optimized out
     fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
     fn get_search(&self, _aggregate: super::SearchAggregator) {}
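
The new `instance_uid` getter relies on `Option::as_ref` to turn the stored `&Option<InstanceUid>` into an `Option<&InstanceUid>`, so callers borrow the uid without the mock having to clone it. A minimal standalone sketch of that pattern (the names here are illustrative, not Meilisearch's):

struct Holder {
    value: Option<String>,
}

impl Holder {
    // &Option<String> -> Option<&String>, no clone involved
    fn value(&self) -> Option<&String> {
        self.value.as_ref()
    }
}

fn main() {
    let holder = Holder { value: Some("instance-uid".into()) };
    assert_eq!(holder.value().map(String::as_str), Some("instance-uid"));
}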

meilisearch-http/src/analytics/mod.rs

@@ -5,8 +5,10 @@ mod segment_analytics;
 use std::fs;
 use std::path::{Path, PathBuf};
+use std::str::FromStr;

 use actix_web::HttpRequest;
+use meilisearch_types::InstanceUid;
 use once_cell::sync::Lazy;
 use platform_dirs::AppDirs;
 use serde_json::Value;
@@ -51,13 +53,16 @@ fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
 }

 /// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid`
-fn find_user_id(db_path: &Path) -> Option<String> {
+fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
     fs::read_to_string(db_path.join("instance-uid"))
         .ok()
        .or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
+        .and_then(|uid| InstanceUid::from_str(&uid).ok())
 }

 pub trait Analytics: Sync + Send {
+    fn instance_uid(&self) -> Option<&InstanceUid>;
+
     /// The method used to publish most analytics that do not need to be batched every hours
     fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);
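
The reworked `find_user_id` chains the two lookup locations and the parse step so that any failure along the way simply yields `None`. A self-contained sketch of the same shape, assuming `InstanceUid` is a `Uuid` alias (which the `from_str` call suggests) and using a stand-in for the config-dir helper:

use std::fs;
use std::path::{Path, PathBuf};
use std::str::FromStr;

use uuid::Uuid;

// Stand-in for the real config_user_id_path, which resolves a copy of the
// uid kept under the user's config directory.
fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
    Some(db_path.join("instance-uid-backup"))
}

fn find_user_id(db_path: &Path) -> Option<Uuid> {
    fs::read_to_string(db_path.join("instance-uid"))
        .ok()
        // `?` inside the closure propagates None when no config path exists
        .or_else(|| fs::read_to_string(config_user_id_path(db_path)?).ok())
        // a file that exists but holds garbage also degrades to None
        .and_then(|uid| Uuid::from_str(uid.trim()).ok())
}

fn main() {
    println!("{:?}", find_user_id(Path::new("./data.ms")));
}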

meilisearch-http/src/lib.rs

@@ -37,6 +37,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
         opt.db_path.join("tasks"),
         opt.db_path.join("update_files"),
         opt.db_path.join("indexes"),
+        opt.dumps_dir.clone(),
         opt.max_index_size.get_bytes() as usize,
         (&opt.indexer_options).try_into()?,
         true,

meilisearch-http/src/main.rs

@@ -53,15 +53,15 @@ async fn main() -> anyhow::Result<()> {
     let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?;

     #[cfg(all(not(debug_assertions), feature = "analytics"))]
-    let (analytics, user) = if !opt.no_analytics {
+    let analytics = if !opt.no_analytics {
         analytics::SegmentAnalytics::new(&opt, &meilisearch).await
     } else {
         analytics::MockAnalytics::new(&opt)
     };
     #[cfg(any(debug_assertions, not(feature = "analytics")))]
-    let (analytics, user) = analytics::MockAnalytics::new(&opt);
+    let analytics = analytics::MockAnalytics::new(&opt);

-    print_launch_resume(&opt, &user, config_read_from);
+    print_launch_resume(&opt, analytics.clone(), config_read_from);

     run_http(index_scheduler, auth_controller, opt, analytics).await?;
@@ -133,7 +133,7 @@ async fn run_http(
     Ok(())
 }

-pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option<PathBuf>) {
+pub fn print_launch_resume(opt: &Opt, analytics: Arc<dyn Analytics>, config_read_from: Option<PathBuf>) {
     let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
     let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
     let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() {
@@ -186,8 +186,8 @@ Anonymous telemetry:\t\"Enabled\""
         }
     }

-    if !user.is_empty() {
-        eprintln!("Instance UID:\t\t\"{}\"", user);
+    if let Some(instance_uid) = analytics.instance_uid() {
+        eprintln!("Instance UID:\t\t\"{}\"", instance_uid.to_string());
     }

     eprintln!();
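
Both `cfg` branches now evaluate to a plain `Arc<dyn Analytics>`, which is what lets `print_launch_resume` take the trait object instead of a pre-extracted `user` string. A compressed sketch of that shape (trait and types simplified for illustration):

use std::sync::Arc;

trait Analytics: Send + Sync {
    fn instance_uid(&self) -> Option<&str>;
}

struct MockAnalytics;
impl Analytics for MockAnalytics {
    fn instance_uid(&self) -> Option<&str> {
        None
    }
}

fn print_launch_resume(analytics: Arc<dyn Analytics>) {
    // mirrors the new `if let Some(...)` above: nothing is printed when the
    // active implementation has no uid to report
    if let Some(uid) = analytics.instance_uid() {
        eprintln!("Instance UID:\t\t\"{uid}\"");
    }
}

fn main() {
    let analytics: Arc<dyn Analytics> = Arc::new(MockAnalytics);
    print_launch_resume(analytics.clone());
}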

meilisearch-http/src/routes/dump.rs

@@ -2,13 +2,17 @@ use actix_web::web::Data;
 use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::IndexScheduler;
 use log::debug;
+use meilisearch_auth::AuthController;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::tasks::KindWithContent;
 use serde_json::json;
+use time::macros::format_description;
+use time::OffsetDateTime;

 use crate::analytics::Analytics;
 use crate::extractors::authentication::{policies::*, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;
+use crate::routes::SummarizedTaskView;

 pub fn configure(cfg: &mut web::ServiceConfig) {
     cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
@@ -16,16 +20,28 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
 pub async fn create_dump(
     index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
+    auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
     req: HttpRequest,
     analytics: web::Data<dyn Analytics>,
 ) -> Result<HttpResponse, ResponseError> {
     analytics.publish("Dump Created".to_string(), json!({}), Some(&req));

-    let task = KindWithContent::DumpExport {
-        output: "todo".to_string().into(),
-    };
-    let res = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let dump_uid = OffsetDateTime::now_utc()
+        .format(format_description!(
+            "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
+        ))
+        .unwrap();

-    debug!("returns: {:?}", res);
-    Ok(HttpResponse::Accepted().json(res))
+    let task = KindWithContent::DumpExport {
+        keys: auth_controller.list_keys()?,
+        instance_uid: analytics.instance_uid().cloned(),
+        dump_uid,
+    };
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
+
+    debug!("returns: {:?}", task);
+    Ok(HttpResponse::Accepted().json(task))
 }
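
The `dump_uid` is just a timestamp rendered through `time`'s `format_description!` macro; the components above concatenate with no separators beyond the single dash, so 2022-10-13 15:02:59.123 UTC should come out as `20221013-150259123`. A small check of that reading (requires the `time` crate with its `macros` and `formatting` features):

use time::macros::{datetime, format_description};

fn main() {
    let dt = datetime!(2022-10-13 15:02:59.123 UTC);
    let dump_uid = dt
        .format(format_description!(
            "[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
        ))
        .unwrap();
    assert_eq!(dump_uid, "20221013-150259123");
}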

meilisearch-http/src/routes/indexes/documents.rs

@@ -109,7 +109,10 @@ pub async fn delete_document(
         index_uid,
         documents_ids: vec![document_id],
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
 }
@@ -314,7 +317,10 @@ pub async fn delete_documents(
         index_uid: path.into_inner(),
         documents_ids: ids,
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -327,7 +333,10 @@ pub async fn clear_all_documents(
     let task = KindWithContent::DocumentClear {
         index_uid: path.into_inner(),
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();
     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
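
Every handler in this commit now funnels the `Task` returned by `register()` through `.into()` to the same `SummarizedTaskView` response type. A hypothetical reduction of that pattern — the real view in the routes module carries more fields (status, type, enqueue date); these two are just for illustration:

#[derive(Debug)]
struct Task {
    uid: u32,
    index_uid: Option<String>,
}

#[derive(Debug)]
struct SummarizedTaskView {
    task_uid: u32,
    index_uid: Option<String>,
}

// One conversion, written once, instead of hand-building the response
// in every route handler.
impl From<Task> for SummarizedTaskView {
    fn from(task: Task) -> Self {
        SummarizedTaskView { task_uid: task.uid, index_uid: task.index_uid }
    }
}

fn main() {
    let task = Task { uid: 0, index_uid: Some("movies".into()) };
    let view: SummarizedTaskView = task.into(); // same `.into()` as the handlers above
    println!("{view:?}");
}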

meilisearch-http/src/routes/indexes/mod.rs

@@ -13,7 +13,7 @@ use crate::analytics::Analytics;
 use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData};
 use crate::extractors::sequential_extractor::SeqHandler;

-use super::Pagination;
+use super::{Pagination, SummarizedTaskView};

 pub mod documents;
 pub mod search;
@@ -108,7 +108,10 @@ pub async fn create_index(
             index_uid: uid,
             primary_key,
         };
-        let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+        let task: SummarizedTaskView =
+            tokio::task::spawn_blocking(move || index_scheduler.register(task))
+                .await??
+                .into();

         Ok(HttpResponse::Accepted().json(task))
     } else {
@@ -156,7 +159,10 @@ pub async fn update_index(
         primary_key: body.primary_key,
     };

-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -169,7 +175,10 @@ pub async fn delete_index(
     let task = KindWithContent::IndexDeletion {
         index_uid: index_uid.into_inner(),
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     Ok(HttpResponse::Accepted().json(task))
 }

meilisearch-http/src/routes/indexes/settings.rs

@@ -1,27 +1,16 @@
-use std::collections::BTreeSet;
-use std::marker::PhantomData;
-
 use actix_web::web::Data;
-use fst::IntoStreamer;
 use log::debug;
 use actix_web::{web, HttpRequest, HttpResponse};
 use index_scheduler::IndexScheduler;
 use meilisearch_types::error::ResponseError;
-use meilisearch_types::heed::RoTxn;
-use meilisearch_types::milli::update::Setting;
-use meilisearch_types::milli::{self, DEFAULT_VALUES_PER_FACET};
-use meilisearch_types::settings::{
-    Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
-    Unchecked,
-};
+use meilisearch_types::settings::{settings, Settings, Unchecked};
 use meilisearch_types::tasks::KindWithContent;
-use meilisearch_types::Index;
 use serde_json::json;

 use crate::analytics::Analytics;
 use crate::extractors::authentication::{policies::*, GuardedData};
-use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
+use crate::routes::SummarizedTaskView;

 #[macro_export]
 macro_rules! make_setting_route {
@@ -33,14 +22,14 @@ macro_rules! make_setting_route {
             use index_scheduler::IndexScheduler;
             use meilisearch_types::milli::update::Setting;
-            use meilisearch_types::settings::Settings;
+            use meilisearch_types::settings::{settings, Settings};
             use meilisearch_types::tasks::KindWithContent;
             use meilisearch_types::error::ResponseError;

             use $crate::analytics::Analytics;
             use $crate::extractors::authentication::{policies::*, GuardedData};
             use $crate::extractors::sequential_extractor::SeqHandler;
-            use $crate::routes::indexes::settings::settings;
+            use $crate::routes::SummarizedTaskView;

             pub async fn delete(
                 index_scheduler: GuardedData<
@@ -61,8 +50,10 @@ macro_rules! make_setting_route {
                     is_deletion: true,
                     allow_index_creation,
                 };
-                let task =
-                    tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+                let task: SummarizedTaskView =
+                    tokio::task::spawn_blocking(move || index_scheduler.register(task))
+                        .await??
+                        .into();

                 debug!("returns: {:?}", task);
                 Ok(HttpResponse::Accepted().json(task))
@@ -97,8 +88,10 @@ macro_rules! make_setting_route {
                     is_deletion: false,
                     allow_index_creation,
                 };
-                let task =
-                    tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+                let task: SummarizedTaskView =
+                    tokio::task::spawn_blocking(move || index_scheduler.register(task))
+                        .await??
+                        .into();

                 debug!("returns: {:?}", task);
                 Ok(HttpResponse::Accepted().json(task))
@@ -459,7 +452,10 @@ pub async fn update_all(
         is_deletion: false,
         allow_index_creation,
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
@@ -489,113 +485,11 @@ pub async fn delete_all(
         is_deletion: true,
         allow_index_creation,
     };
-    let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
+    let task: SummarizedTaskView =
+        tokio::task::spawn_blocking(move || index_scheduler.register(task))
+            .await??
+            .into();

     debug!("returns: {:?}", task);
     Ok(HttpResponse::Accepted().json(task))
 }
-
-pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>, milli::Error> {
-    let displayed_attributes = index
-        .displayed_fields(rtxn)?
-        .map(|fields| fields.into_iter().map(String::from).collect());
-
-    let searchable_attributes = index
-        .user_defined_searchable_fields(rtxn)?
-        .map(|fields| fields.into_iter().map(String::from).collect());
-
-    let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
-
-    let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
-
-    let criteria = index
-        .criteria(rtxn)?
-        .into_iter()
-        .map(|c| c.to_string())
-        .collect();
-
-    let stop_words = index
-        .stop_words(rtxn)?
-        .map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
-            Ok(stop_words.stream().into_strs()?.into_iter().collect())
-        })
-        .transpose()?
-        .unwrap_or_default();
-    let distinct_field = index.distinct_field(rtxn)?.map(String::from);
-
-    // in milli each word in the synonyms map were split on their separator. Since we lost
-    // this information we are going to put space between words.
-    let synonyms = index
-        .synonyms(rtxn)?
-        .iter()
-        .map(|(key, values)| {
-            (
-                key.join(" "),
-                values.iter().map(|value| value.join(" ")).collect(),
-            )
-        })
-        .collect();
-
-    let min_typo_word_len = MinWordSizeTyposSetting {
-        one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
-        two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
-    };
-
-    let disabled_words = match index.exact_words(rtxn)? {
-        Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
-        None => BTreeSet::new(),
-    };
-
-    let disabled_attributes = index
-        .exact_attributes(rtxn)?
-        .into_iter()
-        .map(String::from)
-        .collect();
-
-    let typo_tolerance = TypoSettings {
-        enabled: Setting::Set(index.authorize_typos(rtxn)?),
-        min_word_size_for_typos: Setting::Set(min_typo_word_len),
-        disable_on_words: Setting::Set(disabled_words),
-        disable_on_attributes: Setting::Set(disabled_attributes),
-    };
-
-    let faceting = FacetingSettings {
-        max_values_per_facet: Setting::Set(
-            index
-                .max_values_per_facet(rtxn)?
-                .unwrap_or(DEFAULT_VALUES_PER_FACET),
-        ),
-    };
-
-    let pagination = PaginationSettings {
-        max_total_hits: Setting::Set(
-            index
-                .pagination_max_total_hits(rtxn)?
-                .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
-        ),
-    };
-
-    Ok(Settings {
-        displayed_attributes: match displayed_attributes {
-            Some(attrs) => Setting::Set(attrs),
-            None => Setting::Reset,
-        },
-        searchable_attributes: match searchable_attributes {
-            Some(attrs) => Setting::Set(attrs),
-            None => Setting::Reset,
-        },
-        filterable_attributes: Setting::Set(filterable_attributes),
-        sortable_attributes: Setting::Set(sortable_attributes),
-        ranking_rules: Setting::Set(criteria),
-        stop_words: Setting::Set(stop_words),
-        distinct_attribute: match distinct_field {
-            Some(field) => Setting::Set(field),
-            None => Setting::Reset,
-        },
-        synonyms: Setting::Set(synonyms),
-        typo_tolerance: Setting::Set(typo_tolerance),
-        faceting: Setting::Set(faceting),
-        pagination: Setting::Set(pagination),
-        _kind: PhantomData,
-    })
-}
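
The function removed here (now imported from `meilisearch_types::settings::settings`) leans on milli's tri-state `Setting` to distinguish "set to a value" from "reset to the default". A sketch of that enum as the removed code uses it, assuming the variants are Set/Reset/NotSet:

// milli's tri-state (sketch): Set overrides the value, Reset restores the
// default, NotSet leaves whatever is stored untouched.
#[derive(Debug)]
#[allow(dead_code)]
enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

// The Option -> Setting mapping the old code used for displayed_attributes
// and distinct_field: a missing value means "reset", not "leave alone".
fn from_option<T>(opt: Option<T>) -> Setting<T> {
    match opt {
        Some(value) => Setting::Set(value),
        None => Setting::Reset,
    }
}

fn main() {
    println!("{:?}", from_option(Some(vec!["title", "overview"])));
    println!("{:?}", from_option::<Vec<&str>>(None));
}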

meilisearch-http/src/routes/tasks.rs

@@ -121,7 +121,7 @@ impl From<Details> for DetailsView {
                 indexed_documents,
             } => DetailsView {
                 received_documents: Some(received_documents),
-                indexed_documents: Some(indexed_documents),
+                indexed_documents,
                 ..DetailsView::default()
             },
             Details::Settings { settings } => DetailsView {

meilisearch-http/src/search.rs

@@ -4,6 +4,7 @@ use std::str::FromStr;
 use std::time::Instant;

 use either::Either;
+use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
 use milli::tokenizer::TokenizerBuilder;
 use milli::{
@@ -24,10 +25,6 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
 pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
 pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();

-/// The maximimum number of results that the engine
-/// will be able to return in one search call.
-pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
-
 #[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
 #[serde(rename_all = "camelCase", deny_unknown_fields)]
 pub struct SearchQuery {