mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 03:47:02 +02:00
write the dump export
This commit is contained in:
parent
7ce336306d
commit
9323f9f1c4
25 changed files with 686 additions and 184 deletions
|
@ -1,16 +1,19 @@
|
|||
use std::{any::Any, sync::Arc};
|
||||
|
||||
use actix_web::HttpRequest;
|
||||
use meilisearch_types::InstanceUid;
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::{routes::indexes::documents::UpdateDocumentsQuery, Opt};
|
||||
|
||||
use super::{find_user_id, Analytics};
|
||||
|
||||
pub struct MockAnalytics;
|
||||
pub struct MockAnalytics {
|
||||
instance_uid: Option<InstanceUid>,
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct SearchAggregator {}
|
||||
pub struct SearchAggregator;
|
||||
|
||||
#[allow(dead_code)]
|
||||
impl SearchAggregator {
|
||||
|
@ -23,13 +26,17 @@ impl SearchAggregator {
|
|||
|
||||
impl MockAnalytics {
|
||||
#[allow(clippy::new_ret_no_self)]
|
||||
pub fn new(opt: &Opt) -> (Arc<dyn Analytics>, String) {
|
||||
let user = find_user_id(&opt.db_path).unwrap_or_default();
|
||||
(Arc::new(Self), user)
|
||||
pub fn new(opt: &Opt) -> Arc<dyn Analytics> {
|
||||
let instance_uid = find_user_id(&opt.db_path);
|
||||
Arc::new(Self { instance_uid })
|
||||
}
|
||||
}
|
||||
|
||||
impl Analytics for MockAnalytics {
|
||||
fn instance_uid(&self) -> Option<&meilisearch_types::InstanceUid> {
|
||||
self.instance_uid.as_ref()
|
||||
}
|
||||
|
||||
// These methods are noop and should be optimized out
|
||||
fn publish(&self, _event_name: String, _send: Value, _request: Option<&HttpRequest>) {}
|
||||
fn get_search(&self, _aggregate: super::SearchAggregator) {}
|
||||
|
|
|
@ -5,8 +5,10 @@ mod segment_analytics;
|
|||
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::str::FromStr;
|
||||
|
||||
use actix_web::HttpRequest;
|
||||
use meilisearch_types::InstanceUid;
|
||||
use once_cell::sync::Lazy;
|
||||
use platform_dirs::AppDirs;
|
||||
use serde_json::Value;
|
||||
|
@ -51,13 +53,16 @@ fn config_user_id_path(db_path: &Path) -> Option<PathBuf> {
|
|||
}
|
||||
|
||||
/// Look for the instance-uid in the `data.ms` or in `~/.config/Meilisearch/path-to-db-instance-uid`
|
||||
fn find_user_id(db_path: &Path) -> Option<String> {
|
||||
fn find_user_id(db_path: &Path) -> Option<InstanceUid> {
|
||||
fs::read_to_string(db_path.join("instance-uid"))
|
||||
.ok()
|
||||
.or_else(|| fs::read_to_string(&config_user_id_path(db_path)?).ok())
|
||||
.and_then(|uid| InstanceUid::from_str(&uid).ok())
|
||||
}
|
||||
|
||||
pub trait Analytics: Sync + Send {
|
||||
fn instance_uid(&self) -> Option<&InstanceUid>;
|
||||
|
||||
/// The method used to publish most analytics that do not need to be batched every hours
|
||||
fn publish(&self, event_name: String, send: Value, request: Option<&HttpRequest>);
|
||||
|
||||
|
|
|
@ -37,6 +37,7 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<IndexScheduler> {
|
|||
opt.db_path.join("tasks"),
|
||||
opt.db_path.join("update_files"),
|
||||
opt.db_path.join("indexes"),
|
||||
opt.dumps_dir.clone(),
|
||||
opt.max_index_size.get_bytes() as usize,
|
||||
(&opt.indexer_options).try_into()?,
|
||||
true,
|
||||
|
|
|
@ -53,15 +53,15 @@ async fn main() -> anyhow::Result<()> {
|
|||
let auth_controller = AuthController::new(&opt.db_path, &opt.master_key)?;
|
||||
|
||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||
let (analytics, user) = if !opt.no_analytics {
|
||||
let analytics = if !opt.no_analytics {
|
||||
analytics::SegmentAnalytics::new(&opt, &meilisearch).await
|
||||
} else {
|
||||
analytics::MockAnalytics::new(&opt)
|
||||
};
|
||||
#[cfg(any(debug_assertions, not(feature = "analytics")))]
|
||||
let (analytics, user) = analytics::MockAnalytics::new(&opt);
|
||||
let analytics = analytics::MockAnalytics::new(&opt);
|
||||
|
||||
print_launch_resume(&opt, &user, config_read_from);
|
||||
print_launch_resume(&opt, analytics.clone(), config_read_from);
|
||||
|
||||
run_http(index_scheduler, auth_controller, opt, analytics).await?;
|
||||
|
||||
|
@ -133,7 +133,7 @@ async fn run_http(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
pub fn print_launch_resume(opt: &Opt, user: &str, config_read_from: Option<PathBuf>) {
|
||||
pub fn print_launch_resume(opt: &Opt, analytics: Arc<dyn Analytics>, config_read_from: Option<PathBuf>) {
|
||||
let commit_sha = option_env!("VERGEN_GIT_SHA").unwrap_or("unknown");
|
||||
let commit_date = option_env!("VERGEN_GIT_COMMIT_TIMESTAMP").unwrap_or("unknown");
|
||||
let protocol = if opt.ssl_cert_path.is_some() && opt.ssl_key_path.is_some() {
|
||||
|
@ -186,8 +186,8 @@ Anonymous telemetry:\t\"Enabled\""
|
|||
}
|
||||
}
|
||||
|
||||
if !user.is_empty() {
|
||||
eprintln!("Instance UID:\t\t\"{}\"", user);
|
||||
if let Some(instance_uid) = analytics.instance_uid() {
|
||||
eprintln!("Instance UID:\t\t\"{}\"", instance_uid.to_string());
|
||||
}
|
||||
|
||||
eprintln!();
|
||||
|
|
|
@ -2,13 +2,17 @@ use actix_web::web::Data;
|
|||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use log::debug;
|
||||
use meilisearch_auth::AuthController;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use serde_json::json;
|
||||
use time::macros::format_description;
|
||||
use time::OffsetDateTime;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
pub fn configure(cfg: &mut web::ServiceConfig) {
|
||||
cfg.service(web::resource("").route(web::post().to(SeqHandler(create_dump))));
|
||||
|
@ -16,16 +20,28 @@ pub fn configure(cfg: &mut web::ServiceConfig) {
|
|||
|
||||
pub async fn create_dump(
|
||||
index_scheduler: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, Data<IndexScheduler>>,
|
||||
auth_controller: GuardedData<ActionPolicy<{ actions::DUMPS_CREATE }>, AuthController>,
|
||||
req: HttpRequest,
|
||||
analytics: web::Data<dyn Analytics>,
|
||||
) -> Result<HttpResponse, ResponseError> {
|
||||
analytics.publish("Dump Created".to_string(), json!({}), Some(&req));
|
||||
|
||||
let task = KindWithContent::DumpExport {
|
||||
output: "todo".to_string().into(),
|
||||
};
|
||||
let res = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let dump_uid = OffsetDateTime::now_utc()
|
||||
.format(format_description!(
|
||||
"[year repr:full][month repr:numerical][day padding:zero]-[hour padding:zero][minute padding:zero][second padding:zero][subsecond digits:3]"
|
||||
))
|
||||
.unwrap();
|
||||
|
||||
debug!("returns: {:?}", res);
|
||||
Ok(HttpResponse::Accepted().json(res))
|
||||
let task = KindWithContent::DumpExport {
|
||||
keys: auth_controller.list_keys()?,
|
||||
instance_uid: analytics.instance_uid().cloned(),
|
||||
dump_uid,
|
||||
};
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
|
|
@ -109,7 +109,10 @@ pub async fn delete_document(
|
|||
index_uid,
|
||||
documents_ids: vec![document_id],
|
||||
};
|
||||
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
@ -314,7 +317,10 @@ pub async fn delete_documents(
|
|||
index_uid: path.into_inner(),
|
||||
documents_ids: ids,
|
||||
};
|
||||
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
@ -327,7 +333,10 @@ pub async fn clear_all_documents(
|
|||
let task = KindWithContent::DocumentClear {
|
||||
index_uid: path.into_inner(),
|
||||
};
|
||||
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
|
|
@ -13,7 +13,7 @@ use crate::analytics::Analytics;
|
|||
use crate::extractors::authentication::{policies::*, AuthenticationError, GuardedData};
|
||||
use crate::extractors::sequential_extractor::SeqHandler;
|
||||
|
||||
use super::Pagination;
|
||||
use super::{Pagination, SummarizedTaskView};
|
||||
|
||||
pub mod documents;
|
||||
pub mod search;
|
||||
|
@ -108,7 +108,10 @@ pub async fn create_index(
|
|||
index_uid: uid,
|
||||
primary_key,
|
||||
};
|
||||
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
} else {
|
||||
|
@ -156,7 +159,10 @@ pub async fn update_index(
|
|||
primary_key: body.primary_key,
|
||||
};
|
||||
|
||||
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
@ -169,7 +175,10 @@ pub async fn delete_index(
|
|||
let task = KindWithContent::IndexDeletion {
|
||||
index_uid: index_uid.into_inner(),
|
||||
};
|
||||
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
|
|
@ -1,27 +1,16 @@
|
|||
use std::collections::BTreeSet;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use actix_web::web::Data;
|
||||
use fst::IntoStreamer;
|
||||
use log::debug;
|
||||
|
||||
use actix_web::{web, HttpRequest, HttpResponse};
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use meilisearch_types::heed::RoTxn;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::milli::{self, DEFAULT_VALUES_PER_FACET};
|
||||
use meilisearch_types::settings::{
|
||||
Checked, FacetingSettings, MinWordSizeTyposSetting, PaginationSettings, Settings, TypoSettings,
|
||||
Unchecked,
|
||||
};
|
||||
use meilisearch_types::settings::{settings, Settings, Unchecked};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
use meilisearch_types::Index;
|
||||
use serde_json::json;
|
||||
|
||||
use crate::analytics::Analytics;
|
||||
use crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use crate::search::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
use crate::routes::SummarizedTaskView;
|
||||
|
||||
#[macro_export]
|
||||
macro_rules! make_setting_route {
|
||||
|
@ -33,14 +22,14 @@ macro_rules! make_setting_route {
|
|||
|
||||
use index_scheduler::IndexScheduler;
|
||||
use meilisearch_types::milli::update::Setting;
|
||||
use meilisearch_types::settings::Settings;
|
||||
use meilisearch_types::settings::{settings, Settings};
|
||||
use meilisearch_types::tasks::KindWithContent;
|
||||
|
||||
use meilisearch_types::error::ResponseError;
|
||||
use $crate::analytics::Analytics;
|
||||
use $crate::extractors::authentication::{policies::*, GuardedData};
|
||||
use $crate::extractors::sequential_extractor::SeqHandler;
|
||||
use $crate::routes::indexes::settings::settings;
|
||||
use $crate::routes::SummarizedTaskView;
|
||||
|
||||
pub async fn delete(
|
||||
index_scheduler: GuardedData<
|
||||
|
@ -61,8 +50,10 @@ macro_rules! make_setting_route {
|
|||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
@ -97,8 +88,10 @@ macro_rules! make_setting_route {
|
|||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
@ -459,7 +452,10 @@ pub async fn update_all(
|
|||
is_deletion: false,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
|
@ -489,113 +485,11 @@ pub async fn delete_all(
|
|||
is_deletion: true,
|
||||
allow_index_creation,
|
||||
};
|
||||
let task = tokio::task::spawn_blocking(move || index_scheduler.register(task)).await??;
|
||||
let task: SummarizedTaskView =
|
||||
tokio::task::spawn_blocking(move || index_scheduler.register(task))
|
||||
.await??
|
||||
.into();
|
||||
|
||||
debug!("returns: {:?}", task);
|
||||
Ok(HttpResponse::Accepted().json(task))
|
||||
}
|
||||
|
||||
pub fn settings(index: &Index, rtxn: &RoTxn) -> Result<Settings<Checked>, milli::Error> {
|
||||
let displayed_attributes = index
|
||||
.displayed_fields(rtxn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let searchable_attributes = index
|
||||
.user_defined_searchable_fields(rtxn)?
|
||||
.map(|fields| fields.into_iter().map(String::from).collect());
|
||||
|
||||
let filterable_attributes = index.filterable_fields(rtxn)?.into_iter().collect();
|
||||
|
||||
let sortable_attributes = index.sortable_fields(rtxn)?.into_iter().collect();
|
||||
|
||||
let criteria = index
|
||||
.criteria(rtxn)?
|
||||
.into_iter()
|
||||
.map(|c| c.to_string())
|
||||
.collect();
|
||||
|
||||
let stop_words = index
|
||||
.stop_words(rtxn)?
|
||||
.map(|stop_words| -> Result<BTreeSet<_>, milli::Error> {
|
||||
Ok(stop_words.stream().into_strs()?.into_iter().collect())
|
||||
})
|
||||
.transpose()?
|
||||
.unwrap_or_default();
|
||||
let distinct_field = index.distinct_field(rtxn)?.map(String::from);
|
||||
|
||||
// in milli each word in the synonyms map were split on their separator. Since we lost
|
||||
// this information we are going to put space between words.
|
||||
let synonyms = index
|
||||
.synonyms(rtxn)?
|
||||
.iter()
|
||||
.map(|(key, values)| {
|
||||
(
|
||||
key.join(" "),
|
||||
values.iter().map(|value| value.join(" ")).collect(),
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
let min_typo_word_len = MinWordSizeTyposSetting {
|
||||
one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
|
||||
two_typos: Setting::Set(index.min_word_len_two_typos(rtxn)?),
|
||||
};
|
||||
|
||||
let disabled_words = match index.exact_words(rtxn)? {
|
||||
Some(fst) => fst.into_stream().into_strs()?.into_iter().collect(),
|
||||
None => BTreeSet::new(),
|
||||
};
|
||||
|
||||
let disabled_attributes = index
|
||||
.exact_attributes(rtxn)?
|
||||
.into_iter()
|
||||
.map(String::from)
|
||||
.collect();
|
||||
|
||||
let typo_tolerance = TypoSettings {
|
||||
enabled: Setting::Set(index.authorize_typos(rtxn)?),
|
||||
min_word_size_for_typos: Setting::Set(min_typo_word_len),
|
||||
disable_on_words: Setting::Set(disabled_words),
|
||||
disable_on_attributes: Setting::Set(disabled_attributes),
|
||||
};
|
||||
|
||||
let faceting = FacetingSettings {
|
||||
max_values_per_facet: Setting::Set(
|
||||
index
|
||||
.max_values_per_facet(rtxn)?
|
||||
.unwrap_or(DEFAULT_VALUES_PER_FACET),
|
||||
),
|
||||
};
|
||||
|
||||
let pagination = PaginationSettings {
|
||||
max_total_hits: Setting::Set(
|
||||
index
|
||||
.pagination_max_total_hits(rtxn)?
|
||||
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS),
|
||||
),
|
||||
};
|
||||
|
||||
Ok(Settings {
|
||||
displayed_attributes: match displayed_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
searchable_attributes: match searchable_attributes {
|
||||
Some(attrs) => Setting::Set(attrs),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
filterable_attributes: Setting::Set(filterable_attributes),
|
||||
sortable_attributes: Setting::Set(sortable_attributes),
|
||||
ranking_rules: Setting::Set(criteria),
|
||||
stop_words: Setting::Set(stop_words),
|
||||
distinct_attribute: match distinct_field {
|
||||
Some(field) => Setting::Set(field),
|
||||
None => Setting::Reset,
|
||||
},
|
||||
synonyms: Setting::Set(synonyms),
|
||||
typo_tolerance: Setting::Set(typo_tolerance),
|
||||
faceting: Setting::Set(faceting),
|
||||
pagination: Setting::Set(pagination),
|
||||
_kind: PhantomData,
|
||||
})
|
||||
}
|
||||
|
|
|
@ -121,7 +121,7 @@ impl From<Details> for DetailsView {
|
|||
indexed_documents,
|
||||
} => DetailsView {
|
||||
received_documents: Some(received_documents),
|
||||
indexed_documents: Some(indexed_documents),
|
||||
indexed_documents,
|
||||
..DetailsView::default()
|
||||
},
|
||||
Details::Settings { settings } => DetailsView {
|
||||
|
|
|
@ -4,6 +4,7 @@ use std::str::FromStr;
|
|||
use std::time::Instant;
|
||||
|
||||
use either::Either;
|
||||
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
|
||||
use meilisearch_types::{milli, Document};
|
||||
use milli::tokenizer::TokenizerBuilder;
|
||||
use milli::{
|
||||
|
@ -24,10 +25,6 @@ pub const DEFAULT_CROP_MARKER: fn() -> String = || "…".to_string();
|
|||
pub const DEFAULT_HIGHLIGHT_PRE_TAG: fn() -> String = || "<em>".to_string();
|
||||
pub const DEFAULT_HIGHLIGHT_POST_TAG: fn() -> String = || "</em>".to_string();
|
||||
|
||||
/// The maximimum number of results that the engine
|
||||
/// will be able to return in one search call.
|
||||
pub const DEFAULT_PAGINATION_MAX_TOTAL_HITS: usize = 1000;
|
||||
|
||||
#[derive(Deserialize, Debug, Clone, PartialEq, Eq)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
pub struct SearchQuery {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue