mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Merge #1824
1824: Fix indexation performance on mounted disk r=ManyTheFish a=ManyTheFish We were creating all of our tempfiles in the data.ms directory, but when the database directory is stored on a mounted disk, tempfile I/O throughput decreases, impacting the indexation time. Now, only the tempfiles that are persisted will be created in the database directory. Other tempfiles will stay in the default tmpdir. Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
commit
1c9ceadd8d
@ -8,7 +8,6 @@ pub mod analytics;
|
|||||||
pub mod helpers;
|
pub mod helpers;
|
||||||
pub mod option;
|
pub mod option;
|
||||||
pub mod routes;
|
pub mod routes;
|
||||||
use std::path::Path;
|
|
||||||
use std::time::Duration;
|
use std::time::Duration;
|
||||||
|
|
||||||
use crate::error::MeilisearchHttpError;
|
use crate::error::MeilisearchHttpError;
|
||||||
@ -75,27 +74,6 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
|
|||||||
meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
|
meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Cleans and sets up the temporary file folder in the database directory. This must be done after
|
|
||||||
/// the meilisearch instance has been created, to not interfere with the snapshot and dump loading.
|
|
||||||
pub fn setup_temp_dir(db_path: impl AsRef<Path>) -> anyhow::Result<()> {
|
|
||||||
// Set the tempfile directory in the current db path, to avoid cross device references. Also
|
|
||||||
// remove the previous outstanding files found there
|
|
||||||
//
|
|
||||||
// TODO: if two processes open the same db, one might delete the other tmpdir. Need to make
|
|
||||||
// sure that no one is using it before deleting it.
|
|
||||||
let temp_path = db_path.as_ref().join("tmp");
|
|
||||||
// Ignore error if tempdir doesn't exist
|
|
||||||
let _ = std::fs::remove_dir_all(&temp_path);
|
|
||||||
std::fs::create_dir_all(&temp_path)?;
|
|
||||||
if cfg!(windows) {
|
|
||||||
std::env::set_var("TMP", temp_path);
|
|
||||||
} else {
|
|
||||||
std::env::set_var("TMPDIR", temp_path);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn configure_data(config: &mut web::ServiceConfig, data: MeiliSearch, opt: &Opt) {
|
pub fn configure_data(config: &mut web::ServiceConfig, data: MeiliSearch, opt: &Opt) {
|
||||||
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
|
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
|
||||||
config
|
config
|
||||||
|
@ -46,10 +46,6 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
|
|
||||||
let meilisearch = setup_meilisearch(&opt)?;
|
let meilisearch = setup_meilisearch(&opt)?;
|
||||||
|
|
||||||
// Set up the temp directory to be in the db folder. This is important, since temporary files
|
|
||||||
// don't support being persisted across filesystem boundaries.
|
|
||||||
meilisearch_http::setup_temp_dir(&opt.db_path)?;
|
|
||||||
|
|
||||||
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
#[cfg(all(not(debug_assertions), feature = "analytics"))]
|
||||||
if !opt.no_analytics {
|
if !opt.no_analytics {
|
||||||
let analytics_data = meilisearch.clone();
|
let analytics_data = meilisearch.clone();
|
||||||
|
@ -43,8 +43,9 @@ pub fn load_dump(
|
|||||||
patch_settings(settings_path)?;
|
patch_settings(settings_path)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
let update_path = src.as_ref().join("updates/data.jsonl");
|
let update_dir = src.as_ref().join("updates");
|
||||||
patch_updates(update_path)?;
|
let update_path = update_dir.join("data.jsonl");
|
||||||
|
patch_updates(update_dir, update_path)?;
|
||||||
|
|
||||||
v3::load_dump(
|
v3::load_dump(
|
||||||
meta,
|
meta,
|
||||||
@ -69,7 +70,7 @@ fn patch_settings(path: impl AsRef<Path>) -> anyhow::Result<()> {
|
|||||||
// We first deserialize the dump meta into a serde_json::Value and change
|
// We first deserialize the dump meta into a serde_json::Value and change
|
||||||
// the custom ranking rules settings from the old format to the new format.
|
// the custom ranking rules settings from the old format to the new format.
|
||||||
if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
|
if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
|
||||||
patch_custon_ranking_rules(ranking_rules);
|
patch_custom_ranking_rules(ranking_rules);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut meta_file = OpenOptions::new().truncate(true).write(true).open(path)?;
|
let mut meta_file = OpenOptions::new().truncate(true).write(true).open(path)?;
|
||||||
@ -79,8 +80,8 @@ fn patch_settings(path: impl AsRef<Path>) -> anyhow::Result<()> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn patch_updates(path: impl AsRef<Path>) -> anyhow::Result<()> {
|
fn patch_updates(dir: impl AsRef<Path>, path: impl AsRef<Path>) -> anyhow::Result<()> {
|
||||||
let mut output_update_file = NamedTempFile::new()?;
|
let mut output_update_file = NamedTempFile::new_in(&dir)?;
|
||||||
let update_file = File::open(&path)?;
|
let update_file = File::open(&path)?;
|
||||||
|
|
||||||
let stream = Deserializer::from_reader(update_file).into_iter::<compat::UpdateEntry>();
|
let stream = Deserializer::from_reader(update_file).into_iter::<compat::UpdateEntry>();
|
||||||
@ -104,7 +105,7 @@ fn patch_updates(path: impl AsRef<Path>) -> anyhow::Result<()> {
|
|||||||
///
|
///
|
||||||
/// This is done for compatibility reasons, and to avoid a new dump version,
|
/// This is done for compatibility reasons, and to avoid a new dump version,
|
||||||
/// since the new syntax was introduced soon after the new dump version.
|
/// since the new syntax was introduced soon after the new dump version.
|
||||||
fn patch_custon_ranking_rules(ranking_rules: &mut Value) {
|
fn patch_custom_ranking_rules(ranking_rules: &mut Value) {
|
||||||
*ranking_rules = match ranking_rules.take() {
|
*ranking_rules = match ranking_rules.take() {
|
||||||
Value::Array(values) => values
|
Value::Array(values) => values
|
||||||
.into_iter()
|
.into_iter()
|
||||||
|
@ -253,7 +253,7 @@ where
|
|||||||
UpdateMsg::dump(&self.update_sender, uuids, temp_dump_path.clone()).await?;
|
UpdateMsg::dump(&self.update_sender, uuids, temp_dump_path.clone()).await?;
|
||||||
|
|
||||||
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
|
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
|
||||||
let temp_dump_file = tempfile::NamedTempFile::new()?;
|
let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
|
||||||
to_tar_gz(temp_dump_path, temp_dump_file.path())
|
to_tar_gz(temp_dump_path, temp_dump_file.path())
|
||||||
.map_err(|e| DumpActorError::Internal(e.into()))?;
|
.map_err(|e| DumpActorError::Internal(e.into()))?;
|
||||||
|
|
||||||
|
@ -81,7 +81,7 @@ where
|
|||||||
.snapshot_path
|
.snapshot_path
|
||||||
.join(format!("{}.snapshot", self.db_name));
|
.join(format!("{}.snapshot", self.db_name));
|
||||||
let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
|
let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
|
||||||
let temp_snapshot_file = tempfile::NamedTempFile::new()?;
|
let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?;
|
||||||
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
|
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
|
||||||
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
|
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
|
||||||
temp_snapshot_file.persist(&snapshot_path)?;
|
temp_snapshot_file.persist(&snapshot_path)?;
|
||||||
|
@ -107,7 +107,7 @@ impl UpdateFileStore {
|
|||||||
///
|
///
|
||||||
/// A call to `persist` is needed to persist the file in the database.
|
/// A call to `persist` is needed to persist the file in the database.
|
||||||
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
|
||||||
let file = NamedTempFile::new()?;
|
let file = NamedTempFile::new_in(&self.path)?;
|
||||||
let uuid = Uuid::new_v4();
|
let uuid = Uuid::new_v4();
|
||||||
let path = self.path.join(uuid.to_string());
|
let path = self.path.join(uuid.to_string());
|
||||||
let update_file = UpdateFile { file, path };
|
let update_file = UpdateFile { file, path };
|
||||||
@ -141,7 +141,7 @@ impl UpdateFileStore {
|
|||||||
dst.push(&uuid_string);
|
dst.push(&uuid_string);
|
||||||
|
|
||||||
let update_file = File::open(update_file_path)?;
|
let update_file = File::open(update_file_path)?;
|
||||||
let mut dst_file = NamedTempFile::new()?;
|
let mut dst_file = NamedTempFile::new_in(&dump_path)?;
|
||||||
let mut document_reader = DocumentBatchReader::from_reader(update_file)?;
|
let mut document_reader = DocumentBatchReader::from_reader(update_file)?;
|
||||||
|
|
||||||
let mut document_buffer = Map::new();
|
let mut document_buffer = Map::new();
|
||||||
|
@ -52,7 +52,7 @@ impl UpdateStore {
|
|||||||
uuids: &HashSet<Uuid>,
|
uuids: &HashSet<Uuid>,
|
||||||
path: impl AsRef<Path>,
|
path: impl AsRef<Path>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut dump_data_file = NamedTempFile::new()?;
|
let mut dump_data_file = NamedTempFile::new_in(&path)?;
|
||||||
|
|
||||||
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
|
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
|
||||||
self.dump_completed(txn, uuids, &mut dump_data_file)?;
|
self.dump_completed(txn, uuids, &mut dump_data_file)?;
|
||||||
|
Loading…
Reference in New Issue
Block a user