1824: Fix indexation performance on mounted disk r=ManyTheFish a=ManyTheFish

We were creating all of our tempfiles in the data.ms directory, but when the database directory is stored on a mounted disk, tempfile I/O throughput decreases, which increases the indexation time.

Now, only the tempfiles that are meant to be persisted will be created in the database directory. Other tempfiles will stay in the default tmpdir.

Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
bors[bot] 2021-10-18 12:42:47 +00:00 committed by GitHub
commit 1c9ceadd8d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 12 additions and 37 deletions

View File

@ -8,7 +8,6 @@ pub mod analytics;
pub mod helpers; pub mod helpers;
pub mod option; pub mod option;
pub mod routes; pub mod routes;
use std::path::Path;
use std::time::Duration; use std::time::Duration;
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
@ -75,27 +74,6 @@ pub fn setup_meilisearch(opt: &Opt) -> anyhow::Result<MeiliSearch> {
meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone()) meilisearch.build(opt.db_path.clone(), opt.indexer_options.clone())
} }
/// Cleans and sets up the temporary file folder in the database directory. This must be done after
/// the meilisearch instance has been created, to not interfere with the snapshot and dump loading.
///
/// The folder is `<db_path>/tmp`. Removal of any existing folder at that path is attempted, a
/// fresh folder is created, and the temp-dir environment variable of the current process
/// (`TMP` on Windows builds, `TMPDIR` otherwise) is set to point at it.
///
/// # Errors
///
/// Returns an error if the folder cannot be created. A failure to remove the previous folder is
/// deliberately ignored.
pub fn setup_temp_dir(db_path: impl AsRef<Path>) -> anyhow::Result<()> {
// Set the tempfile directory in the current db path, to avoid cross device references. Also
// remove the previous outstanding files found there
//
// TODO: if two processes open the same db, one might delete the other tmpdir. Need to make
// sure that no one is using it before deleting it.
let temp_path = db_path.as_ref().join("tmp");
// Ignore error if tempdir doesn't exist
let _ = std::fs::remove_dir_all(&temp_path);
std::fs::create_dir_all(&temp_path)?;
// `cfg!(windows)` is resolved at compile time, so the variable name is chosen per target
// platform. The variable is set for the whole process, not just for this call.
if cfg!(windows) {
std::env::set_var("TMP", temp_path);
} else {
std::env::set_var("TMPDIR", temp_path);
}
Ok(())
}
pub fn configure_data(config: &mut web::ServiceConfig, data: MeiliSearch, opt: &Opt) { pub fn configure_data(config: &mut web::ServiceConfig, data: MeiliSearch, opt: &Opt) {
let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize; let http_payload_size_limit = opt.http_payload_size_limit.get_bytes() as usize;
config config

View File

@ -46,10 +46,6 @@ async fn main() -> anyhow::Result<()> {
let meilisearch = setup_meilisearch(&opt)?; let meilisearch = setup_meilisearch(&opt)?;
// Set up the temp directory to be in the db folder. This is important, since temporary files
// cannot be persisted across filesystem boundaries.
meilisearch_http::setup_temp_dir(&opt.db_path)?;
#[cfg(all(not(debug_assertions), feature = "analytics"))] #[cfg(all(not(debug_assertions), feature = "analytics"))]
if !opt.no_analytics { if !opt.no_analytics {
let analytics_data = meilisearch.clone(); let analytics_data = meilisearch.clone();

View File

@ -43,8 +43,9 @@ pub fn load_dump(
patch_settings(settings_path)?; patch_settings(settings_path)?;
} }
let update_path = src.as_ref().join("updates/data.jsonl"); let update_dir = src.as_ref().join("updates");
patch_updates(update_path)?; let update_path = update_dir.join("data.jsonl");
patch_updates(update_dir, update_path)?;
v3::load_dump( v3::load_dump(
meta, meta,
@ -69,7 +70,7 @@ fn patch_settings(path: impl AsRef<Path>) -> anyhow::Result<()> {
// We first deserialize the dump meta into a serde_json::Value and change // We first deserialize the dump meta into a serde_json::Value and change
// the custom ranking rules settings from the old format to the new format. // the custom ranking rules settings from the old format to the new format.
if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") { if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
patch_custon_ranking_rules(ranking_rules); patch_custom_ranking_rules(ranking_rules);
} }
let mut meta_file = OpenOptions::new().truncate(true).write(true).open(path)?; let mut meta_file = OpenOptions::new().truncate(true).write(true).open(path)?;
@ -79,8 +80,8 @@ fn patch_settings(path: impl AsRef<Path>) -> anyhow::Result<()> {
Ok(()) Ok(())
} }
fn patch_updates(path: impl AsRef<Path>) -> anyhow::Result<()> { fn patch_updates(dir: impl AsRef<Path>, path: impl AsRef<Path>) -> anyhow::Result<()> {
let mut output_update_file = NamedTempFile::new()?; let mut output_update_file = NamedTempFile::new_in(&dir)?;
let update_file = File::open(&path)?; let update_file = File::open(&path)?;
let stream = Deserializer::from_reader(update_file).into_iter::<compat::UpdateEntry>(); let stream = Deserializer::from_reader(update_file).into_iter::<compat::UpdateEntry>();
@ -104,7 +105,7 @@ fn patch_updates(path: impl AsRef<Path>) -> anyhow::Result<()> {
/// ///
/// This is done for compatibility reasons, and to avoid a new dump version, /// This is done for compatibility reasons, and to avoid a new dump version,
/// since the new syntax was introduced soon after the new dump version. /// since the new syntax was introduced soon after the new dump version.
fn patch_custon_ranking_rules(ranking_rules: &mut Value) { fn patch_custom_ranking_rules(ranking_rules: &mut Value) {
*ranking_rules = match ranking_rules.take() { *ranking_rules = match ranking_rules.take() {
Value::Array(values) => values Value::Array(values) => values
.into_iter() .into_iter()

View File

@ -253,7 +253,7 @@ where
UpdateMsg::dump(&self.update_sender, uuids, temp_dump_path.clone()).await?; UpdateMsg::dump(&self.update_sender, uuids, temp_dump_path.clone()).await?;
let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> { let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
let temp_dump_file = tempfile::NamedTempFile::new()?; let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
to_tar_gz(temp_dump_path, temp_dump_file.path()) to_tar_gz(temp_dump_path, temp_dump_file.path())
.map_err(|e| DumpActorError::Internal(e.into()))?; .map_err(|e| DumpActorError::Internal(e.into()))?;

View File

@ -81,7 +81,7 @@ where
.snapshot_path .snapshot_path
.join(format!("{}.snapshot", self.db_name)); .join(format!("{}.snapshot", self.db_name));
let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> { let snapshot_path = spawn_blocking(move || -> anyhow::Result<PathBuf> {
let temp_snapshot_file = tempfile::NamedTempFile::new()?; let temp_snapshot_file = tempfile::NamedTempFile::new_in(&snapshot_dir)?;
let temp_snapshot_file_path = temp_snapshot_file.path().to_owned(); let temp_snapshot_file_path = temp_snapshot_file.path().to_owned();
crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?; crate::compression::to_tar_gz(temp_snapshot_path, temp_snapshot_file_path)?;
temp_snapshot_file.persist(&snapshot_path)?; temp_snapshot_file.persist(&snapshot_path)?;

View File

@ -107,7 +107,7 @@ impl UpdateFileStore {
/// ///
/// A call to `persist` is needed to persist the file in the database. /// A call to `persist` is needed to persist the file in the database.
pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> { pub fn new_update(&self) -> Result<(Uuid, UpdateFile)> {
let file = NamedTempFile::new()?; let file = NamedTempFile::new_in(&self.path)?;
let uuid = Uuid::new_v4(); let uuid = Uuid::new_v4();
let path = self.path.join(uuid.to_string()); let path = self.path.join(uuid.to_string());
let update_file = UpdateFile { file, path }; let update_file = UpdateFile { file, path };
@ -141,7 +141,7 @@ impl UpdateFileStore {
dst.push(&uuid_string); dst.push(&uuid_string);
let update_file = File::open(update_file_path)?; let update_file = File::open(update_file_path)?;
let mut dst_file = NamedTempFile::new()?; let mut dst_file = NamedTempFile::new_in(&dump_path)?;
let mut document_reader = DocumentBatchReader::from_reader(update_file)?; let mut document_reader = DocumentBatchReader::from_reader(update_file)?;
let mut document_buffer = Map::new(); let mut document_buffer = Map::new();

View File

@ -52,7 +52,7 @@ impl UpdateStore {
uuids: &HashSet<Uuid>, uuids: &HashSet<Uuid>,
path: impl AsRef<Path>, path: impl AsRef<Path>,
) -> Result<()> { ) -> Result<()> {
let mut dump_data_file = NamedTempFile::new()?; let mut dump_data_file = NamedTempFile::new_in(&path)?;
self.dump_pending(txn, uuids, &mut dump_data_file, &path)?; self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
self.dump_completed(txn, uuids, &mut dump_data_file)?; self.dump_completed(txn, uuids, &mut dump_data_file)?;