diff --git a/meilisearch-http/src/index/dump.rs b/meilisearch-http/src/index/dump.rs index dd29aa50a..247c02085 100644 --- a/meilisearch-http/src/index/dump.rs +++ b/meilisearch-http/src/index/dump.rs @@ -1,12 +1,9 @@ -use std::{ - fs::{create_dir_all, File}, - io::{BufRead, BufReader}, - path::Path, - sync::Arc, -}; +use std::fs::{create_dir_all, File}; +use std::io::{BufRead, BufReader, Write}; +use std::path::Path; +use std::sync::Arc; -use anyhow::bail; -use anyhow::Context; +use anyhow::{bail, Context}; use heed::RoTxn; use indexmap::IndexMap; use milli::update::{IndexDocumentsMethod, UpdateFormat::JsonStream}; @@ -55,7 +52,7 @@ impl Index { } serde_json::to_writer(&mut document_file, &json_map)?; - std::io::Write::write(&mut document_file, b"\n")?; + document_file.write_all(b"\n")?; json_map.clear(); } @@ -82,7 +79,7 @@ impl Index { pub fn load_dump( src: impl AsRef, dst: impl AsRef, - size: u64, + size: usize, indexing_options: &IndexerOpts, ) -> anyhow::Result<()> { let dir_name = src @@ -99,7 +96,7 @@ impl Index { primary_key, } = serde_json::from_reader(&mut meta_file)?; let settings = settings.check(); - let index = Self::open(&dst_dir_path, size as usize)?; + let index = Self::open(&dst_dir_path, size)?; let mut txn = index.write_txn()?; let handler = UpdateHandler::new(&indexing_options)?; diff --git a/meilisearch-http/src/index/mod.rs b/meilisearch-http/src/index/mod.rs index 7d9603e9e..790ac58f0 100644 --- a/meilisearch-http/src/index/mod.rs +++ b/meilisearch-http/src/index/mod.rs @@ -1,10 +1,9 @@ +use std::collections::{BTreeSet, HashSet}; +use std::fs::create_dir_all; +use std::marker::PhantomData; use std::ops::Deref; +use std::path::Path; use std::sync::Arc; -use std::{ - collections::{BTreeSet, HashSet}, - marker::PhantomData, - path::Path, -}; use anyhow::{bail, Context}; use heed::{EnvOpenOptions, RoTxn}; @@ -44,7 +43,7 @@ where impl Index { pub fn open(path: impl AsRef, size: usize) -> anyhow::Result { - std::fs::create_dir_all(&path)?; + create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); let index = milli::Index::new(options, &path)?; @@ -113,8 +112,6 @@ impl Index { let mut documents = Vec::new(); - println!("fields to display: {:?}", fields_to_display); - for entry in iter { let (_id, obkv) = entry?; let object = obkv_to_json(&fields_to_display, &fields_ids_map, obkv)?; diff --git a/meilisearch-http/src/index/updates.rs b/meilisearch-http/src/index/updates.rs index 5ef6d854e..046823fb7 100644 --- a/meilisearch-http/src/index/updates.rs +++ b/meilisearch-http/src/index/updates.rs @@ -197,10 +197,8 @@ impl Index { builder.update_format(format); builder.index_documents_method(method); - //let indexing_callback = - //|indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step); - - let indexing_callback = |_, _| (); + let indexing_callback = + |indexing_step, update_id| info!("update {}: {:?}", update_id, indexing_step); let gzipped = false; let addition = match content { diff --git a/meilisearch-http/src/index_controller/dump_actor/actor.rs b/meilisearch-http/src/index_controller/dump_actor/actor.rs index 8ea2e1f6d..c78079de6 100644 --- a/meilisearch-http/src/index_controller/dump_actor/actor.rs +++ b/meilisearch-http/src/index_controller/dump_actor/actor.rs @@ -1,8 +1,6 @@ +use std::collections::HashMap; +use std::path::{Path, PathBuf}; use std::sync::Arc; -use std::{ - collections::HashMap, - path::{Path, PathBuf}, -}; use async_stream::stream; use chrono::Utc; @@ -24,8 +22,8 @@ pub struct DumpActor { dump_path: PathBuf, lock: Arc>, dump_infos: Arc>>, - update_db_size: u64, - index_db_size: u64, + update_db_size: usize, + index_db_size: usize, } /// Generate uid from creation date @@ -43,8 +41,8 @@ where uuid_resolver: UuidResolver, update: Update, dump_path: impl AsRef, - index_db_size: u64, - update_db_size: u64, + index_db_size: usize, + update_db_size: usize, ) -> Self { let dump_infos = Arc::new(RwLock::new(HashMap::new())); let lock = Arc::new(Mutex::new(())); diff --git a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs index 3d8665e62..ab91aeae6 100644 --- a/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs +++ b/meilisearch-http/src/index_controller/dump_actor/handle_impl.rs @@ -1,8 +1,10 @@ -use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg, DumpResult}; -use actix_web::web::Bytes; use std::path::Path; + +use actix_web::web::Bytes; use tokio::sync::{mpsc, oneshot}; +use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg, DumpResult}; + #[derive(Clone)] pub struct DumpActorHandleImpl { sender: mpsc::Sender, @@ -30,8 +32,8 @@ impl DumpActorHandleImpl { path: impl AsRef, uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl, update: crate::index_controller::update_actor::UpdateActorHandleImpl, - index_db_size: u64, - update_db_size: u64, + index_db_size: usize, + update_db_size: usize, ) -> anyhow::Result { let (sender, receiver) = mpsc::channel(10); let actor = DumpActor::new( diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs index 70c89664b..89893998e 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v1.rs @@ -1,22 +1,20 @@ -use std::{ - collections::{BTreeMap, BTreeSet}, - fs::File, - io::BufRead, - marker::PhantomData, - path::Path, - sync::Arc, -}; +use std::collections::{BTreeMap, BTreeSet}; +use std::fs::{create_dir_all, File}; +use std::io::BufRead; +use std::marker::PhantomData; +use std::path::Path; +use std::sync::Arc; use heed::EnvOpenOptions; use log::{error, info, warn}; -use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat}; +use milli::update::{IndexDocumentsMethod, UpdateFormat}; use serde::{Deserialize, Serialize}; use uuid::Uuid; -use crate::{index::deserialize_some, index_controller::uuid_resolver::HeedUuidStore}; +use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata}; use crate::{ - index::{Index, Unchecked}, - index_controller::{self, IndexMetadata}, + index::{deserialize_some, update_handler::UpdateHandler, Index, Unchecked}, + option::IndexerOpts, }; #[derive(Serialize, Deserialize, Debug)] @@ -32,28 +30,33 @@ impl MetadataV1 { src: impl AsRef, dst: impl AsRef, size: usize, + indexer_options: &IndexerOpts, ) -> anyhow::Result<()> { info!( "Loading dump, dump database version: {}, dump version: V1", self.db_version ); - dbg!("here"); - let uuid_store = HeedUuidStore::new(&dst)?; - dbg!("here"); for index in self.indexes { let uuid = Uuid::new_v4(); uuid_store.insert(index.uid.clone(), uuid)?; let src = src.as_ref().join(index.uid); - load_index(&src, &dst, uuid, index.meta.primary_key.as_deref(), size)?; + load_index( + &src, + &dst, + uuid, + index.meta.primary_key.as_deref(), + size, + indexer_options, + )?; } Ok(()) } } -//This is the settings used in the last version of meilisearch exporting dump in V1 +// These are the settings used in legacy meilisearch (, size: usize, + indexer_options: &IndexerOpts, ) -> anyhow::Result<()> { let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid)); - std::fs::create_dir_all(&index_path)?; + create_dir_all(&index_path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); let index = milli::Index::new(options, index_path)?; @@ -99,31 +103,37 @@ fn load_index( // extract `settings.json` file and import content let settings = import_settings(&src)?; let settings: index_controller::Settings = settings.into(); - let update_builder = UpdateBuilder::new(0); - index.update_settings(&settings.check(), update_builder)?; - let update_builder = UpdateBuilder::new(0); + let mut txn = index.write_txn()?; + + let handler = UpdateHandler::new(&indexer_options)?; + + index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?; + let file = File::open(&src.as_ref().join("documents.jsonl"))?; let mut reader = std::io::BufReader::new(file); reader.fill_buf()?; if !reader.buffer().is_empty() { - index.update_documents( + index.update_documents_txn( + &mut txn, UpdateFormat::JsonStream, IndexDocumentsMethod::ReplaceDocuments, Some(reader), - update_builder, + handler.update_builder(0), primary_key, )?; } - // the last step: we extract the original milli::Index and close it + txn.commit()?; + + // Finaly, we extract the original milli::Index and close it Arc::try_unwrap(index.0) - .map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index") + .map_err(|_e| "Couln't close index properly") .unwrap() .prepare_for_closing() .wait(); - // Ignore updates in v1. + // Updates are ignored in dumps V1. Ok(()) } @@ -172,7 +182,7 @@ impl From for index_controller::Settings { /// Extract Settings from `settings.json` file present at provided `dir_path` fn import_settings(dir_path: impl AsRef) -> anyhow::Result { - let path = dbg!(dir_path.as_ref().join("settings.json")); + let path = dir_path.as_ref().join("settings.json"); let file = File::open(path)?; let reader = std::io::BufReader::new(file); let metadata = serde_json::from_reader(reader)?; diff --git a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs index 96001902d..eddd8a3b7 100644 --- a/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs +++ b/meilisearch-http/src/index_controller/dump_actor/loaders/v2.rs @@ -4,23 +4,21 @@ use chrono::{DateTime, Utc}; use log::info; use serde::{Deserialize, Serialize}; -use crate::{ - index::Index, - index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}, - option::IndexerOpts, -}; +use crate::index::Index; +use crate::index_controller::{update_actor::UpdateStore, uuid_resolver::HeedUuidStore}; +use crate::option::IndexerOpts; #[derive(Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct MetadataV2 { db_version: String, - index_db_size: u64, - update_db_size: u64, + index_db_size: usize, + update_db_size: usize, dump_date: DateTime, } impl MetadataV2 { - pub fn new(index_db_size: u64, update_db_size: u64) -> Self { + pub fn new(index_db_size: usize, update_db_size: usize) -> Self { Self { db_version: env!("CARGO_PKG_VERSION").to_string(), index_db_size, @@ -33,9 +31,8 @@ impl MetadataV2 { self, src: impl AsRef, dst: impl AsRef, - // TODO: use these variable to test if loading the index is possible. - _index_db_size: u64, - _update_db_size: u64, + index_db_size: usize, + update_db_size: usize, indexing_options: &IndexerOpts, ) -> anyhow::Result<()> { info!( @@ -47,14 +44,14 @@ impl MetadataV2 { HeedUuidStore::load_dump(src.as_ref(), &dst)?; info!("Loading updates."); - UpdateStore::load_dump(&src, &dst, self.update_db_size)?; + UpdateStore::load_dump(&src, &dst, update_db_size)?; - info!("Loading indexes"); + info!("Loading indexes."); let indexes_path = src.as_ref().join("indexes"); let indexes = indexes_path.read_dir()?; for index in indexes { let index = index?; - Index::load_dump(&index.path(), &dst, self.index_db_size, indexing_options)?; + Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?; } Ok(()) diff --git a/meilisearch-http/src/index_controller/dump_actor/mod.rs b/meilisearch-http/src/index_controller/dump_actor/mod.rs index dde04bc12..0bddaf7a3 100644 --- a/meilisearch-http/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-http/src/index_controller/dump_actor/mod.rs @@ -8,6 +8,7 @@ use log::{error, info, warn}; use mockall::automock; use serde::{Deserialize, Serialize}; use thiserror::Error; +use tokio::fs::create_dir_all; use loaders::v1::MetadataV1; use loaders::v2::MetadataV2; @@ -15,7 +16,6 @@ use loaders::v2::MetadataV2; pub use actor::DumpActor; pub use handle_impl::*; pub use message::DumpMsg; -use tokio::fs::create_dir_all; use super::{update_actor::UpdateActorHandle, uuid_resolver::UuidResolverHandle}; use crate::{helpers::compression, option::IndexerOpts}; @@ -61,7 +61,7 @@ pub enum Metadata { } impl Metadata { - pub fn new_v2(index_db_size: u64, update_db_size: u64) -> Self { + pub fn new_v2(index_db_size: usize, update_db_size: usize) -> Self { let meta = MetadataV2::new(index_db_size, update_db_size); Self::V2(meta) } @@ -117,8 +117,8 @@ impl DumpInfo { pub fn load_dump( dst_path: impl AsRef, src_path: impl AsRef, - index_db_size: u64, - update_db_size: u64, + index_db_size: usize, + update_db_size: usize, indexer_opts: &IndexerOpts, ) -> anyhow::Result<()> { let tmp_src = tempfile::tempdir_in(".")?; @@ -139,7 +139,7 @@ pub fn load_dump( match meta { Metadata::V1(meta) => { - meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size as usize)? + meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer_opts)? } Metadata::V2(meta) => meta.load_dump( &tmp_src_path, @@ -166,8 +166,8 @@ struct DumpTask { uuid_resolver: U, update_handle: P, uid: String, - update_db_size: u64, - index_db_size: u64, + update_db_size: usize, + index_db_size: usize, } impl DumpTask diff --git a/meilisearch-http/src/index_controller/mod.rs b/meilisearch-http/src/index_controller/mod.rs index 18ba6dee3..0615bb731 100644 --- a/meilisearch-http/src/index_controller/mod.rs +++ b/meilisearch-http/src/index_controller/mod.rs @@ -97,8 +97,8 @@ impl IndexController { load_dump( &options.db_path, src_path, - options.max_mdb_size.get_bytes(), - options.max_udb_size.get_bytes(), + options.max_mdb_size.get_bytes() as usize, + options.max_udb_size.get_bytes() as usize, &options.indexer_options, )?; } @@ -116,8 +116,8 @@ impl IndexController { &options.dumps_dir, uuid_resolver.clone(), update_handle.clone(), - options.max_mdb_size.get_bytes(), - options.max_udb_size.get_bytes(), + options.max_mdb_size.get_bytes() as usize, + options.max_udb_size.get_bytes() as usize, )?; if options.schedule_snapshot { diff --git a/meilisearch-http/src/index_controller/update_actor/actor.rs b/meilisearch-http/src/index_controller/update_actor/actor.rs index 40bba4e2b..7779f2556 100644 --- a/meilisearch-http/src/index_controller/update_actor/actor.rs +++ b/meilisearch-http/src/index_controller/update_actor/actor.rs @@ -197,7 +197,7 @@ where async fn handle_dump(&self, uuids: HashSet, path: PathBuf) -> Result<()> { let index_handle = self.index_handle.clone(); let update_store = self.store.clone(); - println!("starting dump"); + tokio::task::spawn_blocking(move || -> anyhow::Result<()> { update_store.dump(&uuids, path.to_path_buf(), index_handle)?; Ok(()) diff --git a/meilisearch-http/src/index_controller/update_actor/store/dump.rs b/meilisearch-http/src/index_controller/update_actor/store/dump.rs index 6dfb300e2..8f947e459 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/dump.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/dump.rs @@ -129,7 +129,7 @@ impl UpdateStore { pub fn load_dump( src: impl AsRef, dst: impl AsRef, - db_size: u64, + db_size: usize, ) -> anyhow::Result<()> { let dst_update_path = dst.as_ref().join("updates/"); create_dir_all(&dst_update_path)?; diff --git a/meilisearch-http/src/index_controller/update_actor/store/mod.rs b/meilisearch-http/src/index_controller/update_actor/store/mod.rs index 006549fb6..28204f4c0 100644 --- a/meilisearch-http/src/index_controller/update_actor/store/mod.rs +++ b/meilisearch-http/src/index_controller/update_actor/store/mod.rs @@ -589,9 +589,7 @@ mod test { let uuid = Uuid::new_v4(); let store_clone = update_store.clone(); tokio::task::spawn_blocking(move || { - store_clone - .register_update(meta, None, uuid) - .unwrap(); + store_clone.register_update(meta, None, uuid).unwrap(); }) .await .unwrap(); diff --git a/meilisearch-http/src/index_controller/uuid_resolver/store.rs b/meilisearch-http/src/index_controller/uuid_resolver/store.rs index 6289cefcd..1d6ada269 100644 --- a/meilisearch-http/src/index_controller/uuid_resolver/store.rs +++ b/meilisearch-http/src/index_controller/uuid_resolver/store.rs @@ -1,14 +1,10 @@ +use std::collections::HashSet; use std::fs::{create_dir_all, File}; +use std::io::{BufRead, BufReader, Write}; use std::path::{Path, PathBuf}; -use std::{ - collections::HashSet, - io::{BufRead, BufReader, Write}, -}; -use heed::{ - types::{ByteSlice, Str}, - CompactionOption, Database, Env, EnvOpenOptions, -}; +use heed::types::{ByteSlice, Str}; +use heed::{CompactionOption, Database, Env, EnvOpenOptions}; use serde::{Deserialize, Serialize}; use uuid::Uuid; @@ -21,6 +17,8 @@ struct DumpEntry { uid: String, } +const UUIDS_DB_PATH: &str = "index_uuids"; + #[async_trait::async_trait] pub trait UuidStore: Sized { // Create a new entry for `name`. Return an error if `err` and the entry already exists, return @@ -43,7 +41,7 @@ pub struct HeedUuidStore { impl HeedUuidStore { pub fn new(path: impl AsRef) -> anyhow::Result { - let path = path.as_ref().join("index_uuids"); + let path = path.as_ref().join(UUIDS_DB_PATH); create_dir_all(&path)?; let mut options = EnvOpenOptions::new(); options.map_size(UUID_STORE_SIZE); // 1GB @@ -137,7 +135,7 @@ impl HeedUuidStore { // only perform snapshot if there are indexes if !entries.is_empty() { - path.push("index_uuids"); + path.push(UUIDS_DB_PATH); create_dir_all(&path).unwrap(); path.push("data.mdb"); env.copy_to_path(path, CompactionOption::Enabled)?; @@ -150,7 +148,7 @@ impl HeedUuidStore { } pub fn dump(&self, path: PathBuf) -> Result> { - let dump_path = path.join("index_uuids"); + let dump_path = path.join(UUIDS_DB_PATH); create_dir_all(&dump_path)?; let dump_file_path = dump_path.join("data.jsonl"); let mut dump_file = File::create(&dump_file_path)?; @@ -173,10 +171,10 @@ impl HeedUuidStore { } pub fn load_dump(src: impl AsRef, dst: impl AsRef) -> anyhow::Result<()> { - let uuid_resolver_path = dst.as_ref().join("uuid_resolver/"); + let uuid_resolver_path = dst.as_ref().join(UUIDS_DB_PATH); std::fs::create_dir_all(&uuid_resolver_path)?; - let src_indexes = src.as_ref().join("index_uuids/data.jsonl"); + let src_indexes = src.as_ref().join(UUIDS_DB_PATH).join("data.jsonl"); let indexes = File::open(&src_indexes)?; let mut indexes = BufReader::new(indexes); let mut line = String::new();