mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
[WIP] rebase on main
This commit is contained in:
parent
c3552cecdf
commit
efca63f9ce
10 changed files with 381 additions and 87 deletions
200
meilisearch-http/src/index_controller/dump_actor/actor.rs
Normal file
200
meilisearch-http/src/index_controller/dump_actor/actor.rs
Normal file
|
@ -0,0 +1,200 @@
|
|||
use super::{DumpError, DumpInfo, DumpMsg, DumpResult, DumpStatus};
|
||||
use crate::helpers::compression;
|
||||
use crate::index_controller::{index_actor, update_actor, uuid_resolver, IndexMetadata};
|
||||
use chrono::Utc;
|
||||
use log::{error, info, warn};
|
||||
use std::{
|
||||
collections::HashSet,
|
||||
path::{Path, PathBuf},
|
||||
sync::Arc,
|
||||
};
|
||||
use tokio::sync::{mpsc, Mutex};
|
||||
use uuid::Uuid;
|
||||
|
||||
pub struct DumpActor<UuidResolver, Index, Update> {
|
||||
inbox: mpsc::Receiver<DumpMsg>,
|
||||
inner: InnerDump<UuidResolver, Index, Update>,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
struct InnerDump<UuidResolver, Index, Update> {
|
||||
pub uuid_resolver: UuidResolver,
|
||||
pub index: Index,
|
||||
pub update: Update,
|
||||
pub dump_path: PathBuf,
|
||||
pub dump_info: Arc<Mutex<Option<DumpInfo>>>,
|
||||
}
|
||||
|
||||
/// Generate uid from creation date
|
||||
fn generate_uid() -> String {
|
||||
Utc::now().format("%Y%m%d-%H%M%S%3f").to_string()
|
||||
}
|
||||
|
||||
impl<UuidResolver, Index, Update> DumpActor<UuidResolver, Index, Update>
|
||||
where
|
||||
UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync + Clone + 'static,
|
||||
Index: index_actor::IndexActorHandle + Send + Sync + Clone + 'static,
|
||||
Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static,
|
||||
{
|
||||
pub fn new(
|
||||
inbox: mpsc::Receiver<DumpMsg>,
|
||||
uuid_resolver: UuidResolver,
|
||||
index: Index,
|
||||
update: Update,
|
||||
dump_path: impl AsRef<Path>,
|
||||
) -> Self {
|
||||
Self {
|
||||
inbox,
|
||||
inner: InnerDump {
|
||||
uuid_resolver,
|
||||
index,
|
||||
update,
|
||||
dump_path: dump_path.as_ref().into(),
|
||||
dump_info: Arc::new(Mutex::new(None)),
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn run(mut self) {
|
||||
use DumpMsg::*;
|
||||
|
||||
info!("Started dump actor.");
|
||||
|
||||
loop {
|
||||
match self.inbox.recv().await {
|
||||
Some(CreateDump { ret }) => {
|
||||
let _ = ret.send(self.inner.clone().handle_create_dump().await);
|
||||
}
|
||||
Some(DumpInfo { ret, uid }) => {
|
||||
let _ = ret.send(self.inner.handle_dump_info(uid).await);
|
||||
}
|
||||
None => break,
|
||||
}
|
||||
}
|
||||
|
||||
error!("Dump actor stopped.");
|
||||
}
|
||||
}
|
||||
|
||||
impl<UuidResolver, Index, Update> InnerDump<UuidResolver, Index, Update>
|
||||
where
|
||||
UuidResolver: uuid_resolver::UuidResolverHandle + Send + Sync + Clone + 'static,
|
||||
Index: index_actor::IndexActorHandle + Send + Sync + Clone + 'static,
|
||||
Update: update_actor::UpdateActorHandle + Send + Sync + Clone + 'static,
|
||||
{
|
||||
async fn handle_create_dump(self) -> DumpResult<DumpInfo> {
|
||||
if self.is_running().await {
|
||||
return Err(DumpError::DumpAlreadyRunning);
|
||||
}
|
||||
let uid = generate_uid();
|
||||
let info = DumpInfo::new(uid.clone(), DumpStatus::InProgress);
|
||||
*self.dump_info.lock().await = Some(info.clone());
|
||||
|
||||
let this = self.clone();
|
||||
|
||||
tokio::task::spawn(async move {
|
||||
match this.perform_dump(uid).await {
|
||||
Ok(()) => {
|
||||
if let Some(ref mut info) = *self.dump_info.lock().await {
|
||||
info.done();
|
||||
} else {
|
||||
warn!("dump actor was in an inconsistant state");
|
||||
}
|
||||
info!("Dump succeed");
|
||||
}
|
||||
Err(e) => {
|
||||
if let Some(ref mut info) = *self.dump_info.lock().await {
|
||||
info.with_error(e.to_string());
|
||||
} else {
|
||||
warn!("dump actor was in an inconsistant state");
|
||||
}
|
||||
error!("Dump failed: {}", e);
|
||||
}
|
||||
};
|
||||
});
|
||||
|
||||
Ok(info)
|
||||
}
|
||||
|
||||
async fn perform_dump(self, uid: String) -> anyhow::Result<()> {
|
||||
info!("Performing dump.");
|
||||
|
||||
let dump_dir = self.dump_path.clone();
|
||||
tokio::fs::create_dir_all(&dump_dir).await?;
|
||||
let temp_dump_dir =
|
||||
tokio::task::spawn_blocking(move || tempfile::tempdir_in(dump_dir)).await??;
|
||||
let temp_dump_path = temp_dump_dir.path().to_owned();
|
||||
|
||||
let uuids = self.uuid_resolver.list().await?;
|
||||
// maybe we could just keep the vec as-is
|
||||
let uuids: HashSet<(String, Uuid)> = uuids.into_iter().collect();
|
||||
|
||||
if uuids.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
let indexes = self.list_indexes().await?;
|
||||
|
||||
// we create one directory by index
|
||||
for meta in indexes.iter() {
|
||||
tokio::fs::create_dir(temp_dump_path.join(&meta.uid)).await?;
|
||||
}
|
||||
|
||||
let metadata = super::Metadata::new(indexes, env!("CARGO_PKG_VERSION").to_string());
|
||||
metadata.to_path(&temp_dump_path).await?;
|
||||
|
||||
self.update.dump(uuids, temp_dump_path.clone()).await?;
|
||||
|
||||
let dump_dir = self.dump_path.clone();
|
||||
let dump_path = self.dump_path.join(format!("{}.dump", uid));
|
||||
let dump_path = tokio::task::spawn_blocking(move || -> anyhow::Result<PathBuf> {
|
||||
let temp_dump_file = tempfile::NamedTempFile::new_in(dump_dir)?;
|
||||
let temp_dump_file_path = temp_dump_file.path().to_owned();
|
||||
compression::to_tar_gz(temp_dump_path, temp_dump_file_path)?;
|
||||
temp_dump_file.persist(&dump_path)?;
|
||||
Ok(dump_path)
|
||||
})
|
||||
.await??;
|
||||
|
||||
info!("Created dump in {:?}.", dump_path);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
async fn list_indexes(&self) -> anyhow::Result<Vec<IndexMetadata>> {
|
||||
let uuids = self.uuid_resolver.list().await?;
|
||||
|
||||
let mut ret = Vec::new();
|
||||
|
||||
for (uid, uuid) in uuids {
|
||||
let meta = self.index.get_index_meta(uuid).await?;
|
||||
let meta = IndexMetadata {
|
||||
uuid,
|
||||
name: uid.clone(),
|
||||
uid,
|
||||
meta,
|
||||
};
|
||||
ret.push(meta);
|
||||
}
|
||||
|
||||
Ok(ret)
|
||||
}
|
||||
|
||||
async fn handle_dump_info(&self, uid: String) -> DumpResult<DumpInfo> {
|
||||
match &*self.dump_info.lock().await {
|
||||
None => Err(DumpError::DumpDoesNotExist(uid)),
|
||||
Some(DumpInfo { uid: ref s, .. }) if &uid != s => Err(DumpError::DumpDoesNotExist(uid)),
|
||||
Some(info) => Ok(info.clone()),
|
||||
}
|
||||
}
|
||||
|
||||
async fn is_running(&self) -> bool {
|
||||
matches!(
|
||||
*self.dump_info.lock().await,
|
||||
Some(DumpInfo {
|
||||
status: DumpStatus::InProgress,
|
||||
..
|
||||
})
|
||||
)
|
||||
}
|
||||
}
|
|
@ -0,0 +1,41 @@
|
|||
use std::path::{Path};
|
||||
use actix_web::web::Bytes;
|
||||
use tokio::sync::{mpsc, oneshot};
|
||||
use super::{DumpActor, DumpActorHandle, DumpInfo, DumpMsg, DumpResult};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DumpActorHandleImpl {
|
||||
sender: mpsc::Sender<DumpMsg>,
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
impl DumpActorHandle for DumpActorHandleImpl {
|
||||
async fn create_dump(&self) -> DumpResult<DumpInfo> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = DumpMsg::CreateDump { ret };
|
||||
let _ = self.sender.send(msg).await;
|
||||
receiver.await.expect("IndexActor has been killed")
|
||||
}
|
||||
|
||||
async fn dump_info(&self, uid: String) -> DumpResult<DumpInfo> {
|
||||
let (ret, receiver) = oneshot::channel();
|
||||
let msg = DumpMsg::DumpInfo { ret, uid };
|
||||
let _ = self.sender.send(msg).await;
|
||||
receiver.await.expect("IndexActor has been killed")
|
||||
}
|
||||
}
|
||||
|
||||
impl DumpActorHandleImpl {
|
||||
pub fn new(
|
||||
path: impl AsRef<Path>,
|
||||
uuid_resolver: crate::index_controller::uuid_resolver::UuidResolverHandleImpl,
|
||||
index: crate::index_controller::index_actor::IndexActorHandleImpl,
|
||||
update: crate::index_controller::update_actor::UpdateActorHandleImpl<Bytes>,
|
||||
) -> anyhow::Result<Self> {
|
||||
let (sender, receiver) = mpsc::channel(10);
|
||||
let actor = DumpActor::new(receiver, uuid_resolver, index, update, path);
|
||||
|
||||
tokio::task::spawn(actor.run());
|
||||
Ok(Self { sender })
|
||||
}
|
||||
}
|
15
meilisearch-http/src/index_controller/dump_actor/message.rs
Normal file
15
meilisearch-http/src/index_controller/dump_actor/message.rs
Normal file
|
@ -0,0 +1,15 @@
|
|||
use tokio::sync::oneshot;
|
||||
|
||||
use super::{DumpResult, DumpInfo};
|
||||
|
||||
|
||||
pub enum DumpMsg {
|
||||
CreateDump {
|
||||
ret: oneshot::Sender<DumpResult<DumpInfo>>,
|
||||
},
|
||||
DumpInfo {
|
||||
uid: String,
|
||||
ret: oneshot::Sender<DumpResult<DumpInfo>>,
|
||||
},
|
||||
}
|
||||
|
225
meilisearch-http/src/index_controller/dump_actor/mod.rs
Normal file
225
meilisearch-http/src/index_controller/dump_actor/mod.rs
Normal file
|
@ -0,0 +1,225 @@
|
|||
mod v1;
|
||||
mod v2;
|
||||
mod handle_impl;
|
||||
mod actor;
|
||||
mod message;
|
||||
|
||||
use std::{
|
||||
fs::File,
|
||||
path::Path,
|
||||
sync::Arc,
|
||||
};
|
||||
|
||||
#[cfg(test)]
|
||||
use mockall::automock;
|
||||
use anyhow::bail;
|
||||
use thiserror::Error;
|
||||
use heed::EnvOpenOptions;
|
||||
use log::{error, info};
|
||||
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::json;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use super::IndexMetadata;
|
||||
use crate::helpers::compression;
|
||||
use crate::index::Index;
|
||||
use crate::index_controller::uuid_resolver;
|
||||
|
||||
pub use handle_impl::*;
|
||||
pub use actor::DumpActor;
|
||||
pub use message::DumpMsg;
|
||||
|
||||
pub type DumpResult<T> = std::result::Result<T, DumpError>;
|
||||
|
||||
#[derive(Error, Debug)]
|
||||
pub enum DumpError {
|
||||
#[error("error with index: {0}")]
|
||||
Error(#[from] anyhow::Error),
|
||||
#[error("Heed error: {0}")]
|
||||
HeedError(#[from] heed::Error),
|
||||
#[error("dump already running")]
|
||||
DumpAlreadyRunning,
|
||||
#[error("dump `{0}` does not exist")]
|
||||
DumpDoesNotExist(String),
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, Copy, Clone)]
|
||||
enum DumpVersion {
|
||||
V1,
|
||||
V2,
|
||||
}
|
||||
|
||||
impl DumpVersion {
|
||||
const CURRENT: Self = Self::V2;
|
||||
|
||||
/// Select the good importation function from the `DumpVersion` of metadata
|
||||
pub fn import_index(
|
||||
self,
|
||||
size: usize,
|
||||
dump_path: &Path,
|
||||
index_path: &Path,
|
||||
) -> anyhow::Result<()> {
|
||||
match self {
|
||||
Self::V1 => v1::import_index(size, dump_path, index_path),
|
||||
Self::V2 => v2::import_index(size, dump_path, index_path),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait::async_trait]
|
||||
#[cfg_attr(test, automock)]
|
||||
pub trait DumpActorHandle {
|
||||
/// Start the creation of a dump
|
||||
/// Implementation: [handle_impl::DumpActorHandleImpl::create_dump]
|
||||
async fn create_dump(&self) -> DumpResult<DumpInfo>;
|
||||
|
||||
/// Return the status of an already created dump
|
||||
/// Implementation: [handle_impl::DumpActorHandleImpl::dump_status]
|
||||
async fn dump_info(&self, uid: String) -> DumpResult<DumpInfo>;
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Metadata {
|
||||
indexes: Vec<IndexMetadata>,
|
||||
db_version: String,
|
||||
dump_version: DumpVersion,
|
||||
}
|
||||
|
||||
impl Metadata {
|
||||
/// Create a Metadata with the current dump version of meilisearch.
|
||||
pub fn new(indexes: Vec<IndexMetadata>, db_version: String) -> Self {
|
||||
Metadata {
|
||||
indexes,
|
||||
db_version,
|
||||
dump_version: DumpVersion::CURRENT,
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract Metadata from `metadata.json` file present at provided `dir_path`
|
||||
fn from_path(dir_path: &Path) -> anyhow::Result<Self> {
|
||||
let path = dir_path.join("metadata.json");
|
||||
let file = File::open(path)?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
let metadata = serde_json::from_reader(reader)?;
|
||||
|
||||
Ok(metadata)
|
||||
}
|
||||
|
||||
/// Write Metadata in `metadata.json` file at provided `dir_path`
|
||||
pub async fn to_path(&self, dir_path: &Path) -> anyhow::Result<()> {
|
||||
let path = dir_path.join("metadata.json");
|
||||
tokio::fs::write(path, serde_json::to_string(self)?).await?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Deserialize, PartialEq, Clone)]
|
||||
#[serde(rename_all = "snake_case")]
|
||||
pub enum DumpStatus {
|
||||
Done,
|
||||
InProgress,
|
||||
Failed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, Clone)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct DumpInfo {
|
||||
pub uid: String,
|
||||
pub status: DumpStatus,
|
||||
#[serde(skip_serializing_if = "Option::is_none", flatten)]
|
||||
pub error: Option<serde_json::Value>,
|
||||
}
|
||||
|
||||
impl DumpInfo {
|
||||
pub fn new(uid: String, status: DumpStatus) -> Self {
|
||||
Self {
|
||||
uid,
|
||||
status,
|
||||
error: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_error(&mut self, error: String) {
|
||||
self.status = DumpStatus::Failed;
|
||||
self.error = Some(json!(error));
|
||||
}
|
||||
|
||||
pub fn done(&mut self) {
|
||||
self.status = DumpStatus::Done;
|
||||
}
|
||||
|
||||
pub fn dump_already_in_progress(&self) -> bool {
|
||||
self.status == DumpStatus::InProgress
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
pub fn load_dump(
|
||||
db_path: impl AsRef<Path>,
|
||||
dump_path: impl AsRef<Path>,
|
||||
size: usize,
|
||||
) -> anyhow::Result<()> {
|
||||
info!("Importing dump from {}...", dump_path.as_ref().display());
|
||||
let db_path = db_path.as_ref();
|
||||
let dump_path = dump_path.as_ref();
|
||||
let uuid_resolver = uuid_resolver::HeedUuidStore::new(&db_path)?;
|
||||
|
||||
// extract the dump in a temporary directory
|
||||
let tmp_dir = TempDir::new_in(db_path)?;
|
||||
let tmp_dir_path = tmp_dir.path();
|
||||
compression::from_tar_gz(dump_path, tmp_dir_path)?;
|
||||
|
||||
// read dump metadata
|
||||
let metadata = Metadata::from_path(&tmp_dir_path)?;
|
||||
|
||||
// remove indexes which have same `uuid` than indexes to import and create empty indexes
|
||||
let existing_index_uids = uuid_resolver.list()?;
|
||||
|
||||
info!("Deleting indexes already present in the db and provided in the dump...");
|
||||
for idx in &metadata.indexes {
|
||||
if let Some((_, uuid)) = existing_index_uids.iter().find(|(s, _)| s == &idx.uid) {
|
||||
// if we find the index in the `uuid_resolver` it's supposed to exist on the file system
|
||||
// and we want to delete it
|
||||
let path = db_path.join(&format!("indexes/index-{}", uuid));
|
||||
info!("Deleting {}", path.display());
|
||||
use std::io::ErrorKind::*;
|
||||
match std::fs::remove_dir_all(path) {
|
||||
Ok(()) => (),
|
||||
// if an index was present in the metadata but missing of the fs we can ignore the
|
||||
// problem because we are going to create it later
|
||||
Err(e) if e.kind() == NotFound => (),
|
||||
Err(e) => bail!(e),
|
||||
}
|
||||
} else {
|
||||
// if the index does not exist in the `uuid_resolver` we create it
|
||||
uuid_resolver.create_uuid(idx.uid.clone(), false)?;
|
||||
}
|
||||
}
|
||||
|
||||
// import each indexes content
|
||||
for idx in metadata.indexes {
|
||||
let dump_path = tmp_dir_path.join(&idx.uid);
|
||||
// this cannot fail since we created all the missing uuid in the previous loop
|
||||
let uuid = uuid_resolver.get_uuid(idx.uid)?.unwrap();
|
||||
let index_path = db_path.join(&format!("indexes/index-{}", uuid));
|
||||
// let update_path = db_path.join(&format!("updates/updates-{}", uuid)); // TODO: add the update db
|
||||
|
||||
info!(
|
||||
"Importing dump from {} into {}...",
|
||||
dump_path.display(),
|
||||
index_path.display()
|
||||
);
|
||||
metadata
|
||||
.dump_version
|
||||
.import_index(size, &dump_path, &index_path)
|
||||
.unwrap();
|
||||
info!("Dump importation from {} succeed", dump_path.display());
|
||||
}
|
||||
|
||||
info!("Dump importation from {} succeed", dump_path.display());
|
||||
Ok(())
|
||||
}
|
120
meilisearch-http/src/index_controller/dump_actor/v1.rs
Normal file
120
meilisearch-http/src/index_controller/dump_actor/v1.rs
Normal file
|
@ -0,0 +1,120 @@
|
|||
use std::collections::{BTreeMap, BTreeSet};
|
||||
|
||||
use log::warn;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use crate::index_controller;
|
||||
use crate::index::{deserialize_wildcard, deserialize_some};
|
||||
use super::*;
|
||||
|
||||
/// This is the settings used in the last version of meilisearch exporting dump in V1
|
||||
#[derive(Default, Clone, Serialize, Deserialize, Debug)]
|
||||
#[serde(rename_all = "camelCase", deny_unknown_fields)]
|
||||
struct Settings {
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub ranking_rules: Option<Option<Vec<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub distinct_attribute: Option<Option<String>>,
|
||||
#[serde(default, deserialize_with = "deserialize_wildcard")]
|
||||
pub searchable_attributes: Option<Option<Vec<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_wildcard")]
|
||||
pub displayed_attributes: Option<Option<BTreeSet<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub stop_words: Option<Option<BTreeSet<String>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
|
||||
#[serde(default, deserialize_with = "deserialize_some")]
|
||||
pub attributes_for_faceting: Option<Option<Vec<String>>>,
|
||||
}
|
||||
|
||||
/// we need to **always** be able to convert the old settings to the settings currently being used
|
||||
impl From<Settings> for index_controller::Settings {
|
||||
fn from(settings: Settings) -> Self {
|
||||
if settings.synonyms.flatten().is_some() {
|
||||
error!("`synonyms` are not yet implemented and thus will be ignored");
|
||||
}
|
||||
Self {
|
||||
distinct_attribute: settings.distinct_attribute,
|
||||
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||
displayed_attributes: settings.displayed_attributes.map(|o| o.map(|vec| vec.into_iter().collect())),
|
||||
searchable_attributes: settings.searchable_attributes,
|
||||
// we previously had a `Vec<String>` but now we have a `HashMap<String, String>`
|
||||
// representing the name of the faceted field + the type of the field. Since the type
|
||||
// was not known in the V1 of the dump we are just going to assume everything is a
|
||||
// String
|
||||
attributes_for_faceting: settings.attributes_for_faceting.map(|o| o.map(|vec| vec.into_iter().map(|key| (key, String::from("string"))).collect())),
|
||||
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||
ranking_rules: settings.ranking_rules.map(|o| o.map(|vec| vec.into_iter().filter_map(|criterion| {
|
||||
match criterion.as_str() {
|
||||
"words" | "typo" | "proximity" => Some(criterion),
|
||||
s if s.starts_with("asc") || s.starts_with("desc") => Some(criterion),
|
||||
"wordsPosition" => {
|
||||
warn!("The criteria `words` and `wordsPosition` have been merged into a single criterion `words` so `wordsPositon` will be ignored");
|
||||
Some(String::from("words"))
|
||||
}
|
||||
"attribute" | "exactness" => {
|
||||
error!("The criterion `{}` is not implemented currently and thus will be ignored", criterion);
|
||||
None
|
||||
}
|
||||
s => {
|
||||
error!("Unknown criterion found in the dump: `{}`, it will be ignored", s);
|
||||
None
|
||||
}
|
||||
}
|
||||
}).collect())),
|
||||
// we need to convert the old `Vec<String>` into a `BTreeSet<String>`
|
||||
stop_words: settings.stop_words.map(|o| o.map(|vec| vec.into_iter().collect())),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Extract Settings from `settings.json` file present at provided `dir_path`
|
||||
fn import_settings(dir_path: &Path) -> anyhow::Result<Settings> {
|
||||
let path = dir_path.join("settings.json");
|
||||
let file = File::open(path)?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
let metadata = serde_json::from_reader(reader)?;
|
||||
|
||||
Ok(metadata)
|
||||
}
|
||||
|
||||
|
||||
pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> {
|
||||
info!("Importing a dump from an old version of meilisearch with dump version 1");
|
||||
|
||||
std::fs::create_dir_all(&index_path)?;
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(size);
|
||||
let index = milli::Index::new(options.clone(), index_path)?;
|
||||
let index = Index(Arc::new(index));
|
||||
|
||||
// extract `settings.json` file and import content
|
||||
let settings = import_settings(&dump_path)?;
|
||||
dbg!(&settings);
|
||||
let settings: index_controller::Settings = settings.into();
|
||||
let update_builder = UpdateBuilder::new(0);
|
||||
index.update_settings(&settings, update_builder)?;
|
||||
|
||||
let update_builder = UpdateBuilder::new(1);
|
||||
let file = File::open(&dump_path.join("documents.jsonl"))?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
|
||||
index.update_documents(
|
||||
UpdateFormat::JsonStream,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
Some(reader),
|
||||
update_builder,
|
||||
None,
|
||||
)?;
|
||||
|
||||
// at this point we should handle the updates, but since the update logic is not handled in
|
||||
// meilisearch we are just going to ignore this part
|
||||
|
||||
// the last step: we extract the original milli::Index and close it
|
||||
Arc::try_unwrap(index.0)
|
||||
.map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index")
|
||||
.unwrap()
|
||||
.prepare_for_closing()
|
||||
.wait();
|
||||
|
||||
Ok(())
|
||||
}
|
51
meilisearch-http/src/index_controller/dump_actor/v2.rs
Normal file
51
meilisearch-http/src/index_controller/dump_actor/v2.rs
Normal file
|
@ -0,0 +1,51 @@
|
|||
use heed::EnvOpenOptions;
|
||||
use milli::update::{IndexDocumentsMethod, UpdateBuilder, UpdateFormat};
|
||||
use crate::index::Index;
|
||||
use crate::index_controller::Settings;
|
||||
use std::{fs::File, path::Path, sync::Arc};
|
||||
|
||||
/// Extract Settings from `settings.json` file present at provided `dir_path`
|
||||
fn import_settings(dir_path: &Path) -> anyhow::Result<Settings> {
|
||||
let path = dir_path.join("settings.json");
|
||||
let file = File::open(path)?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
let metadata = serde_json::from_reader(reader)?;
|
||||
|
||||
Ok(metadata)
|
||||
}
|
||||
|
||||
pub fn import_index(size: usize, dump_path: &Path, index_path: &Path) -> anyhow::Result<()> {
|
||||
std::fs::create_dir_all(&index_path)?;
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(size);
|
||||
let index = milli::Index::new(options, index_path)?;
|
||||
let index = Index(Arc::new(index));
|
||||
|
||||
// extract `settings.json` file and import content
|
||||
let settings = import_settings(&dump_path)?;
|
||||
let update_builder = UpdateBuilder::new(0);
|
||||
index.update_settings(&settings, update_builder)?;
|
||||
dbg!(settings);
|
||||
|
||||
let update_builder = UpdateBuilder::new(1);
|
||||
let file = File::open(&dump_path.join("documents.jsonl"))?;
|
||||
let reader = std::io::BufReader::new(file);
|
||||
|
||||
index.update_documents(
|
||||
UpdateFormat::JsonStream,
|
||||
IndexDocumentsMethod::ReplaceDocuments,
|
||||
Some(reader),
|
||||
update_builder,
|
||||
None,
|
||||
)?;
|
||||
|
||||
// the last step: we extract the original milli::Index and close it
|
||||
Arc::try_unwrap(index.0)
|
||||
.map_err(|_e| "[dumps] At this point no one is supposed to have a reference on the index")
|
||||
.unwrap()
|
||||
.prepare_for_closing()
|
||||
.wait();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue