Mirror of https://github.com/meilisearch/MeiliSearch (synced 2025-01-22 03:07:27 +01:00)
commit 6a1964f146 (parent 90018755c5)

    restore dumps
@@ -1,9 +1,9 @@
-use std::fs::{create_dir_all, File};
+use std::fs::File;
 use std::io::Write;
 use std::path::Path;
 
-use flate2::{read::GzDecoder, write::GzEncoder, Compression};
-use tar::{Archive, Builder};
+use flate2::{write::GzEncoder, Compression};
+use tar::Builder;
 
 pub fn to_tar_gz(src: impl AsRef<Path>, dest: impl AsRef<Path>) -> anyhow::Result<()> {
     let mut f = File::create(dest)?;
meilisearch-lib/src/document_formats.rs (new file, +52 lines)
@@ -0,0 +1,52 @@
+use std::{fmt, io::{Read, Seek, Write}};
+
+use milli::documents::DocumentBatchBuilder;
+use serde_json::{Deserializer, Map, Value};
+
+type Result<T> = std::result::Result<T, DocumentFormatError>;
+
+#[derive(Debug)]
+pub enum PayloadType {
+    Jsonl,
+}
+
+impl fmt::Display for PayloadType {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            PayloadType::Jsonl => write!(f, "ndjson"),
+        }
+    }
+}
+
+#[derive(thiserror::Error, Debug)]
+pub enum DocumentFormatError {
+    #[error("Internal error: {0}")]
+    Internal(Box<dyn std::error::Error + Send + Sync + 'static>),
+    #[error("{0}. The {1} payload provided is malformed.")]
+    MalformedPayload(Box<dyn std::error::Error + Send + Sync + 'static>, PayloadType),
+}
+
+internal_error!(
+    DocumentFormatError: milli::documents::Error
+);
+
+macro_rules! malformed {
+    ($type:path, $e:expr) => {
+        $e.map_err(|e| DocumentFormatError::MalformedPayload(Box::new(e), $type))
+    };
+}
+
+/// read jsonl from input and write an obkv batch to writer.
+pub fn read_jsonl(input: impl Read, writer: impl Write + Seek) -> Result<()> {
+    let mut builder = DocumentBatchBuilder::new(writer)?;
+    let stream = Deserializer::from_reader(input).into_iter::<Map<String, Value>>();
+
+    for value in stream {
+        let value = malformed!(PayloadType::Jsonl, value)?;
+        builder.add_documents(&value)?;
+    }
+
+    builder.finish()?;
+
+    Ok(())
+}
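Note: a minimal, hypothetical caller of the new read_jsonl helper might look like the sketch below (the sample payload, the function name, and the temp-file handling are assumptions, not part of this commit). It converts an ndjson payload into an obkv document batch written to a seekable writer, then rewinds it, which is the pattern the restored dump loaders below rely on.

// Hypothetical usage sketch of read_jsonl (not part of this commit).
use std::io::{Cursor, Seek, SeekFrom};

fn convert_ndjson_to_batch() -> anyhow::Result<()> {
    let ndjson = br#"{"id": 1, "title": "Pride and Prejudice"}
{"id": 2, "title": "Le Petit Prince"}"#;

    // Write the obkv batch into a seekable temp file, then rewind it so a
    // DocumentBatchReader can consume it later.
    let mut batch = tempfile::tempfile()?;
    read_jsonl(Cursor::new(&ndjson[..]), &mut batch)?;
    batch.seek(SeekFrom::Start(0))?;
    Ok(())
}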
@@ -1,12 +1,18 @@
-use std::fs::File;
-use std::io::Write;
+use std::fs::{create_dir_all, File};
+use std::io::{BufReader, Seek, SeekFrom, Write};
 use std::path::Path;
 
-use heed::RoTxn;
+use anyhow::Context;
+use heed::{EnvOpenOptions, RoTxn};
 use indexmap::IndexMap;
+use milli::documents::DocumentBatchReader;
 use serde::{Deserialize, Serialize};
+use serde_json::Value;
 
-use crate::options::IndexerOpts;
+use crate::document_formats::read_jsonl;
+use crate::index::update_handler::UpdateHandler;
+use crate::index::updates::apply_settings_to_builder;
+use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
 
 use super::error::Result;
 use super::{Index, Settings, Unchecked};
@@ -24,6 +30,11 @@ impl Index {
     pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
         // acquire write txn make sure any ongoing write is finished before we start.
         let txn = self.env.write_txn()?;
+        let path = path
+            .as_ref()
+            .join(format!("indexes/{}", self.uuid.to_string()));
+
+        create_dir_all(&path)?;
 
         self.dump_documents(&txn, &path)?;
         self.dump_meta(&txn, &path)?;
@@ -75,92 +86,101 @@ impl Index {
     }
 
     pub fn load_dump(
-        _src: impl AsRef<Path>,
-        _dst: impl AsRef<Path>,
-        _size: usize,
-        _indexing_options: &IndexerOpts,
+        src: impl AsRef<Path>,
+        dst: impl AsRef<Path>,
+        size: usize,
+        update_handler: &UpdateHandler,
     ) -> anyhow::Result<()> {
-        //let dir_name = src
-        //.as_ref()
-        //.file_name()
-        //.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
+        let dir_name = src
+            .as_ref()
+            .file_name()
+            .with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
 
-        //let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
-        //create_dir_all(&dst_dir_path)?;
+        let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
+        create_dir_all(&dst_dir_path)?;
 
-        //let meta_path = src.as_ref().join(META_FILE_NAME);
-        //let mut meta_file = File::open(meta_path)?;
+        let meta_path = src.as_ref().join(META_FILE_NAME);
+        let mut meta_file = File::open(meta_path)?;
 
-        //// We first deserialize the dump meta into a serde_json::Value and change
-        //// the custom ranking rules settings from the old format to the new format.
-        //let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
-        //if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
-        //convert_custom_ranking_rules(ranking_rules);
-        //}
+        // We first deserialize the dump meta into a serde_json::Value and change
+        // the custom ranking rules settings from the old format to the new format.
+        let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
+        if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
+            convert_custom_ranking_rules(ranking_rules);
+        }
 
-        //// Then we serialize it back into a vec to deserialize it
-        //// into a `DumpMeta` struct with the newly patched `rankingRules` format.
-        //let patched_meta = serde_json::to_vec(&meta)?;
+        // Then we serialize it back into a vec to deserialize it
+        // into a `DumpMeta` struct with the newly patched `rankingRules` format.
+        let patched_meta = serde_json::to_vec(&meta)?;
 
-        //let DumpMeta {
-        //settings,
-        //primary_key,
-        //} = serde_json::from_slice(&patched_meta)?;
-        //let settings = settings.check();
-        //let index = Self::open(&dst_dir_path, size)?;
-        //let mut txn = index.write_txn()?;
+        let DumpMeta {
+            settings,
+            primary_key,
+        } = serde_json::from_slice(&patched_meta)?;
+        let settings = settings.check();
 
-        //let handler = UpdateHandler::new(indexing_options)?;
+        let mut options = EnvOpenOptions::new();
+        options.map_size(size);
+        let index = milli::Index::new(options, &dst_dir_path)?;
 
-        //index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;
+        let mut txn = index.write_txn()?;
 
-        //let document_file_path = src.as_ref().join(DATA_FILE_NAME);
-        //let reader = File::open(&document_file_path)?;
-        //let mut reader = BufReader::new(reader);
-        //reader.fill_buf()?;
-        // If the document file is empty, we don't perform the document addition, to prevent
-        // a primary key error to be thrown.
+        // Apply settings first
+        let builder = update_handler.update_builder(0);
+        let mut builder = builder.settings(&mut txn, &index);
 
-        todo!("fix obk document dumps")
-        //if !reader.buffer().is_empty() {
-        //index.update_documents_txn(
-        //&mut txn,
-        //IndexDocumentsMethod::UpdateDocuments,
-        //Some(reader),
-        //handler.update_builder(0),
-        //primary_key.as_deref(),
-        //)?;
-        //}
+        if let Some(primary_key) = primary_key {
+            builder.set_primary_key(primary_key);
+        }
 
-        //txn.commit()?;
+        apply_settings_to_builder(&settings, &mut builder);
 
-        //match Arc::try_unwrap(index.0) {
-        //Ok(inner) => inner.prepare_for_closing().wait(),
-        //Err(_) => bail!("Could not close index properly."),
-        //}
+        builder.execute(|_, _| ())?;
 
-        //Ok(())
+        let document_file_path = src.as_ref().join(DATA_FILE_NAME);
+        let reader = BufReader::new(File::open(&document_file_path)?);
+
+        let mut tmp_doc_file = tempfile::tempfile()?;
+
+        read_jsonl(reader, &mut tmp_doc_file)?;
+
+        tmp_doc_file.seek(SeekFrom::Start(0))?;
+
+        let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?;
+
+        //If the document file is empty, we don't perform the document addition, to prevent
+        //a primary key error to be thrown.
+        if !documents_reader.is_empty() {
+            let builder = update_handler.update_builder(0).index_documents(&mut txn, &index);
+            builder.execute(documents_reader, |_, _| ())?;
+        }
+
+        txn.commit()?;
+
+        index.prepare_for_closing().wait();
+
+        Ok(())
     }
 }
 
-// /// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
-// ///
-// /// This is done for compatibility reasons, and to avoid a new dump version,
-// /// since the new syntax was introduced soon after the new dump version.
-//fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
-//*ranking_rules = match ranking_rules.take() {
-//Value::Array(values) => values
-//.into_iter()
-//.filter_map(|value| match value {
-//Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
-//.map(|f| format!("{}:asc", f))
-//.map(Value::String),
-//Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
-//.map(|f| format!("{}:desc", f))
-//.map(Value::String),
-//otherwise => Some(otherwise),
-//})
-//.collect(),
-//otherwise => otherwise,
-//}
-//}
+/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
+///
+/// This is done for compatibility reasons, and to avoid a new dump version,
+/// since the new syntax was introduced soon after the new dump version.
+fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
+    *ranking_rules = match ranking_rules.take() {
+        Value::Array(values) => values
+            .into_iter()
+            .filter_map(|value| match value {
+                Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
+                    .map(|f| format!("{}:asc", f))
+                    .map(Value::String),
+                Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
+                    .map(|f| format!("{}:desc", f))
+                    .map(Value::String),
+                otherwise => Some(otherwise),
+            })
+            .collect(),
+        otherwise => otherwise,
+    }
+}
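Note: a hypothetical illustration of the restored convert_custom_ranking_rules helper (the sample values below are an assumption, not taken from the diff). Per its doc comment, it rewrites legacy asc(_)/desc(_) entries in the dump's rankingRules value in place and leaves the built-in rules untouched.

// Hypothetical illustration of convert_custom_ranking_rules (not in this commit).
let mut rules = serde_json::json!(["words", "typo", "asc(price)", "desc(rank)"]);
convert_custom_ranking_rules(&mut rules);
assert_eq!(rules, serde_json::json!(["words", "typo", "price:asc", "rank:desc"]));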
@@ -266,59 +266,7 @@ impl Index {
         // We must use the write transaction of the update here.
         let mut builder = update_builder.settings(txn, self);
 
-        match settings.searchable_attributes {
-            Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
-            Setting::Reset => builder.reset_searchable_fields(),
-            Setting::NotSet => (),
-        }
-
-        match settings.displayed_attributes {
-            Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
-            Setting::Reset => builder.reset_displayed_fields(),
-            Setting::NotSet => (),
-        }
-
-        match settings.filterable_attributes {
-            Setting::Set(ref facets) => {
-                builder.set_filterable_fields(facets.clone().into_iter().collect())
-            }
-            Setting::Reset => builder.reset_filterable_fields(),
-            Setting::NotSet => (),
-        }
-
-        match settings.sortable_attributes {
-            Setting::Set(ref fields) => {
-                builder.set_sortable_fields(fields.iter().cloned().collect())
-            }
-            Setting::Reset => builder.reset_sortable_fields(),
-            Setting::NotSet => (),
-        }
-
-        match settings.ranking_rules {
-            Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
-            Setting::Reset => builder.reset_criteria(),
-            Setting::NotSet => (),
-        }
-
-        match settings.stop_words {
-            Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
-            Setting::Reset => builder.reset_stop_words(),
-            Setting::NotSet => (),
-        }
-
-        match settings.synonyms {
-            Setting::Set(ref synonyms) => {
-                builder.set_synonyms(synonyms.clone().into_iter().collect())
-            }
-            Setting::Reset => builder.reset_synonyms(),
-            Setting::NotSet => (),
-        }
-
-        match settings.distinct_attribute {
-            Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
-            Setting::Reset => builder.reset_distinct_field(),
-            Setting::NotSet => (),
-        }
-
+        apply_settings_to_builder(settings, &mut builder);
+
         builder.execute(|indexing_step, update_id| {
             debug!("update {}: {:?}", update_id, indexing_step)
@@ -328,6 +276,62 @@ impl Index {
     }
 }
 
+pub fn apply_settings_to_builder(settings: &Settings<Checked>, builder: &mut milli::update::Settings) {
+    match settings.searchable_attributes {
+        Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
+        Setting::Reset => builder.reset_searchable_fields(),
+        Setting::NotSet => (),
+    }
+
+    match settings.displayed_attributes {
+        Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
+        Setting::Reset => builder.reset_displayed_fields(),
+        Setting::NotSet => (),
+    }
+
+    match settings.filterable_attributes {
+        Setting::Set(ref facets) => {
+            builder.set_filterable_fields(facets.clone().into_iter().collect())
+        }
+        Setting::Reset => builder.reset_filterable_fields(),
+        Setting::NotSet => (),
+    }
+
+    match settings.sortable_attributes {
+        Setting::Set(ref fields) => {
+            builder.set_sortable_fields(fields.iter().cloned().collect())
+        }
+        Setting::Reset => builder.reset_sortable_fields(),
+        Setting::NotSet => (),
+    }
+
+    match settings.ranking_rules {
+        Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
+        Setting::Reset => builder.reset_criteria(),
+        Setting::NotSet => (),
+    }
+
+    match settings.stop_words {
+        Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
+        Setting::Reset => builder.reset_stop_words(),
+        Setting::NotSet => (),
+    }
+
+    match settings.synonyms {
+        Setting::Set(ref synonyms) => {
+            builder.set_synonyms(synonyms.clone().into_iter().collect())
+        }
+        Setting::Reset => builder.reset_synonyms(),
+        Setting::NotSet => (),
+    }
+
+    match settings.distinct_attribute {
+        Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
+        Setting::Reset => builder.reset_distinct_field(),
+        Setting::NotSet => (),
+    }
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
@@ -4,8 +4,8 @@ use chrono::{DateTime, Utc};
 use log::info;
 use serde::{Deserialize, Serialize};
 
-use crate::index::Index;
-use crate::index_controller::index_resolver::uuid_store::HeedUuidStore;
+use crate::index_controller::index_resolver::IndexResolver;
+use crate::index_controller::update_file_store::UpdateFileStore;
 use crate::index_controller::updates::store::UpdateStore;
 use crate::options::IndexerOpts;
 
@@ -41,19 +41,11 @@ impl MetadataV2 {
             self.dump_date, self.db_version
         );
 
-        info!("Loading index database.");
-        HeedUuidStore::load_dump(src.as_ref(), &dst)?;
+        IndexResolver::load_dump(src.as_ref(), &dst, index_db_size, indexing_options)?;
+        UpdateFileStore::load_dump(src.as_ref(), &dst)?;
 
-        info!("Loading updates.");
         UpdateStore::load_dump(&src, &dst, update_db_size)?;
 
         info!("Loading indexes.");
-        let indexes_path = src.as_ref().join("indexes");
-        let indexes = indexes_path.read_dir()?;
-        for index in indexes {
-            let index = index?;
-            Index::load_dump(&index.path(), &dst, index_db_size, indexing_options)?;
-        }
 
         Ok(())
     }
@@ -115,6 +115,7 @@ pub fn load_dump(
     let tmp_src = tempfile::tempdir_in(".")?;
     let tmp_src_path = tmp_src.path();
 
+    println!("importing to {}", dst_path.as_ref().display());
     crate::from_tar_gz(&src_path, tmp_src_path)?;
 
     let meta_path = tmp_src_path.join(META_FILE_NAME);
@@ -179,7 +180,7 @@ impl DumpTask {
 
         let uuids = self.index_resolver.dump(temp_dump_path.clone()).await?;
 
-        UpdateMsg::dump(&self.update_handle, uuids.into_iter().collect(), temp_dump_path.clone()).await?;
+        UpdateMsg::dump(&self.update_handle, uuids, temp_dump_path.clone()).await?;
 
         let dump_path = tokio::task::spawn_blocking(move || -> Result<PathBuf> {
             let temp_dump_file = tempfile::NamedTempFile::new_in(&self.path)?;
@@ -1,6 +1,5 @@
 pub mod uuid_store;
 mod index_store;
-//mod message;
 pub mod error;
 
 use std::path::Path;
@@ -10,7 +9,7 @@ use uuid_store::{UuidStore, HeedUuidStore};
 use index_store::{IndexStore, MapIndexStore};
 use error::{Result, IndexResolverError};
 
-use crate::{index::Index, options::IndexerOpts};
+use crate::{index::{Index, update_handler::UpdateHandler}, options::IndexerOpts};
 
 pub type HardStateIndexResolver = IndexResolver<HeedUuidStore, MapIndexStore>;
 
@@ -25,6 +24,28 @@ pub struct IndexResolver<U, I> {
     index_store: I,
 }
 
+impl IndexResolver<HeedUuidStore, MapIndexStore> {
+    pub fn load_dump(
+        src: impl AsRef<Path>,
+        dst: impl AsRef<Path>,
+        index_db_size: usize,
+        indexer_opts: &IndexerOpts,
+    ) -> anyhow::Result<()> {
+        HeedUuidStore::load_dump(&src, &dst)?;
+
+        let indexes_path = src.as_ref().join("indexes");
+        let indexes = indexes_path.read_dir()?;
+
+        let update_handler = UpdateHandler::new(indexer_opts).unwrap();
+        for index in indexes {
+            let index = index?;
+            Index::load_dump(&index.path(), &dst, index_db_size, &update_handler)?;
+        }
+
+        Ok(())
+    }
+}
+
 impl<U, I> IndexResolver<U ,I>
 where U: UuidStore,
       I: IndexStore,
@@ -39,8 +60,14 @@ where U: UuidStore,
         }
     }
 
-    pub async fn dump(&self, _path: impl AsRef<Path>) -> Result<Vec<Uuid>> {
-        todo!()
+    pub async fn dump(&self, path: impl AsRef<Path>) -> Result<Vec<Index>> {
+        let uuids = self.index_uuid_store.dump(path.as_ref().to_owned()).await?;
+        let mut indexes = Vec::new();
+        for uuid in uuids {
+            indexes.push(self.get_index_by_uuid(uuid).await?);
+        }
+
+        Ok(indexes)
     }
 
     pub async fn get_size(&self) -> Result<u64> {
@@ -51,7 +78,6 @@ where U: UuidStore,
     pub async fn snapshot(&self, path: impl AsRef<Path>) -> Result<Vec<Index>> {
         let uuids = self.index_uuid_store.snapshot(path.as_ref().to_owned()).await?;
         let mut indexes = Vec::new();
-
         for uuid in uuids {
             indexes.push(self.get_index_by_uuid(uuid).await?);
         }
@@ -1,14 +1,17 @@
-use std::fs::File;
+use std::fs::{File, create_dir_all};
+use std::io::{BufReader, BufWriter, Write};
 use std::path::{Path, PathBuf};
 use std::ops::{Deref, DerefMut};
 
-//use milli::documents::DocumentBatchReader;
-//use serde_json::Map;
+use milli::documents::DocumentBatchReader;
+use serde_json::Map;
 use tempfile::NamedTempFile;
 use uuid::Uuid;
 
 const UPDATE_FILES_PATH: &str = "updates/updates_files";
 
+use crate::document_formats::read_jsonl;
+
 use super::error::Result;
 
 pub struct UpdateFile {
@@ -42,6 +45,27 @@ pub struct UpdateFileStore {
 }
 
 impl UpdateFileStore {
+    pub fn load_dump(src: impl AsRef<Path>, dst: impl AsRef<Path>) -> anyhow::Result<()> {
+        let src_update_files_path = src.as_ref().join(UPDATE_FILES_PATH);
+        let dst_update_files_path = dst.as_ref().join(UPDATE_FILES_PATH);
+
+        create_dir_all(&dst_update_files_path).unwrap();
+
+        let entries = std::fs::read_dir(src_update_files_path).unwrap();
+
+        for entry in entries {
+            let entry = entry.unwrap();
+            let update_file = BufReader::new(File::open(entry.path()).unwrap());
+            let file_uuid = entry.file_name();
+            let file_uuid = file_uuid.to_str().ok_or_else(|| anyhow::anyhow!("invalid update file name"))?;
+            let dst_path = dst_update_files_path.join(file_uuid);
+            let dst_file = BufWriter::new(File::create(dst_path)?);
+            read_jsonl(update_file, dst_file)?;
+        }
+
+        Ok(())
+    }
+
     pub fn new(path: impl AsRef<Path>) -> Result<Self> {
         let path = path.as_ref().join(UPDATE_FILES_PATH);
         std::fs::create_dir_all(&path).unwrap();
@@ -78,27 +102,34 @@ impl UpdateFileStore {
     }
 
     /// Peform a dump of the given update file uuid into the provided snapshot path.
-    pub fn dump(&self, _uuid: Uuid, _snapshot_path: impl AsRef<Path>) -> Result<()> {
-        todo!()
-        //let update_file_path = self.path.join(uuid.to_string());
-        //let snapshot_file_path: snapshot_path.as_ref().join(format!("update_files/uuid", uuid));
+    pub fn dump(&self, uuid: Uuid, dump_path: impl AsRef<Path>) -> Result<()> {
+        let uuid_string = uuid.to_string();
+        let update_file_path = self.path.join(&uuid_string);
+        let mut dst = dump_path.as_ref().join(UPDATE_FILES_PATH);
+        std::fs::create_dir_all(&dst).unwrap();
+        dst.push(&uuid_string);
 
-        //let update_file = File::open(update_file_path).unwrap();
-        //let mut document_reader = DocumentBatchReader::from_reader(update_file).unwrap();
+        let update_file = File::open(update_file_path).unwrap();
+        let mut dst_file = NamedTempFile::new().unwrap();
+        let mut document_reader = DocumentBatchReader::from_reader(update_file).unwrap();
 
-        //let mut document_buffer = Map::new();
-        //// TODO: we need to find a way to do this more efficiently. (create a custom serializer to
-        //// jsonl for example...)
-        //while let Some((index, document)) = document_reader.next_document_with_index().unwrap() {
-        //for (field_id, content) in document.iter() {
-        //let field_name = index.get_by_left(&field_id).unwrap();
-        //let content = serde_json::from_slice(content).unwrap();
-        //document_buffer.insert(field_name.to_string(), content);
-        //}
-
-        //}
-
-        //Ok(())
+        let mut document_buffer = Map::new();
+        // TODO: we need to find a way to do this more efficiently. (create a custom serializer to
+        // jsonl for example...)
+        while let Some((index, document)) = document_reader.next_document_with_index().unwrap() {
+            for (field_id, content) in document.iter() {
+                let field_name = index.get_by_left(&field_id).unwrap();
+                let content = serde_json::from_slice(content).unwrap();
+                document_buffer.insert(field_name.to_string(), content);
+            }
+
+            serde_json::to_writer(&mut dst_file, &document_buffer).unwrap();
+            dst_file.write(b"\n").unwrap();
+            document_buffer.clear();
+        }
+
+        dst_file.persist(dst).unwrap();
+
+        Ok(())
     }
 }
@@ -1,4 +1,3 @@
-use std::collections::HashSet;
 use std::path::PathBuf;
 
 use tokio::sync::{mpsc, oneshot};
@@ -35,7 +34,7 @@ pub enum UpdateMsg {
         ret: oneshot::Sender<Result<()>>,
     },
     Dump {
-        uuids: HashSet<Uuid>,
+        indexes: Vec<Index>,
         path: PathBuf,
         ret: oneshot::Sender<Result<()>>,
     },
@@ -54,11 +53,11 @@ impl UpdateMsg {
 
     pub async fn dump(
         sender: &mpsc::Sender<Self>,
-        uuids: HashSet<Uuid>,
+        indexes: Vec<Index>,
         path: PathBuf,
     ) -> Result<()> {
         let (ret, rcv) = oneshot::channel();
-        let msg = Self::Dump { path, uuids, ret };
+        let msg = Self::Dump { path, indexes, ret };
         sender.send(msg).await?;
         rcv.await?
     }
|
|||||||
pub mod status;
|
pub mod status;
|
||||||
pub mod store;
|
pub mod store;
|
||||||
|
|
||||||
use std::collections::HashSet;
|
|
||||||
use std::io;
|
use std::io;
|
||||||
use std::path::{Path, PathBuf};
|
use std::path::{Path, PathBuf};
|
||||||
use std::sync::atomic::AtomicBool;
|
use std::sync::atomic::AtomicBool;
|
||||||
@@ -104,7 +103,6 @@ pub struct UpdateLoop {
     store: Arc<UpdateStore>,
     inbox: Option<mpsc::Receiver<UpdateMsg>>,
    update_file_store: UpdateFileStore,
-    index_resolver: Arc<HardStateIndexResolver>,
     must_exit: Arc<AtomicBool>,
 }
 
@@ -133,7 +131,6 @@ impl UpdateLoop {
             inbox,
             must_exit,
             update_file_store,
-            index_resolver,
         })
     }
 
@@ -184,8 +181,8 @@ impl UpdateLoop {
                 GetInfo { ret } => {
                     let _ = ret.send(self.handle_get_info().await);
                 }
-                Dump { uuids, path, ret } => {
-                    let _ = ret.send(self.handle_dump(uuids, path).await);
+                Dump { indexes, path, ret } => {
+                    let _ = ret.send(self.handle_dump(indexes, path).await);
                 }
             }
         })
@@ -278,12 +275,11 @@ impl UpdateLoop {
         Ok(())
     }
 
-    async fn handle_dump(&self, uuids: HashSet<Uuid>, path: PathBuf) -> Result<()> {
-        let index_handle = self.index_resolver.clone();
+    async fn handle_dump(&self, indexes: Vec<Index>, path: PathBuf) -> Result<()> {
         let update_store = self.store.clone();
 
         tokio::task::spawn_blocking(move || -> Result<()> {
-            update_store.dump(&uuids, path.to_path_buf(), index_handle)?;
+            update_store.dump(&indexes, path.to_path_buf())?;
             Ok(())
         })
         .await??;
@@ -1,11 +1,17 @@
-use std::{collections::HashSet, fs::{create_dir_all, File}, io::Write, path::{Path, PathBuf}, sync::Arc};
+use std::collections::HashSet;
+use std::path::{Path, PathBuf};
+use std::io::{BufReader, Write};
+use std::fs::{File, create_dir_all};
 
-use heed::RoTxn;
+use heed::{EnvOpenOptions, RoTxn};
+use rayon::prelude::*;
 use serde::{Deserialize, Serialize};
+use serde_json::Deserializer;
+use tempfile::{NamedTempFile, TempDir};
 use uuid::Uuid;
 
 use super::{Result, State, UpdateStore};
-use crate::index_controller::{index_resolver::HardStateIndexResolver, updates::status::UpdateStatus};
+use crate::{RegisterUpdate, index::Index, index_controller::{update_file_store::UpdateFileStore, updates::status::{Enqueued, UpdateStatus}}};
 
 #[derive(Serialize, Deserialize)]
 struct UpdateEntry {
@@ -16,9 +22,8 @@ struct UpdateEntry {
 impl UpdateStore {
     pub fn dump(
         &self,
-        uuids: &HashSet<Uuid>,
+        indexes: &[Index],
         path: PathBuf,
-        handle: Arc<HardStateIndexResolver>,
     ) -> Result<()> {
         let state_lock = self.state.write();
         state_lock.swap(State::Dumping);
@@ -26,15 +31,11 @@ impl UpdateStore {
         // txn must *always* be acquired after state lock, or it will dead lock.
         let txn = self.env.write_txn()?;
 
-        let dump_path = path.join("updates");
-        create_dir_all(&dump_path)?;
+        let uuids = indexes.iter().map(|i| i.uuid).collect();
 
-        self.dump_updates(&txn, uuids, &dump_path)?;
+        self.dump_updates(&txn, &uuids, &path)?;
 
-        let fut = dump_indexes(uuids, handle, &path);
-        tokio::runtime::Handle::current().block_on(fut)?;
-
-        state_lock.swap(State::Idle);
+        indexes.par_iter().try_for_each(|index| index.dump(&path)).unwrap();
 
         Ok(())
     }
@@ -45,58 +46,59 @@ impl UpdateStore {
         uuids: &HashSet<Uuid>,
         path: impl AsRef<Path>,
     ) -> Result<()> {
-        //let dump_data_path = path.as_ref().join("data.jsonl");
-        //let mut dump_data_file = File::create(dump_data_path)?;
+        let mut dump_data_file = NamedTempFile::new()?;
 
-        //let update_files_path = path.as_ref().join(super::UPDATE_DIR);
-        //create_dir_all(&update_files_path)?;
+        self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
+        self.dump_completed(txn, uuids, &mut dump_data_file)?;
 
-        //self.dump_pending(txn, uuids, &mut dump_data_file, &path)?;
-        //self.dump_completed(txn, uuids, &mut dump_data_file)?;
+        let mut dst_path = path.as_ref().join("updates");
+        create_dir_all(&dst_path)?;
+        dst_path.push("data.jsonl");
+        dump_data_file.persist(dst_path).unwrap();
 
-        //Ok(())
-        todo!()
+        Ok(())
     }
 
     fn dump_pending(
         &self,
-        _txn: &RoTxn,
-        _uuids: &HashSet<Uuid>,
-        _file: &mut File,
-        _dst_path: impl AsRef<Path>,
+        txn: &RoTxn,
+        uuids: &HashSet<Uuid>,
+        mut file: impl Write,
+        dst_path: impl AsRef<Path>,
     ) -> Result<()> {
-        todo!()
-        //let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();
+        let pendings = self.pending_queue.iter(txn)?.lazily_decode_data();
 
-        //for pending in pendings {
-        //let ((_, uuid, _), data) = pending?;
-        //if uuids.contains(&uuid) {
-        //let update = data.decode()?;
+        for pending in pendings {
+            let ((_, uuid, _), data) = pending?;
+            if uuids.contains(&uuid) {
+                let update = data.decode()?;
 
-        //if let Some(ref update_uuid) = update.content {
-        //let src = super::update_uuid_to_file_path(&self.path, *update_uuid);
-        //let dst = super::update_uuid_to_file_path(&dst_path, *update_uuid);
-        //std::fs::copy(src, dst)?;
-        //}
+                if let Enqueued {
+                    meta: RegisterUpdate::DocumentAddition {
+                        content_uuid, ..
+                    }, ..
+                } = update {
+                    self.update_file_store.dump(content_uuid, &dst_path).unwrap();
+                }
 
-        //let update_json = UpdateEntry {
-        //uuid,
-        //update: update.into(),
-        //};
+                let update_json = UpdateEntry {
+                    uuid,
+                    update: update.into(),
+                };
 
-        //serde_json::to_writer(&mut file, &update_json)?;
-        //file.write_all(b"\n")?;
-        //}
-        //}
+                serde_json::to_writer(&mut file, &update_json)?;
+                file.write_all(b"\n")?;
+            }
+        }
 
-        //Ok(())
+        Ok(())
     }
 
     fn dump_completed(
         &self,
         txn: &RoTxn,
         uuids: &HashSet<Uuid>,
-        mut file: &mut File,
+        mut file: impl Write,
     ) -> Result<()> {
         let updates = self.updates.iter(txn)?.lazily_decode_data();
 
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn load_dump(
|
pub fn load_dump(
|
||||||
_src: impl AsRef<Path>,
|
src: impl AsRef<Path>,
|
||||||
_dst: impl AsRef<Path>,
|
dst: impl AsRef<Path>,
|
||||||
_db_size: usize,
|
db_size: usize,
|
||||||
) -> anyhow::Result<()> {
|
) -> anyhow::Result<()> {
|
||||||
todo!()
|
|
||||||
//let dst_update_path = dst.as_ref().join("updates/");
|
|
||||||
//create_dir_all(&dst_update_path)?;
|
|
||||||
|
|
||||||
//let mut options = EnvOpenOptions::new();
|
println!("target path: {}", dst.as_ref().display());
|
||||||
//options.map_size(db_size as usize);
|
|
||||||
//let (store, _) = UpdateStore::new(options, &dst_update_path)?;
|
|
||||||
|
|
||||||
//let src_update_path = src.as_ref().join("updates");
|
let mut options = EnvOpenOptions::new();
|
||||||
//let update_data = File::open(&src_update_path.join("data.jsonl"))?;
|
options.map_size(db_size as usize);
|
||||||
//let mut update_data = BufReader::new(update_data);
|
|
||||||
|
|
||||||
//std::fs::create_dir_all(dst_update_path.join("update_files/"))?;
|
// create a dummy update fiel store, since it is not needed right now.
|
||||||
|
let tmp = TempDir::new().unwrap();
|
||||||
|
let update_file_store = UpdateFileStore::new(tmp.path()).unwrap();
|
||||||
|
let (store, _) = UpdateStore::new(options, &dst, update_file_store)?;
|
||||||
|
|
||||||
//let mut wtxn = store.env.write_txn()?;
|
let src_update_path = src.as_ref().join("updates");
|
||||||
//let mut line = String::new();
|
let update_data = File::open(&src_update_path.join("data.jsonl"))?;
|
||||||
//loop {
|
let update_data = BufReader::new(update_data);
|
||||||
//match update_data.read_line(&mut line) {
|
|
||||||
//Ok(0) => break,
|
|
||||||
//Ok(_) => {
|
|
||||||
//let UpdateEntry { uuid, update } = serde_json::from_str(&line)?;
|
|
||||||
//store.register_raw_updates(&mut wtxn, &update, uuid)?;
|
|
||||||
|
|
||||||
//// Copy ascociated update path if it exists
|
let stream = Deserializer::from_reader(update_data).into_iter::<UpdateEntry>();
|
||||||
//if let UpdateStatus::Enqueued(Enqueued {
|
let mut wtxn = store.env.write_txn()?;
|
||||||
//content: Some(uuid),
|
|
||||||
//..
|
|
||||||
//}) = update
|
|
||||||
//{
|
|
||||||
//let src = update_uuid_to_file_path(&src_update_path, uuid);
|
|
||||||
//let dst = update_uuid_to_file_path(&dst_update_path, uuid);
|
|
||||||
//std::fs::copy(src, dst)?;
|
|
||||||
//}
|
|
||||||
//}
|
|
||||||
//_ => break,
|
|
||||||
//}
|
|
||||||
|
|
||||||
//line.clear();
|
for entry in stream {
|
||||||
//}
|
let UpdateEntry { uuid, update } = entry?;
|
||||||
|
store.register_raw_updates(&mut wtxn, &update, uuid)?;
|
||||||
|
}
|
||||||
|
|
||||||
//wtxn.commit()?;
|
wtxn.commit()?;
|
||||||
|
|
||||||
//Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn dump_indexes(
|
|
||||||
_uuids: &HashSet<Uuid>,
|
|
||||||
_handle: Arc<HardStateIndexResolver>,
|
|
||||||
_path: impl AsRef<Path>,
|
|
||||||
) -> Result<()> {
|
|
||||||
todo!()
|
|
||||||
//for uuid in uuids {
|
|
||||||
//IndexMsg::dump(&handle, *uuid, path.as_ref().to_owned()).await?;
|
|
||||||
//}
|
|
||||||
|
|
||||||
//Ok(())
|
|
||||||
}
|
|
||||||
|
@@ -262,28 +262,28 @@ impl UpdateStore {
 
     // /// Push already processed update in the UpdateStore without triggering the notification
     // /// process. This is useful for the dumps.
-    //pub fn register_raw_updates(
-    //&self,
-    //wtxn: &mut heed::RwTxn,
-    //update: &UpdateStatus,
-    //index_uuid: Uuid,
-    //) -> heed::Result<()> {
-    //match update {
-    //UpdateStatus::Enqueued(enqueued) => {
-    //let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?;
-    //self.pending_queue.remap_key_type::<PendingKeyCodec>().put(
-    //wtxn,
-    //&(global_id, index_uuid, enqueued.id()),
-    //enqueued,
-    //)?;
-    //}
-    //_ => {
-    //let _update_id = self.next_update_id_raw(wtxn, index_uuid)?;
-    //self.updates.put(wtxn, &(index_uuid, update.id()), update)?;
-    //}
-    //}
-    //Ok(())
-    //}
+    pub fn register_raw_updates(
+        &self,
+        wtxn: &mut heed::RwTxn,
+        update: &UpdateStatus,
+        index_uuid: Uuid,
+    ) -> heed::Result<()> {
+        match update {
+            UpdateStatus::Enqueued(enqueued) => {
+                let (global_id, _update_id) = self.next_update_id(wtxn, index_uuid)?;
+                self.pending_queue.remap_key_type::<PendingKeyCodec>().put(
+                    wtxn,
+                    &(global_id, index_uuid, enqueued.id()),
+                    enqueued,
+                )?;
+            }
+            _ => {
+                let _update_id = self.next_update_id_raw(wtxn, index_uuid)?;
+                self.updates.put(wtxn, &(index_uuid, update.id()), update)?;
+            }
+        }
+        Ok(())
+    }
 
     /// Executes the user provided function on the next pending update (the one with the lowest id).
     /// This is asynchronous as it let the user process the update with a read-only txn and
@@ -8,6 +8,7 @@ pub mod index_controller;
 pub use index_controller::{IndexController as MeiliSearch, updates::RegisterUpdate};
 
 mod compression;
+mod document_formats;
 
 use walkdir::WalkDir;
 