mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
restore dumps
This commit is contained in:
parent
90018755c5
commit
6a1964f146
13 changed files with 395 additions and 301 deletions
|
@ -1,12 +1,18 @@
|
|||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::fs::{create_dir_all, File};
|
||||
use std::io::{BufReader, Seek, SeekFrom, Write};
|
||||
use std::path::Path;
|
||||
|
||||
use heed::RoTxn;
|
||||
use anyhow::Context;
|
||||
use heed::{EnvOpenOptions, RoTxn};
|
||||
use indexmap::IndexMap;
|
||||
use milli::documents::DocumentBatchReader;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
|
||||
use crate::options::IndexerOpts;
|
||||
use crate::document_formats::read_jsonl;
|
||||
use crate::index::update_handler::UpdateHandler;
|
||||
use crate::index::updates::apply_settings_to_builder;
|
||||
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
|
||||
|
||||
use super::error::Result;
|
||||
use super::{Index, Settings, Unchecked};
|
||||
|
@ -24,6 +30,11 @@ impl Index {
|
|||
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
||||
// Acquire a write txn to make sure any ongoing write is finished before we start.
|
||||
let txn = self.env.write_txn()?;
|
||||
let path = path
|
||||
.as_ref()
|
||||
.join(format!("indexes/{}", self.uuid.to_string()));
|
||||
|
||||
create_dir_all(&path)?;
|
||||
|
||||
self.dump_documents(&txn, &path)?;
|
||||
self.dump_meta(&txn, &path)?;
|
||||
|
@ -75,92 +86,101 @@ impl Index {
|
|||
}
|
||||
|
||||
pub fn load_dump(
|
||||
_src: impl AsRef<Path>,
|
||||
_dst: impl AsRef<Path>,
|
||||
_size: usize,
|
||||
_indexing_options: &IndexerOpts,
|
||||
src: impl AsRef<Path>,
|
||||
dst: impl AsRef<Path>,
|
||||
size: usize,
|
||||
update_handler: &UpdateHandler,
|
||||
) -> anyhow::Result<()> {
|
||||
//let dir_name = src
|
||||
//.as_ref()
|
||||
//.file_name()
|
||||
//.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
|
||||
let dir_name = src
|
||||
.as_ref()
|
||||
.file_name()
|
||||
.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
|
||||
|
||||
//let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
|
||||
//create_dir_all(&dst_dir_path)?;
|
||||
let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
|
||||
create_dir_all(&dst_dir_path)?;
|
||||
|
||||
//let meta_path = src.as_ref().join(META_FILE_NAME);
|
||||
//let mut meta_file = File::open(meta_path)?;
|
||||
let meta_path = src.as_ref().join(META_FILE_NAME);
|
||||
let mut meta_file = File::open(meta_path)?;
|
||||
|
||||
//// We first deserialize the dump meta into a serde_json::Value and change
|
||||
//// the custom ranking rules settings from the old format to the new format.
|
||||
//let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
|
||||
//if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
|
||||
//convert_custom_ranking_rules(ranking_rules);
|
||||
//}
|
||||
// We first deserialize the dump meta into a serde_json::Value and change
|
||||
// the custom ranking rules settings from the old format to the new format.
|
||||
let mut meta: Value = serde_json::from_reader(&mut meta_file)?;
|
||||
if let Some(ranking_rules) = meta.pointer_mut("/settings/rankingRules") {
|
||||
convert_custom_ranking_rules(ranking_rules);
|
||||
}
|
||||
|
||||
//// Then we serialize it back into a vec to deserialize it
|
||||
//// into a `DumpMeta` struct with the newly patched `rankingRules` format.
|
||||
//let patched_meta = serde_json::to_vec(&meta)?;
|
||||
// Then we serialize it back into a vec to deserialize it
|
||||
// into a `DumpMeta` struct with the newly patched `rankingRules` format.
|
||||
let patched_meta = serde_json::to_vec(&meta)?;
|
||||
|
||||
//let DumpMeta {
|
||||
//settings,
|
||||
//primary_key,
|
||||
//} = serde_json::from_slice(&patched_meta)?;
|
||||
//let settings = settings.check();
|
||||
//let index = Self::open(&dst_dir_path, size)?;
|
||||
//let mut txn = index.write_txn()?;
|
||||
let DumpMeta {
|
||||
settings,
|
||||
primary_key,
|
||||
} = serde_json::from_slice(&patched_meta)?;
|
||||
let settings = settings.check();
|
||||
|
||||
//let handler = UpdateHandler::new(indexing_options)?;
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(size);
|
||||
let index = milli::Index::new(options, &dst_dir_path)?;
|
||||
|
||||
//index.update_settings_txn(&mut txn, &settings, handler.update_builder(0))?;
|
||||
let mut txn = index.write_txn()?;
|
||||
|
||||
//let document_file_path = src.as_ref().join(DATA_FILE_NAME);
|
||||
//let reader = File::open(&document_file_path)?;
|
||||
//let mut reader = BufReader::new(reader);
|
||||
//reader.fill_buf()?;
|
||||
// If the document file is empty, we don't perform the document addition, to prevent
|
||||
// a primary key error from being thrown.
|
||||
// Apply settings first
|
||||
let builder = update_handler.update_builder(0);
|
||||
let mut builder = builder.settings(&mut txn, &index);
|
||||
|
||||
todo!("fix obk document dumps")
|
||||
//if !reader.buffer().is_empty() {
|
||||
//index.update_documents_txn(
|
||||
//&mut txn,
|
||||
//IndexDocumentsMethod::UpdateDocuments,
|
||||
//Some(reader),
|
||||
//handler.update_builder(0),
|
||||
//primary_key.as_deref(),
|
||||
//)?;
|
||||
//}
|
||||
if let Some(primary_key) = primary_key {
|
||||
builder.set_primary_key(primary_key);
|
||||
}
|
||||
|
||||
//txn.commit()?;
|
||||
apply_settings_to_builder(&settings, &mut builder);
|
||||
|
||||
//match Arc::try_unwrap(index.0) {
|
||||
//Ok(inner) => inner.prepare_for_closing().wait(),
|
||||
//Err(_) => bail!("Could not close index properly."),
|
||||
//}
|
||||
builder.execute(|_, _| ())?;
|
||||
|
||||
//Ok(())
|
||||
let document_file_path = src.as_ref().join(DATA_FILE_NAME);
|
||||
let reader = BufReader::new(File::open(&document_file_path)?);
|
||||
|
||||
let mut tmp_doc_file = tempfile::tempfile()?;
|
||||
|
||||
read_jsonl(reader, &mut tmp_doc_file)?;
|
||||
|
||||
tmp_doc_file.seek(SeekFrom::Start(0))?;
|
||||
|
||||
let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?;
|
||||
|
||||
//If the document file is empty, we don't perform the document addition, to prevent
|
||||
//a primary key error from being thrown.
|
||||
if !documents_reader.is_empty() {
|
||||
let builder = update_handler.update_builder(0).index_documents(&mut txn, &index);
|
||||
builder.execute(documents_reader, |_, _| ())?;
|
||||
}
|
||||
|
||||
txn.commit()?;
|
||||
|
||||
index.prepare_for_closing().wait();
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
// /// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
|
||||
// ///
|
||||
// /// This is done for compatibility reasons, and to avoid a new dump version,
|
||||
// /// since the new syntax was introduced soon after the new dump version.
|
||||
//fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
|
||||
//*ranking_rules = match ranking_rules.take() {
|
||||
//Value::Array(values) => values
|
||||
//.into_iter()
|
||||
//.filter_map(|value| match value {
|
||||
//Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
|
||||
//.map(|f| format!("{}:asc", f))
|
||||
//.map(Value::String),
|
||||
//Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
|
||||
//.map(|f| format!("{}:desc", f))
|
||||
//.map(Value::String),
|
||||
//otherwise => Some(otherwise),
|
||||
//})
|
||||
//.collect(),
|
||||
//otherwise => otherwise,
|
||||
//}
|
||||
//}
|
||||
/// Converts the ranking rules from the format `asc(_)`, `desc(_)` to the format `_:asc`, `_:desc`.
|
||||
///
|
||||
/// This is done for compatibility reasons, and to avoid a new dump version,
|
||||
/// since the new syntax was introduced soon after the new dump version.
|
||||
fn convert_custom_ranking_rules(ranking_rules: &mut Value) {
|
||||
*ranking_rules = match ranking_rules.take() {
|
||||
Value::Array(values) => values
|
||||
.into_iter()
|
||||
.filter_map(|value| match value {
|
||||
Value::String(s) if s.starts_with("asc") => asc_ranking_rule(&s)
|
||||
.map(|f| format!("{}:asc", f))
|
||||
.map(Value::String),
|
||||
Value::String(s) if s.starts_with("desc") => desc_ranking_rule(&s)
|
||||
.map(|f| format!("{}:desc", f))
|
||||
.map(Value::String),
|
||||
otherwise => Some(otherwise),
|
||||
})
|
||||
.collect(),
|
||||
otherwise => otherwise,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -266,59 +266,7 @@ impl Index {
|
|||
// We must use the write transaction of the update here.
|
||||
let mut builder = update_builder.settings(txn, self);
|
||||
|
||||
match settings.searchable_attributes {
|
||||
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
|
||||
Setting::Reset => builder.reset_searchable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.displayed_attributes {
|
||||
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
|
||||
Setting::Reset => builder.reset_displayed_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.filterable_attributes {
|
||||
Setting::Set(ref facets) => {
|
||||
builder.set_filterable_fields(facets.clone().into_iter().collect())
|
||||
}
|
||||
Setting::Reset => builder.reset_filterable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.sortable_attributes {
|
||||
Setting::Set(ref fields) => {
|
||||
builder.set_sortable_fields(fields.iter().cloned().collect())
|
||||
}
|
||||
Setting::Reset => builder.reset_sortable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.ranking_rules {
|
||||
Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
|
||||
Setting::Reset => builder.reset_criteria(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.stop_words {
|
||||
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
|
||||
Setting::Reset => builder.reset_stop_words(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.synonyms {
|
||||
Setting::Set(ref synonyms) => {
|
||||
builder.set_synonyms(synonyms.clone().into_iter().collect())
|
||||
}
|
||||
Setting::Reset => builder.reset_synonyms(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.distinct_attribute {
|
||||
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
|
||||
Setting::Reset => builder.reset_distinct_field(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
apply_settings_to_builder(settings, &mut builder);
|
||||
|
||||
builder.execute(|indexing_step, update_id| {
|
||||
debug!("update {}: {:?}", update_id, indexing_step)
|
||||
|
@ -328,6 +276,62 @@ impl Index {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn apply_settings_to_builder(settings: &Settings<Checked>, builder: &mut milli::update::Settings) {
|
||||
match settings.searchable_attributes {
|
||||
Setting::Set(ref names) => builder.set_searchable_fields(names.clone()),
|
||||
Setting::Reset => builder.reset_searchable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.displayed_attributes {
|
||||
Setting::Set(ref names) => builder.set_displayed_fields(names.clone()),
|
||||
Setting::Reset => builder.reset_displayed_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.filterable_attributes {
|
||||
Setting::Set(ref facets) => {
|
||||
builder.set_filterable_fields(facets.clone().into_iter().collect())
|
||||
}
|
||||
Setting::Reset => builder.reset_filterable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.sortable_attributes {
|
||||
Setting::Set(ref fields) => {
|
||||
builder.set_sortable_fields(fields.iter().cloned().collect())
|
||||
}
|
||||
Setting::Reset => builder.reset_sortable_fields(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.ranking_rules {
|
||||
Setting::Set(ref criteria) => builder.set_criteria(criteria.clone()),
|
||||
Setting::Reset => builder.reset_criteria(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.stop_words {
|
||||
Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()),
|
||||
Setting::Reset => builder.reset_stop_words(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.synonyms {
|
||||
Setting::Set(ref synonyms) => {
|
||||
builder.set_synonyms(synonyms.clone().into_iter().collect())
|
||||
}
|
||||
Setting::Reset => builder.reset_synonyms(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
|
||||
match settings.distinct_attribute {
|
||||
Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()),
|
||||
Setting::Reset => builder.reset_distinct_field(),
|
||||
Setting::NotSet => (),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue