2021-09-28 11:59:55 +02:00
|
|
|
use std::fs::{create_dir_all, File};
|
|
|
|
use std::io::{BufReader, Seek, SeekFrom, Write};
|
2021-05-31 16:40:59 +02:00
|
|
|
use std::path::Path;
|
|
|
|
|
2021-09-28 11:59:55 +02:00
|
|
|
use anyhow::Context;
|
2021-05-26 22:52:06 +02:00
|
|
|
use indexmap::IndexMap;
|
2021-09-28 11:59:55 +02:00
|
|
|
use milli::documents::DocumentBatchReader;
|
2022-03-16 13:45:58 +01:00
|
|
|
use milli::heed::{EnvOpenOptions, RoTxn};
|
2022-01-19 11:21:19 +01:00
|
|
|
use milli::update::{IndexDocumentsConfig, IndexerConfig};
|
2021-05-26 22:52:06 +02:00
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
|
2022-01-13 12:30:35 +01:00
|
|
|
use crate::document_formats::read_ndjson;
|
2021-09-28 11:59:55 +02:00
|
|
|
use crate::index::updates::apply_settings_to_builder;
|
2021-05-26 22:52:06 +02:00
|
|
|
|
2021-06-17 14:36:32 +02:00
|
|
|
use super::error::Result;
|
2021-10-04 12:15:21 +02:00
|
|
|
use super::{index::Index, Settings, Unchecked};
|
2021-05-26 22:52:06 +02:00
|
|
|
|
|
|
|
#[derive(Serialize, Deserialize)]
|
|
|
|
struct DumpMeta {
|
2021-05-27 14:30:20 +02:00
|
|
|
settings: Settings<Unchecked>,
|
2021-05-26 22:52:06 +02:00
|
|
|
primary_key: Option<String>,
|
|
|
|
}
|
|
|
|
|
2021-05-31 16:03:39 +02:00
|
|
|
/// Name of the file, inside an index's dump directory, that holds the serialized `DumpMeta`.
const META_FILE_NAME: &str = "meta.json";
|
|
|
|
/// Name of the file, inside an index's dump directory, that holds one JSON document per line.
const DATA_FILE_NAME: &str = "documents.jsonl";
|
2021-05-26 22:52:06 +02:00
|
|
|
|
|
|
|
impl Index {
|
2021-06-14 21:26:35 +02:00
|
|
|
pub fn dump(&self, path: impl AsRef<Path>) -> Result<()> {
|
2021-06-01 11:18:37 +02:00
|
|
|
// acquire write txn make sure any ongoing write is finished before we start.
|
2021-05-26 22:52:06 +02:00
|
|
|
let txn = self.env.write_txn()?;
|
2022-01-12 15:57:31 +01:00
|
|
|
let path = path.as_ref().join(format!("indexes/{}", self.uuid));
|
2021-09-28 11:59:55 +02:00
|
|
|
|
|
|
|
create_dir_all(&path)?;
|
2021-05-26 22:52:06 +02:00
|
|
|
|
|
|
|
self.dump_documents(&txn, &path)?;
|
|
|
|
self.dump_meta(&txn, &path)?;
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2021-06-14 21:26:35 +02:00
|
|
|
fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
|
2021-05-26 22:52:06 +02:00
|
|
|
let document_file_path = path.as_ref().join(DATA_FILE_NAME);
|
|
|
|
let mut document_file = File::create(&document_file_path)?;
|
|
|
|
|
2021-06-17 14:36:32 +02:00
|
|
|
let documents = self.all_documents(txn)?;
|
2021-05-26 22:52:06 +02:00
|
|
|
let fields_ids_map = self.fields_ids_map(txn)?;
|
|
|
|
|
|
|
|
// dump documents
|
|
|
|
let mut json_map = IndexMap::new();
|
|
|
|
for document in documents {
|
|
|
|
let (_, reader) = document?;
|
|
|
|
|
|
|
|
for (fid, bytes) in reader.iter() {
|
|
|
|
if let Some(name) = fields_ids_map.name(fid) {
|
|
|
|
json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
serde_json::to_writer(&mut document_file, &json_map)?;
|
2021-05-31 16:40:59 +02:00
|
|
|
document_file.write_all(b"\n")?;
|
2021-05-26 22:52:06 +02:00
|
|
|
|
|
|
|
json_map.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
2021-06-14 21:26:35 +02:00
|
|
|
fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> Result<()> {
|
2021-05-26 22:52:06 +02:00
|
|
|
let meta_file_path = path.as_ref().join(META_FILE_NAME);
|
|
|
|
let mut meta_file = File::create(&meta_file_path)?;
|
|
|
|
|
2021-05-27 14:30:20 +02:00
|
|
|
let settings = self.settings_txn(txn)?.into_unchecked();
|
2021-05-26 22:52:06 +02:00
|
|
|
let primary_key = self.primary_key(txn)?.map(String::from);
|
2021-05-31 10:58:51 +02:00
|
|
|
let meta = DumpMeta {
|
|
|
|
settings,
|
|
|
|
primary_key,
|
|
|
|
};
|
2021-05-26 22:52:06 +02:00
|
|
|
|
|
|
|
serde_json::to_writer(&mut meta_file, &meta)?;
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn load_dump(
|
2021-09-28 11:59:55 +02:00
|
|
|
src: impl AsRef<Path>,
|
|
|
|
dst: impl AsRef<Path>,
|
|
|
|
size: usize,
|
2022-01-19 11:21:19 +01:00
|
|
|
indexer_config: &IndexerConfig,
|
2021-06-15 17:39:07 +02:00
|
|
|
) -> anyhow::Result<()> {
|
2021-09-28 11:59:55 +02:00
|
|
|
let dir_name = src
|
|
|
|
.as_ref()
|
|
|
|
.file_name()
|
|
|
|
.with_context(|| format!("invalid dump index: {}", src.as_ref().display()))?;
|
|
|
|
|
|
|
|
let dst_dir_path = dst.as_ref().join("indexes").join(dir_name);
|
|
|
|
create_dir_all(&dst_dir_path)?;
|
|
|
|
|
|
|
|
let meta_path = src.as_ref().join(META_FILE_NAME);
|
2021-09-29 15:41:25 +02:00
|
|
|
let meta_file = File::open(meta_path)?;
|
2021-09-28 11:59:55 +02:00
|
|
|
let DumpMeta {
|
|
|
|
settings,
|
|
|
|
primary_key,
|
2021-09-29 15:41:25 +02:00
|
|
|
} = serde_json::from_reader(meta_file)?;
|
2021-09-28 11:59:55 +02:00
|
|
|
let settings = settings.check();
|
|
|
|
|
|
|
|
let mut options = EnvOpenOptions::new();
|
|
|
|
options.map_size(size);
|
|
|
|
let index = milli::Index::new(options, &dst_dir_path)?;
|
|
|
|
|
|
|
|
let mut txn = index.write_txn()?;
|
|
|
|
|
|
|
|
// Apply settings first
|
2022-01-19 11:21:19 +01:00
|
|
|
let mut builder = milli::update::Settings::new(&mut txn, &index, indexer_config);
|
2021-09-28 11:59:55 +02:00
|
|
|
|
|
|
|
if let Some(primary_key) = primary_key {
|
|
|
|
builder.set_primary_key(primary_key);
|
|
|
|
}
|
|
|
|
|
|
|
|
apply_settings_to_builder(&settings, &mut builder);
|
|
|
|
|
2021-12-02 16:03:26 +01:00
|
|
|
builder.execute(|_| ())?;
|
2021-09-28 11:59:55 +02:00
|
|
|
|
|
|
|
let document_file_path = src.as_ref().join(DATA_FILE_NAME);
|
|
|
|
let reader = BufReader::new(File::open(&document_file_path)?);
|
|
|
|
|
|
|
|
let mut tmp_doc_file = tempfile::tempfile()?;
|
|
|
|
|
2022-01-05 14:56:12 +01:00
|
|
|
let empty = match read_ndjson(reader, &mut tmp_doc_file) {
|
|
|
|
// if there was no document in the file it's because the index was empty
|
2022-01-13 12:30:35 +01:00
|
|
|
Ok(0) => true,
|
2022-01-05 14:56:12 +01:00
|
|
|
Ok(_) => false,
|
|
|
|
Err(e) => return Err(e.into()),
|
|
|
|
};
|
2021-09-28 11:59:55 +02:00
|
|
|
|
2022-01-05 14:56:12 +01:00
|
|
|
if !empty {
|
|
|
|
tmp_doc_file.seek(SeekFrom::Start(0))?;
|
2021-09-28 11:59:55 +02:00
|
|
|
|
2022-01-05 14:56:12 +01:00
|
|
|
let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?;
|
2021-09-28 11:59:55 +02:00
|
|
|
|
2022-01-05 14:56:12 +01:00
|
|
|
//If the document file is empty, we don't perform the document addition, to prevent
|
|
|
|
//a primary key error to be thrown.
|
2022-01-19 11:21:19 +01:00
|
|
|
let config = IndexDocumentsConfig::default();
|
|
|
|
let mut builder = milli::update::IndexDocuments::new(
|
|
|
|
&mut txn,
|
|
|
|
&index,
|
|
|
|
indexer_config,
|
|
|
|
config,
|
|
|
|
|_| (),
|
|
|
|
);
|
|
|
|
builder.add_documents(documents_reader)?;
|
|
|
|
builder.execute()?;
|
2021-09-28 11:59:55 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
txn.commit()?;
|
|
|
|
index.prepare_for_closing().wait();
|
|
|
|
|
|
|
|
Ok(())
|
2021-05-26 22:52:06 +02:00
|
|
|
}
|
|
|
|
}
|