restore dump v1

This commit is contained in:
mpostma 2021-09-28 19:49:25 +02:00
parent 3747f5bdd8
commit df4e9f4e1e
2 changed files with 64 additions and 48 deletions

View File

@ -14,7 +14,7 @@ use serde::{Serialize, Deserialize};
use error::Result; use error::Result;
pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT}; pub use search::{default_crop_length, SearchQuery, SearchResult, DEFAULT_SEARCH_LIMIT};
pub use updates::{Checked, Facets, Settings, Unchecked}; pub use updates::{Checked, Facets, Settings, Unchecked, apply_settings_to_builder};
use uuid::Uuid; use uuid::Uuid;
use crate::EnvSizer; use crate::EnvSizer;

View File

@ -1,12 +1,19 @@
use std::collections::{BTreeMap, BTreeSet}; use std::collections::{BTreeMap, BTreeSet};
use std::fs::{File, create_dir_all};
use std::io::{BufReader, Seek, SeekFrom};
use std::marker::PhantomData; use std::marker::PhantomData;
use std::path::Path; use std::path::Path;
use heed::EnvOpenOptions;
use log::{error, info, warn}; use log::{error, info, warn};
use milli::documents::DocumentBatchReader;
use milli::update::Setting; use milli::update::Setting;
use serde::{Deserialize, Deserializer, Serialize}; use serde::{Deserialize, Deserializer, Serialize};
use uuid::Uuid; use uuid::Uuid;
use crate::document_formats::read_jsonl;
use crate::index::apply_settings_to_builder;
use crate::index::update_handler::UpdateHandler;
use crate::index_controller::index_resolver::uuid_store::HeedUuidStore; use crate::index_controller::index_resolver::uuid_store::HeedUuidStore;
use crate::index_controller::{self, IndexMetadata}; use crate::index_controller::{self, IndexMetadata};
use crate::index_controller::{asc_ranking_rule, desc_ranking_rule}; use crate::index_controller::{asc_ranking_rule, desc_ranking_rule};
@ -83,57 +90,66 @@ struct Settings {
} }
fn load_index( fn load_index(
_src: impl AsRef<Path>, src: impl AsRef<Path>,
_dst: impl AsRef<Path>, dst: impl AsRef<Path>,
_uuid: Uuid, uuid: Uuid,
_primary_key: Option<&str>, primary_key: Option<&str>,
_size: usize, size: usize,
_indexer_options: &IndexerOpts, indexer_options: &IndexerOpts,
) -> anyhow::Result<()> { ) -> anyhow::Result<()> {
todo!("fix dump obkv documents") let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid));
//let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid));
//create_dir_all(&index_path)?; create_dir_all(&index_path)?;
//let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
//options.map_size(size); options.map_size(size);
//let index = milli::Index::new(options, index_path)?; let index = milli::Index::new(options, index_path)?;
//let index = Index(Arc::new(index));
//// extract `settings.json` file and import content let update_handler = UpdateHandler::new(indexer_options)?;
//let settings = import_settings(&src)?;
//let settings: index_controller::Settings<Unchecked> = settings.into();
//let mut txn = index.write_txn()?; let mut txn = index.write_txn()?;
// extract `settings.json` file and import content
let settings = import_settings(&src)?;
let settings: index_controller::Settings<Unchecked> = settings.into();
//let handler = UpdateHandler::new(indexer_options)?; let handler = UpdateHandler::new(indexer_options)?;
//index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?; let mut builder = handler.update_builder(0).settings(&mut txn, &index);
//let file = File::open(&src.as_ref().join("documents.jsonl"))?; if let Some(primary_key) = primary_key {
//let mut reader = std::io::BufReader::new(file); builder.set_primary_key(primary_key.to_string());
//reader.fill_buf()?; }
//if !reader.buffer().is_empty() {
//index.update_documents_txn(
//&mut txn,
//IndexDocumentsMethod::ReplaceDocuments,
//Some(reader),
//handler.update_builder(0),
//primary_key,
//)?;
//}
//txn.commit()?; apply_settings_to_builder(&settings.check(), &mut builder);
//// Finaly, we extract the original milli::Index and close it builder.execute(|_, _| ())?;
//Arc::try_unwrap(index.0)
//.map_err(|_e| "Couldn't close the index properly")
//.unwrap()
//.prepare_for_closing()
//.wait();
//// Updates are ignored in dumps V1. let reader = BufReader::new(File::open(&src.as_ref().join("documents.jsonl"))?);
//Ok(()) let mut tmp_doc_file = tempfile::tempfile()?;
read_jsonl(reader, &mut tmp_doc_file)?;
tmp_doc_file.seek(SeekFrom::Start(0))?;
let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?;
// If the document file is empty, we don't perform the document addition, to prevent
// a primary key error from being thrown.
if !documents_reader.is_empty() {
let builder = update_handler.update_builder(0).index_documents(&mut txn, &index);
builder.execute(documents_reader, |_, _| ())?;
}
txn.commit()?;
// Finally, we extract the original milli::Index and close it
index
.prepare_for_closing()
.wait();
// Updates are ignored in dumps V1.
Ok(())
} }
/// we need to **always** be able to convert the old settings to the settings currently being used /// we need to **always** be able to convert the old settings to the settings currently being used
@ -201,14 +217,14 @@ impl From<Settings> for index_controller::Settings<Unchecked> {
} }
// /// Extract Settings from `settings.json` file present at provided `dir_path` // /// Extract Settings from `settings.json` file present at provided `dir_path`
//fn import_settings(dir_path: impl AsRef<Path>) -> anyhow::Result<Settings> { fn import_settings(dir_path: impl AsRef<Path>) -> anyhow::Result<Settings> {
//let path = dir_path.as_ref().join("settings.json"); let path = dir_path.as_ref().join("settings.json");
//let file = File::open(path)?; let file = File::open(path)?;
//let reader = std::io::BufReader::new(file); let reader = std::io::BufReader::new(file);
//let metadata = serde_json::from_reader(reader)?; let metadata = serde_json::from_reader(reader)?;
//Ok(metadata) Ok(metadata)
//} }
#[cfg(test)] #[cfg(test)]
mod test { mod test {