implement index dump

This commit is contained in:
Marin Postma 2021-05-24 18:16:35 +02:00
parent 7ad553670f
commit 4acbe8e473
No known key found for this signature in database
GPG key ID: D5241F0C0C865F30
8 changed files with 79 additions and 55 deletions

View file

@ -1,8 +1,11 @@
use std::{collections::{BTreeSet, HashSet}, marker::PhantomData};
use std::{collections::{BTreeSet, HashSet}, io::Write, marker::PhantomData, path::{Path, PathBuf}};
use std::ops::Deref;
use std::sync::Arc;
use std::fs::File;
use anyhow::{bail, Context};
use heed::RoTxn;
use indexmap::IndexMap;
use milli::obkv_to_json;
use serde_json::{Map, Value};
@ -38,7 +41,10 @@ where
impl Index {
/// Returns the checked settings of this index.
///
/// Opens a read transaction on the index and delegates to
/// [`Index::settings_txn`] with it.
///
/// # Errors
///
/// Fails if the read transaction cannot be opened or if
/// `settings_txn` returns an error.
pub fn settings(&self) -> anyhow::Result<Settings<Checked>> {
    let rtxn = self.read_txn()?;
    self.settings_txn(&rtxn)
}
pub fn settings_txn(&self, txn: &RoTxn) -> anyhow::Result<Settings<Checked>> {
let displayed_attributes = self
.displayed_fields(&txn)?
.map(|fields| fields.into_iter().map(String::from).collect());
@ -161,4 +167,57 @@ impl Index {
displayed_fields_ids.retain(|fid| attributes_to_retrieve_ids.contains(fid));
Ok(displayed_fields_ids)
}
/// Dumps the index into the directory `path`: first the documents,
/// then the index metadata.
///
/// # Errors
///
/// Fails if the write transaction cannot be acquired, or if dumping
/// the documents or the metadata fails.
pub fn dump(&self, path: PathBuf) -> anyhow::Result<()> {
    // A write transaction is taken (rather than a read one) so that any
    // ongoing write has finished before the dump starts.
    let wtxn = self.env.write_txn()?;

    self.dump_documents(&wtxn, &path)?;
    self.dump_meta(&wtxn, &path)
}
/// Writes every document of the index to `documents.jsonl` inside `path`,
/// as one JSON object per line.
///
/// Each stored field is decoded from its raw bytes as a JSON value and
/// keyed by its field name; fields whose id has no name in the
/// fields-ids map are skipped.
///
/// # Errors
///
/// Fails if the output file cannot be created or written, if reading the
/// documents or the fields-ids map fails, or if a stored field value is
/// not valid JSON.
fn dump_documents(&self, txn: &RoTxn, path: impl AsRef<Path>) -> anyhow::Result<()> {
    println!("dumping documents");

    let document_file_path = path.as_ref().join("documents.jsonl");
    // Buffer the output: the JSON serializer emits many small writes, and
    // each one would otherwise hit the file directly.
    let mut document_file = std::io::BufWriter::new(File::create(&document_file_path)?);

    let documents = self.all_documents(txn)?;
    let fields_ids_map = self.fields_ids_map(txn)?;

    // The map is allocated once and cleared after each document so it is
    // reused across iterations.
    let mut json_map = IndexMap::new();
    for document in documents {
        let (_, reader) = document?;

        for (fid, bytes) in reader.iter() {
            if let Some(name) = fields_ids_map.name(fid) {
                json_map.insert(name, serde_json::from_slice::<serde_json::Value>(bytes)?);
            }
        }

        serde_json::to_writer(&mut document_file, &json_map)?;
        // `write_all`, not `write`: a plain `write` may write fewer bytes
        // than requested and the line separator would be silently lost.
        document_file.write_all(b"\n")?;

        json_map.clear();
    }

    // Flush explicitly: dropping a `BufWriter` discards any flush error.
    document_file.flush()?;

    Ok(())
}
fn dump_meta(&self, txn: &RoTxn, path: impl AsRef<Path>) -> anyhow::Result<()> {
println!("dumping settings");
let meta_file_path = path.as_ref().join("meta.json");
let mut meta_file = File::create(&meta_file_path)?;
let settings = self.settings_txn(txn)?;
let json = serde_json::json!({
"settings": settings,
});
serde_json::to_writer(&mut meta_file, &json)?;
Ok(())
}
}