Merge pull request #45 from meilisearch/infos-export-documents

Infos export documents
This commit is contained in:
Clément Renault 2020-12-02 10:50:54 +01:00 committed by GitHub
commit 0a63e69e04
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -135,15 +135,16 @@ enum Command {
word2: String, word2: String,
}, },
/// Outputs the words FST to disk. /// Outputs the words FST to standard output.
/// ///
/// One can use the FST binary helper to dissect and analyze it, /// One can use the FST binary helper to dissect and analyze it,
/// you can install it using `cargo install fst-bin`. /// you can install it using `cargo install fst-bin`.
ExportWordsFst { ExportWordsFst,
/// The path where the FST will be written.
#[structopt(short, long, default_value = "words.fst")] /// Outputs the documents as JSON lines to the standard output.
output: PathBuf, ///
}, /// All of the fields are extracted, not just the displayed ones.
ExportDocuments,
/// A command that patches the old external ids /// A command that patches the old external ids
/// into the new external ids format. /// into the new external ids format.
@ -182,14 +183,15 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
WordPairProximitiesDocids { full_display, word1, word2 } => { WordPairProximitiesDocids { full_display, word1, word2 } => {
word_pair_proximities_docids(&index, &rtxn, !full_display, word1, word2) word_pair_proximities_docids(&index, &rtxn, !full_display, word1, word2)
}, },
ExportWordsFst { output } => export_words_fst(&index, &rtxn, output), ExportWordsFst => export_words_fst(&index, &rtxn),
ExportDocuments => export_documents(&index, &rtxn),
PatchToNewExternalIds => { PatchToNewExternalIds => {
drop(rtxn); drop(rtxn);
let mut wtxn = index.write_txn()?; let mut wtxn = index.write_txn()?;
let result = patch_to_new_external_ids(&index, &mut wtxn); let result = patch_to_new_external_ids(&index, &mut wtxn);
wtxn.commit()?; wtxn.commit()?;
result result
} },
} }
} }
@ -479,15 +481,34 @@ fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow:
Ok(()) Ok(())
} }
fn export_words_fst(index: &Index, rtxn: &heed::RoTxn, output: PathBuf) -> anyhow::Result<()> { fn export_words_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use std::fs::File;
use std::io::Write as _; use std::io::Write as _;
let mut output = File::create(&output) let mut stdout = io::stdout();
.with_context(|| format!("failed to create {} file", output.display()))?;
let words_fst = index.words_fst(rtxn)?; let words_fst = index.words_fst(rtxn)?;
output.write_all(words_fst.as_fst().as_bytes())?; stdout.write_all(words_fst.as_fst().as_bytes())?;
Ok(())
}
fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
use std::io::{BufWriter, Write as _};
use crate::obkv_to_json;
let stdout = io::stdout();
let mut out = BufWriter::new(stdout);
let fields_ids_map = index.fields_ids_map(rtxn)?;
let displayed_fields: Vec<_> = fields_ids_map.iter().map(|(id, _name)| id).collect();
for result in index.documents.iter(rtxn)? {
let (_id, obkv) = result?;
let document = obkv_to_json(&displayed_fields, &fields_ids_map, obkv)?;
serde_json::to_writer(&mut out, &document)?;
writeln!(&mut out)?;
}
out.into_inner()?;
Ok(()) Ok(())
} }