mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 21:50:07 +01:00
Merge pull request #86 from meilisearch/clean-up-infos-crate
Clean up the infos crate
This commit is contained in:
commit
68102fced8
@ -21,7 +21,6 @@ const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
|
||||
const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-pair-proximity-docids";
|
||||
const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-prefix-pair-proximity-docids";
|
||||
const DOCUMENTS_DB_NAME: &str = "documents";
|
||||
const USERS_IDS_DOCUMENTS_IDS: &[u8] = b"users-ids-documents-ids";
|
||||
|
||||
const ALL_DATABASE_NAMES: &[&str] = &[
|
||||
MAIN_DB_NAME,
|
||||
@ -172,25 +171,15 @@ enum Command {
|
||||
/// Outputs the documents as JSON lines to the standard output.
|
||||
///
|
||||
/// All of the fields are extracted, not just the displayed ones.
|
||||
ExportDocuments,
|
||||
|
||||
/// A command that patches the old external ids
|
||||
/// into the new external ids format.
|
||||
PatchToNewExternalIds,
|
||||
ExportDocuments {
|
||||
/// If defined, only retrieve the documents that correspond to these internal ids.
|
||||
internal_documents_ids: Vec<u32>,
|
||||
},
|
||||
}
|
||||
|
||||
fn main() -> Result<(), ()> {
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let opt = Opt::from_args();
|
||||
match run(opt) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
eprintln!("{}", e);
|
||||
Err(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
stderrlog::new()
|
||||
.verbosity(opt.verbose)
|
||||
.show_level(false)
|
||||
@ -200,6 +189,11 @@ fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(opt.database_size.get_bytes() as usize);
|
||||
|
||||
// Return an error if the database does not exist.
|
||||
if !opt.database.exists() {
|
||||
anyhow::bail!("The database ({}) does not exist.", opt.database.display());
|
||||
}
|
||||
|
||||
// Open the LMDB database.
|
||||
let index = Index::new(options, opt.database)?;
|
||||
let rtxn = index.read_txn()?;
|
||||
@ -227,33 +221,12 @@ fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
},
|
||||
ExportWordsFst => export_words_fst(&index, &rtxn),
|
||||
ExportWordsPrefixFst => export_words_prefix_fst(&index, &rtxn),
|
||||
ExportDocuments => export_documents(&index, &rtxn),
|
||||
PatchToNewExternalIds => {
|
||||
drop(rtxn);
|
||||
let mut wtxn = index.write_txn()?;
|
||||
let result = patch_to_new_external_ids(&index, &mut wtxn);
|
||||
wtxn.commit()?;
|
||||
result
|
||||
ExportDocuments { internal_documents_ids } => {
|
||||
export_documents(&index, &rtxn, internal_documents_ids)
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn patch_to_new_external_ids(index: &Index, wtxn: &mut heed::RwTxn) -> anyhow::Result<()> {
|
||||
use heed::types::ByteSlice;
|
||||
|
||||
if let Some(documents_ids) = index.main.get::<_, ByteSlice, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)? {
|
||||
let documents_ids = documents_ids.to_owned();
|
||||
index.main.put::<_, ByteSlice, ByteSlice>(
|
||||
wtxn,
|
||||
milli::index::HARD_EXTERNAL_DOCUMENTS_IDS_KEY.as_bytes(),
|
||||
&documents_ids,
|
||||
)?;
|
||||
index.main.delete::<_, ByteSlice>(wtxn, USERS_IDS_DOCUMENTS_IDS)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn most_common_words(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyhow::Result<()> {
|
||||
use std::collections::BinaryHeap;
|
||||
use std::cmp::Reverse;
|
||||
@ -615,9 +588,9 @@ fn export_words_prefix_fst(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||
fn export_documents(index: &Index, rtxn: &heed::RoTxn, internal_ids: Vec<u32>) -> anyhow::Result<()> {
|
||||
use std::io::{BufWriter, Write as _};
|
||||
use milli::obkv_to_json;
|
||||
use milli::{BEU32, obkv_to_json};
|
||||
|
||||
let stdout = io::stdout();
|
||||
let mut out = BufWriter::new(stdout);
|
||||
@ -625,8 +598,18 @@ fn export_documents(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let displayed_fields: Vec<_> = fields_ids_map.iter().map(|(id, _name)| id).collect();
|
||||
|
||||
for result in index.documents.iter(rtxn)? {
|
||||
let (_id, obkv) = result?;
|
||||
let iter: Box<Iterator<Item = _>> = if internal_ids.is_empty() {
|
||||
Box::new(index.documents.iter(rtxn)?.map(|result| {
|
||||
result.map(|(_id, obkv)| obkv)
|
||||
}))
|
||||
} else {
|
||||
Box::new(internal_ids.into_iter().flat_map(|id| {
|
||||
index.documents.get(rtxn, &BEU32::new(id)).transpose()
|
||||
}))
|
||||
};
|
||||
|
||||
for result in iter {
|
||||
let obkv = result?;
|
||||
let document = obkv_to_json(&displayed_fields, &fields_ids_map, obkv)?;
|
||||
serde_json::to_writer(&mut out, &document)?;
|
||||
writeln!(&mut out)?;
|
||||
|
@ -39,25 +39,20 @@ pub struct Opt {
|
||||
print_facet_distribution: bool,
|
||||
}
|
||||
|
||||
fn main() -> Result<(), ()> {
|
||||
fn main() -> anyhow::Result<()> {
|
||||
let opt = Opt::from_args();
|
||||
match run(opt) {
|
||||
Ok(()) => Ok(()),
|
||||
Err(e) => {
|
||||
eprintln!("{}", e);
|
||||
Err(())
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
fn run(opt: Opt) -> anyhow::Result<()> {
|
||||
stderrlog::new()
|
||||
.verbosity(opt.verbose)
|
||||
.show_level(false)
|
||||
.timestamp(stderrlog::Timestamp::Off)
|
||||
.init()?;
|
||||
|
||||
std::fs::create_dir_all(&opt.database)?;
|
||||
// Return an error if the database does not exist.
|
||||
if !opt.database.exists() {
|
||||
anyhow::bail!("The database ({}) does not exist.", opt.database.display());
|
||||
}
|
||||
|
||||
let mut options = EnvOpenOptions::new();
|
||||
options.map_size(opt.database_size.get_bytes() as usize);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user