mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 12:54:26 +01:00
Introduce the words-docids command for the infos binary
This commit is contained in:
parent
5664c37539
commit
ad11c5fb3f
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
/target
|
||||
*.csv
|
||||
*.mmdb
|
||||
*.svg
|
||||
|
@ -52,6 +52,16 @@ enum Command {
|
||||
limit: usize,
|
||||
},
|
||||
|
||||
/// Outputs a CSV with the documents ids where the given words appear.
|
||||
WordsDocids {
|
||||
/// Display the whole documents ids in detail.
|
||||
#[structopt(long)]
|
||||
full_display: bool,
|
||||
|
||||
/// The words to display the documents ids of.
|
||||
words: Vec<String>,
|
||||
},
|
||||
|
||||
/// Outputs the total size of all the docid-word-positions keys and values.
|
||||
TotalDocidWordPositionsSize,
|
||||
|
||||
@ -93,6 +103,7 @@ fn main() -> anyhow::Result<()> {
|
||||
match opt.command {
|
||||
MostCommonWords { limit } => most_common_words(&index, &rtxn, limit),
|
||||
BiggestValues { limit } => biggest_value_sizes(&index, &rtxn, limit),
|
||||
WordsDocids { full_display, words } => words_docids(&index, &rtxn, !full_display, words),
|
||||
TotalDocidWordPositionsSize => total_docid_word_positions_size(&index, &rtxn),
|
||||
AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn),
|
||||
AverageNumberOfPositions => average_number_of_positions(&index, &rtxn),
|
||||
@ -176,6 +187,25 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
||||
Ok(wtr.flush()?)
|
||||
}
|
||||
|
||||
fn words_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, words: Vec<String>) -> anyhow::Result<()> {
|
||||
let stdout = io::stdout();
|
||||
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
||||
wtr.write_record(&["word", "documents_ids"])?;
|
||||
|
||||
for word in words {
|
||||
if let Some(docids) = index.word_docids.get(rtxn, &word)? {
|
||||
let docids = if debug {
|
||||
format!("{:?}", docids)
|
||||
} else {
|
||||
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
||||
};
|
||||
wtr.write_record(&[word, docids])?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(wtr.flush()?)
|
||||
}
|
||||
|
||||
fn export_words_fst(index: &Index, rtxn: &heed::RoTxn, output: PathBuf) -> anyhow::Result<()> {
|
||||
use std::fs::File;
|
||||
use std::io::Write as _;
|
||||
|
Loading…
Reference in New Issue
Block a user