mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 12:54:26 +01:00
Introduce the words-docids command for the infos binary
This commit is contained in:
parent
5664c37539
commit
ad11c5fb3f
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
|||||||
/target
|
/target
|
||||||
*.csv
|
*.csv
|
||||||
*.mmdb
|
*.mmdb
|
||||||
|
*.svg
|
||||||
|
@ -52,6 +52,16 @@ enum Command {
|
|||||||
limit: usize,
|
limit: usize,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Outputs a CSV with the documents ids where the given words appear.
|
||||||
|
WordsDocids {
|
||||||
|
/// Display the whole documents ids in detail.
|
||||||
|
#[structopt(long)]
|
||||||
|
full_display: bool,
|
||||||
|
|
||||||
|
/// The words to display the documents ids of.
|
||||||
|
words: Vec<String>,
|
||||||
|
},
|
||||||
|
|
||||||
/// Outputs the total size of all the docid-word-positions keys and values.
|
/// Outputs the total size of all the docid-word-positions keys and values.
|
||||||
TotalDocidWordPositionsSize,
|
TotalDocidWordPositionsSize,
|
||||||
|
|
||||||
@ -93,6 +103,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
match opt.command {
|
match opt.command {
|
||||||
MostCommonWords { limit } => most_common_words(&index, &rtxn, limit),
|
MostCommonWords { limit } => most_common_words(&index, &rtxn, limit),
|
||||||
BiggestValues { limit } => biggest_value_sizes(&index, &rtxn, limit),
|
BiggestValues { limit } => biggest_value_sizes(&index, &rtxn, limit),
|
||||||
|
WordsDocids { full_display, words } => words_docids(&index, &rtxn, !full_display, words),
|
||||||
TotalDocidWordPositionsSize => total_docid_word_positions_size(&index, &rtxn),
|
TotalDocidWordPositionsSize => total_docid_word_positions_size(&index, &rtxn),
|
||||||
AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn),
|
AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn),
|
||||||
AverageNumberOfPositions => average_number_of_positions(&index, &rtxn),
|
AverageNumberOfPositions => average_number_of_positions(&index, &rtxn),
|
||||||
@ -176,6 +187,25 @@ fn biggest_value_sizes(index: &Index, rtxn: &heed::RoTxn, limit: usize) -> anyho
|
|||||||
Ok(wtr.flush()?)
|
Ok(wtr.flush()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn words_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, words: Vec<String>) -> anyhow::Result<()> {
|
||||||
|
let stdout = io::stdout();
|
||||||
|
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
||||||
|
wtr.write_record(&["word", "documents_ids"])?;
|
||||||
|
|
||||||
|
for word in words {
|
||||||
|
if let Some(docids) = index.word_docids.get(rtxn, &word)? {
|
||||||
|
let docids = if debug {
|
||||||
|
format!("{:?}", docids)
|
||||||
|
} else {
|
||||||
|
format!("{:?}", docids.iter().collect::<Vec<_>>())
|
||||||
|
};
|
||||||
|
wtr.write_record(&[word, docids])?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(wtr.flush()?)
|
||||||
|
}
|
||||||
|
|
||||||
fn export_words_fst(index: &Index, rtxn: &heed::RoTxn, output: PathBuf) -> anyhow::Result<()> {
|
fn export_words_fst(index: &Index, rtxn: &heed::RoTxn, output: PathBuf) -> anyhow::Result<()> {
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::Write as _;
|
use std::io::Write as _;
|
||||||
|
Loading…
Reference in New Issue
Block a user