From aa4d9882d298eafa24c462b8395fd3869250089c Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 10 Feb 2021 12:28:46 +0100 Subject: [PATCH] Introduce the new words-prefixes-docids infos subcommand --- infos/src/main.rs | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/infos/src/main.rs b/infos/src/main.rs index e33c2820f..3f41b7d42 100644 --- a/infos/src/main.rs +++ b/infos/src/main.rs @@ -91,6 +91,16 @@ enum Command { words: Vec, }, + /// Outputs a CSV with the documents ids where the given words prefixes appears. + WordsPrefixesDocids { + /// Display the whole documents ids in details. + #[structopt(long)] + full_display: bool, + + /// The prefixes to display the documents ids of. + prefixes: Vec, + }, + /// Outputs a CSV with the documents ids along with the facet values where it appears. FacetValuesDocids { /// Display the whole documents ids in details. @@ -198,6 +208,9 @@ fn run(opt: Opt) -> anyhow::Result<()> { MostCommonWords { limit } => most_common_words(&index, &rtxn, limit), BiggestValues { limit } => biggest_value_sizes(&index, &rtxn, limit), WordsDocids { full_display, words } => words_docids(&index, &rtxn, !full_display, words), + WordsPrefixesDocids { full_display, prefixes } => { + words_prefixes_docids(&index, &rtxn, !full_display, prefixes) + }, FacetValuesDocids { full_display, field_name } => { facet_values_docids(&index, &rtxn, !full_display, field_name) }, @@ -464,6 +477,43 @@ fn words_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, words: Vec, +) -> anyhow::Result<()> +{ + let stdout = io::stdout(); + let mut wtr = csv::Writer::from_writer(stdout.lock()); + wtr.write_record(&["prefix", "documents_ids"])?; + + if prefixes.is_empty() { + for result in index.word_prefix_docids.iter(rtxn)? 
{ + let (prefix, docids) = result?; + let docids = if debug { + format!("{:?}", docids) + } else { + format!("{:?}", docids.iter().collect::>()) + }; + wtr.write_record(&[prefix, &docids])?; + } + } else { + for prefix in prefixes { + if let Some(docids) = index.word_prefix_docids.get(rtxn, &prefix)? { + let docids = if debug { + format!("{:?}", docids) + } else { + format!("{:?}", docids.iter().collect::>()) + }; + wtr.write_record(&[prefix, docids])?; + } + } + } + + Ok(wtr.flush()?) +} + fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_name: String) -> anyhow::Result<()> { let fields_ids_map = index.fields_ids_map(&rtxn)?; let faceted_fields = index.faceted_fields_ids(&rtxn)?;