mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Introduce the docids-words-positions subcommand to the infos crate
This commit is contained in:
parent
bd63da0a0e
commit
3d02b19fbd
@ -114,6 +114,16 @@ enum Command {
|
|||||||
field_name: String,
|
field_name: String,
|
||||||
},
|
},
|
||||||
|
|
||||||
|
/// Outputs a CSV with the documents ids, words and the positions where this word appears.
|
||||||
|
DocidsWordsPositions {
|
||||||
|
/// Display all the positions in detail.
|
||||||
|
#[structopt(long)]
|
||||||
|
full_display: bool,
|
||||||
|
|
||||||
|
/// If defined, only retrieve the documents that correspond to these internal ids.
|
||||||
|
internal_documents_ids: Vec<u32>,
|
||||||
|
},
|
||||||
|
|
||||||
/// Outputs some facets statistics for the given facet name.
|
/// Outputs some facets statistics for the given facet name.
|
||||||
FacetStats {
|
FacetStats {
|
||||||
/// The field name in the document.
|
/// The field name in the document.
|
||||||
@ -209,6 +219,9 @@ fn main() -> anyhow::Result<()> {
|
|||||||
FacetValuesDocids { full_display, field_name } => {
|
FacetValuesDocids { full_display, field_name } => {
|
||||||
facet_values_docids(&index, &rtxn, !full_display, field_name)
|
facet_values_docids(&index, &rtxn, !full_display, field_name)
|
||||||
},
|
},
|
||||||
|
DocidsWordsPositions { full_display, internal_documents_ids } => {
|
||||||
|
docids_words_positions(&index, &rtxn, !full_display, internal_documents_ids)
|
||||||
|
},
|
||||||
FacetStats { field_name } => facet_stats(&index, &rtxn, field_name),
|
FacetStats { field_name } => facet_stats(&index, &rtxn, field_name),
|
||||||
AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn),
|
AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn),
|
||||||
AverageNumberOfPositionsByWord => {
|
AverageNumberOfPositionsByWord => {
|
||||||
@ -525,6 +538,39 @@ fn facet_values_docids(index: &Index, rtxn: &heed::RoTxn, debug: bool, field_nam
|
|||||||
Ok(wtr.flush()?)
|
Ok(wtr.flush()?)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn docids_words_positions(
|
||||||
|
index: &Index,
|
||||||
|
rtxn: &heed::RoTxn,
|
||||||
|
debug: bool,
|
||||||
|
internal_ids: Vec<u32>,
|
||||||
|
) -> anyhow::Result<()>
|
||||||
|
{
|
||||||
|
let stdout = io::stdout();
|
||||||
|
let mut wtr = csv::Writer::from_writer(stdout.lock());
|
||||||
|
wtr.write_record(&["document_id", "word", "positions"])?;
|
||||||
|
|
||||||
|
let iter: Box<dyn Iterator<Item = _>> = if internal_ids.is_empty() {
|
||||||
|
Box::new(index.docid_word_positions.iter(rtxn)?)
|
||||||
|
} else {
|
||||||
|
let vec: heed::Result<Vec<_>> = internal_ids.into_iter().map(|id| {
|
||||||
|
index.docid_word_positions.prefix_iter(rtxn, &(id, ""))
|
||||||
|
}).collect();
|
||||||
|
Box::new(vec?.into_iter().flatten())
|
||||||
|
};
|
||||||
|
|
||||||
|
for result in iter {
|
||||||
|
let ((id, word), positions) = result?;
|
||||||
|
let positions = if debug {
|
||||||
|
format!("{:?}", positions)
|
||||||
|
} else {
|
||||||
|
format!("{:?}", positions.iter().collect::<Vec<_>>())
|
||||||
|
};
|
||||||
|
wtr.write_record(&[&id.to_string(), word, &positions])?;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(wtr.flush()?)
|
||||||
|
}
|
||||||
|
|
||||||
fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> {
|
fn facet_stats(index: &Index, rtxn: &heed::RoTxn, field_name: String) -> anyhow::Result<()> {
|
||||||
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
let fields_ids_map = index.fields_ids_map(&rtxn)?;
|
||||||
let faceted_fields = index.faceted_fields_ids(&rtxn)?;
|
let faceted_fields = index.faceted_fields_ids(&rtxn)?;
|
||||||
|
Loading…
Reference in New Issue
Block a user