mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Introduce the average-number-of-positions infos subcommand
This commit is contained in:
parent
ea605b499c
commit
3e2250423c
@ -58,6 +58,9 @@ enum Command {
|
|||||||
/// Outputs the average number of *different* words by document.
|
/// Outputs the average number of *different* words by document.
|
||||||
AverageNumberOfWordsByDoc,
|
AverageNumberOfWordsByDoc,
|
||||||
|
|
||||||
|
/// Outputs the average number of positions for each document words.
|
||||||
|
AverageNumberOfPositions,
|
||||||
|
|
||||||
/// Outputs the words FST to disk.
|
/// Outputs the words FST to disk.
|
||||||
///
|
///
|
||||||
/// One can use the FST binary helper to dissect and analyze it,
|
/// One can use the FST binary helper to dissect and analyze it,
|
||||||
@ -92,6 +95,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
BiggestValues { limit } => biggest_value_sizes(&index, &rtxn, limit),
|
BiggestValues { limit } => biggest_value_sizes(&index, &rtxn, limit),
|
||||||
TotalDocidWordPositionsSize => total_docid_word_positions_size(&index, &rtxn),
|
TotalDocidWordPositionsSize => total_docid_word_positions_size(&index, &rtxn),
|
||||||
AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn),
|
AverageNumberOfWordsByDoc => average_number_of_words_by_doc(&index, &rtxn),
|
||||||
|
AverageNumberOfPositions => average_number_of_positions(&index, &rtxn),
|
||||||
ExportWordsFst { output } => export_words_fst(&index, &rtxn, output),
|
ExportWordsFst { output } => export_words_fst(&index, &rtxn, output),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -250,3 +254,25 @@ fn average_number_of_words_by_doc(index: &Index, rtxn: &heed::RoTxn) -> anyhow::
|
|||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn average_number_of_positions(index: &Index, rtxn: &heed::RoTxn) -> anyhow::Result<()> {
|
||||||
|
use heed::types::DecodeIgnore;
|
||||||
|
use milli::RoaringBitmapCodec;
|
||||||
|
|
||||||
|
let mut values_length = Vec::new();
|
||||||
|
let mut count = 0;
|
||||||
|
|
||||||
|
let iter = index.docid_word_positions.as_polymorph().iter::<_, DecodeIgnore, RoaringBitmapCodec>(rtxn)?;
|
||||||
|
for result in iter {
|
||||||
|
let ((), val) = result?;
|
||||||
|
values_length.push(val.len() as u32);
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let values_length_sum = values_length.into_iter().map(|c| c as usize).sum::<usize>() as f64;
|
||||||
|
let count = count as f64;
|
||||||
|
|
||||||
|
println!("average number of positions by word: {}", values_length_sum / count);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user