mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-25 06:00:08 +01:00
Introduce the average-number-of-document-by-word-pair-proximity infos subcommand
This commit is contained in:
parent
991be8950e
commit
58237bd67f
@ -71,6 +71,8 @@ enum Command {
|
|||||||
/// Outputs the average number of positions for each document words.
|
/// Outputs the average number of positions for each document words.
|
||||||
AverageNumberOfPositionsByWord,
|
AverageNumberOfPositionsByWord,
|
||||||
|
|
||||||
|
/// Outputs the average number of documents for each word pair.
|
||||||
|
AverageNumberOfDocumentByWordPairProximity,
|
||||||
|
|
||||||
/// Outputs a CSV with the proximities for the two specified words and
|
/// Outputs a CSV with the proximities for the two specified words and
|
||||||
/// the documents ids where these relations appear.
|
/// the documents ids where these relations appear.
|
||||||
@ -128,6 +130,9 @@ fn main() -> anyhow::Result<()> {
|
|||||||
AverageNumberOfPositionsByWord => {
|
AverageNumberOfPositionsByWord => {
|
||||||
average_number_of_positions_by_word(&index, &rtxn)
|
average_number_of_positions_by_word(&index, &rtxn)
|
||||||
},
|
},
|
||||||
|
AverageNumberOfDocumentByWordPairProximity => {
|
||||||
|
average_number_of_document_by_word_pair_proximity(&index, &rtxn)
|
||||||
|
}
|
||||||
WordPairProximitiesDocids { full_display, word1, word2 } => {
|
WordPairProximitiesDocids { full_display, word1, word2 } => {
|
||||||
word_pair_proximities_docids(&index, &rtxn, !full_display, word1, word2)
|
word_pair_proximities_docids(&index, &rtxn, !full_display, word1, word2)
|
||||||
},
|
},
|
||||||
@ -331,6 +336,32 @@ fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> any
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn average_number_of_document_by_word_pair_proximity(
|
||||||
|
index: &Index,
|
||||||
|
rtxn: &heed::RoTxn,
|
||||||
|
) -> anyhow::Result<()>
|
||||||
|
{
|
||||||
|
use heed::types::DecodeIgnore;
|
||||||
|
use milli::RoaringBitmapCodec;
|
||||||
|
|
||||||
|
let mut values_length = Vec::new();
|
||||||
|
let mut count = 0;
|
||||||
|
|
||||||
|
let db = index.word_pair_proximity_docids.as_polymorph();
|
||||||
|
for result in db.iter::<_, DecodeIgnore, RoaringBitmapCodec>(rtxn)? {
|
||||||
|
let ((), val) = result?;
|
||||||
|
values_length.push(val.len() as u32);
|
||||||
|
count += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let values_length_sum = values_length.into_iter().map(|c| c as usize).sum::<usize>() as f64;
|
||||||
|
let count = count as f64;
|
||||||
|
|
||||||
|
println!("average number of documents by words pairs proximities: {}", values_length_sum / count);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
fn word_pair_proximities_docids(
|
fn word_pair_proximities_docids(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user