mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-12 06:24:29 +01:00
Introduce the size_of_database infos subcommand
This commit is contained in:
parent
c6b883289c
commit
bc35c9a598
@ -12,6 +12,20 @@ use Command::*;
|
||||
// Registers jemalloc as the global allocator of this binary.
#[global_allocator]
|
||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||
|
||||
/// Name under which the main database is exposed on the command line.
const MAIN_DB_NAME: &str = "main";
/// Name of the database holding the word to document ids entries.
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
/// Name of the database holding the positions of each word in each document.
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
/// Name of the database holding the document ids of each word pair proximity.
const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-pair-proximity-docids";
/// Name of the database holding the documents themselves.
const DOCUMENTS_DB_NAME: &str = "documents";

/// Every database name accepted by the subcommands that take a database
/// as argument, in the order they are listed to the user.
const DATABASE_NAMES: &[&str] = &[
    MAIN_DB_NAME,
    WORD_DOCIDS_DB_NAME,
    DOCID_WORD_POSITIONS_DB_NAME,
    WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME,
    DOCUMENTS_DB_NAME,
];
|
||||
|
||||
#[derive(Debug, StructOpt)]
|
||||
#[structopt(name = "milli-info", about = "A stats crawler for milli.")]
|
||||
struct Opt {
|
||||
@ -74,6 +88,12 @@ enum Command {
|
||||
/// Outputs the average number of documents for each word pair.
|
||||
AverageNumberOfDocumentByWordPairProximity,
|
||||
|
||||
/// Outputs the size in bytes of the specified database.
|
||||
SizeOfDatabase {
|
||||
#[structopt(possible_values = DATABASE_NAMES)]
|
||||
database: String,
|
||||
},
|
||||
|
||||
/// Outputs a CSV with the proximities for the two specified words and
|
||||
/// the document ids where these relations appear.
|
||||
///
|
||||
@ -130,6 +150,7 @@ fn main() -> anyhow::Result<()> {
|
||||
AverageNumberOfPositionsByWord => {
|
||||
average_number_of_positions_by_word(&index, &rtxn)
|
||||
},
|
||||
SizeOfDatabase { database } => size_of_database(&index, &rtxn, &database),
|
||||
AverageNumberOfDocumentByWordPairProximity => {
|
||||
average_number_of_document_by_word_pair_proximity(&index, &rtxn)
|
||||
}
|
||||
@ -336,6 +357,33 @@ fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> any
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Reports how many bytes the keys and the values of one database take up.
///
/// `name` selects the database and is matched against the `*_DB_NAME`
/// constants. Every entry is then read as raw bytes through `rtxn`, and the
/// lengths of the keys and of the values are summed separately. Both totals
/// are written to the standard error stream.
///
/// # Errors
///
/// Fails when `name` does not designate a known database, when the iterator
/// cannot be opened, or when reading an entry fails.
fn size_of_database(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Result<()> {
    use heed::types::ByteSlice;

    // Each arm yields the polymorphic view of its database so that they can
    // all be walked with the same raw-bytes codecs below.
    let db = match name {
        MAIN_DB_NAME => &index.main,
        WORD_DOCIDS_DB_NAME => index.word_docids.as_polymorph(),
        DOCID_WORD_POSITIONS_DB_NAME => index.docid_word_positions.as_polymorph(),
        WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => index.word_pair_proximity_docids.as_polymorph(),
        DOCUMENTS_DB_NAME => index.documents.as_polymorph(),
        unknown => anyhow::bail!("unknown database {:?}", unknown),
    };

    // Accumulate both totals in one pass, stopping at the first entry that
    // cannot be read.
    let mut entries = db.iter::<_, ByteSlice, ByteSlice>(rtxn)?;
    let (key_size, val_size) = entries.try_fold((0u64, 0u64), |(keys, vals), entry| {
        entry.map(|(k, v)| (keys + k.len() as u64, vals + v.len() as u64))
    })?;

    eprintln!(
        "The {} database weigh {} bytes in terms of keys and {} bytes in terms of values.",
        name, key_size, val_size,
    );

    Ok(())
}
|
||||
|
||||
fn average_number_of_document_by_word_pair_proximity(
|
||||
index: &Index,
|
||||
rtxn: &heed::RoTxn,
|
||||
|
@ -43,10 +43,10 @@ pub struct Index {
|
||||
pub word_docids: Database<Str, RoaringBitmapCodec>,
|
||||
/// Maps a word and a document id (u32) to all the positions where the given word appears.
|
||||
pub docid_word_positions: Database<BEU32StrCodec, ByteorderXRoaringBitmapCodec>,
|
||||
/// Maps the document id to the document as a CSV line.
|
||||
pub documents: Database<OwnedType<BEU32>, ByteSlice>,
|
||||
/// Maps the proximity between a pair of words with all the docids where this relation appears.
|
||||
pub word_pair_proximity_docids: Database<StrStrU8Codec, RoaringBitmapCodec>,
|
||||
/// Maps the document id to the document as a CSV line.
|
||||
pub documents: Database<OwnedType<BEU32>, ByteSlice>,
|
||||
}
|
||||
|
||||
impl Index {
|
||||
@ -55,8 +55,8 @@ impl Index {
|
||||
main: env.create_poly_database(None)?,
|
||||
word_docids: env.create_database(Some("word-docids"))?,
|
||||
docid_word_positions: env.create_database(Some("docid-word-positions"))?,
|
||||
documents: env.create_database(Some("documents"))?,
|
||||
word_pair_proximity_docids: env.create_database(Some("word-pair-proximity-docids"))?,
|
||||
documents: env.create_database(Some("documents"))?,
|
||||
})
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user