mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-25 14:10:06 +01:00
Introduce the size_of_database infos subcommand
This commit is contained in:
parent
c6b883289c
commit
bc35c9a598
@ -12,6 +12,20 @@ use Command::*;
|
|||||||
#[global_allocator]
|
#[global_allocator]
|
||||||
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
|
||||||
|
|
||||||
|
/// Name used on the command line to select `index.main`.
const MAIN_DB_NAME: &str = "main";
|
||||||
|
/// Name used on the command line to select `index.word_docids`.
const WORD_DOCIDS_DB_NAME: &str = "word-docids";
|
||||||
|
/// Name used on the command line to select `index.docid_word_positions`.
const DOCID_WORD_POSITIONS_DB_NAME: &str = "docid-word-positions";
|
||||||
|
/// Name used on the command line to select `index.word_pair_proximity_docids`.
const WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME: &str = "word-pair-proximity-docids";
|
||||||
|
/// Name used on the command line to select `index.documents`.
const DOCUMENTS_DB_NAME: &str = "documents";
|
||||||
|
|
||||||
|
/// Every database name accepted by the `SizeOfDatabase` subcommand; used as the
/// `possible_values` of its `database` argument.
const DATABASE_NAMES: &[&str] = &[
|
||||||
|
MAIN_DB_NAME,
|
||||||
|
WORD_DOCIDS_DB_NAME,
|
||||||
|
DOCID_WORD_POSITIONS_DB_NAME,
|
||||||
|
WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME,
|
||||||
|
DOCUMENTS_DB_NAME,
|
||||||
|
];
|
||||||
|
|
||||||
#[derive(Debug, StructOpt)]
|
#[derive(Debug, StructOpt)]
|
||||||
#[structopt(name = "milli-info", about = "A stats crawler for milli.")]
|
#[structopt(name = "milli-info", about = "A stats crawler for milli.")]
|
||||||
struct Opt {
|
struct Opt {
|
||||||
@ -74,6 +88,12 @@ enum Command {
|
|||||||
/// Outputs the average number of documents for each words pair.
|
/// Outputs the average number of documents for each words pair.
|
||||||
AverageNumberOfDocumentByWordPairProximity,
|
AverageNumberOfDocumentByWordPairProximity,
|
||||||
|
|
||||||
|
/// Outputs the size in bytes of the specified database.
|
||||||
|
SizeOfDatabase {
|
||||||
|
#[structopt(possible_values = DATABASE_NAMES)]
|
||||||
|
database: String,
|
||||||
|
},
|
||||||
|
|
||||||
/// Outputs a CSV with the proximities for the two specified words and
|
/// Outputs a CSV with the proximities for the two specified words and
|
||||||
/// the documents ids where these relations appear.
|
/// the documents ids where these relations appear.
|
||||||
///
|
///
|
||||||
@ -130,6 +150,7 @@ fn main() -> anyhow::Result<()> {
|
|||||||
AverageNumberOfPositionsByWord => {
|
AverageNumberOfPositionsByWord => {
|
||||||
average_number_of_positions_by_word(&index, &rtxn)
|
average_number_of_positions_by_word(&index, &rtxn)
|
||||||
},
|
},
|
||||||
|
SizeOfDatabase { database } => size_of_database(&index, &rtxn, &database),
|
||||||
AverageNumberOfDocumentByWordPairProximity => {
|
AverageNumberOfDocumentByWordPairProximity => {
|
||||||
average_number_of_document_by_word_pair_proximity(&index, &rtxn)
|
average_number_of_document_by_word_pair_proximity(&index, &rtxn)
|
||||||
}
|
}
|
||||||
@ -336,6 +357,33 @@ fn average_number_of_positions_by_word(index: &Index, rtxn: &heed::RoTxn) -> any
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Prints the total size in bytes of the keys and of the values stored in the
/// database called `name`.
///
/// `name` is matched against the `*_DB_NAME` constants to pick the database of
/// `index` to inspect. Every entry of that database is then read through `rtxn`
/// with the `ByteSlice` codec for both key and value, and the lengths are summed.
///
/// # Errors
///
/// Returns an error when `name` does not match any known database, when the
/// iterator cannot be created, or when reading an entry fails.
fn size_of_database(index: &Index, rtxn: &heed::RoTxn, name: &str) -> anyhow::Result<()> {
    use heed::types::ByteSlice;

    // Every database is viewed through its polymorphic form so that a single
    // code path can iterate over it, whatever its typed codecs are.
    let database = match name {
        MAIN_DB_NAME => &index.main,
        WORD_DOCIDS_DB_NAME => index.word_docids.as_polymorph(),
        DOCID_WORD_POSITIONS_DB_NAME => index.docid_word_positions.as_polymorph(),
        WORD_PAIR_PROXIMITY_DOCIDS_DB_NAME => index.word_pair_proximity_docids.as_polymorph(),
        DOCUMENTS_DB_NAME => index.documents.as_polymorph(),
        otherwise => anyhow::bail!("unknown database {:?}", otherwise),
    };

    let mut key_size: u64 = 0;
    let mut val_size: u64 = 0;
    for result in database.iter::<_, ByteSlice, ByteSlice>(rtxn)? {
        let (k, v) = result?;
        key_size += k.len() as u64;
        val_size += v.len() as u64;
    }

    // The sizes are the result of the subcommand, not a diagnostic: write them
    // to stdout so that they can be piped or redirected by the caller.
    println!(
        "The {} database weigh {} bytes in terms of keys and {} bytes in terms of values.",
        name, key_size, val_size,
    );

    Ok(())
}
|
||||||
|
|
||||||
fn average_number_of_document_by_word_pair_proximity(
|
fn average_number_of_document_by_word_pair_proximity(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
|
@ -43,10 +43,10 @@ pub struct Index {
|
|||||||
pub word_docids: Database<Str, RoaringBitmapCodec>,
|
pub word_docids: Database<Str, RoaringBitmapCodec>,
|
||||||
/// Maps a word and a document id (u32) to all the positions where the given word appears.
|
/// Maps a word and a document id (u32) to all the positions where the given word appears.
|
||||||
pub docid_word_positions: Database<BEU32StrCodec, ByteorderXRoaringBitmapCodec>,
|
pub docid_word_positions: Database<BEU32StrCodec, ByteorderXRoaringBitmapCodec>,
|
||||||
/// Maps the document id to the document as a CSV line.
|
|
||||||
pub documents: Database<OwnedType<BEU32>, ByteSlice>,
|
|
||||||
/// Maps the proximity between a pair of words with all the docids where this relation appears.
|
/// Maps the proximity between a pair of words with all the docids where this relation appears.
|
||||||
pub word_pair_proximity_docids: Database<StrStrU8Codec, RoaringBitmapCodec>,
|
pub word_pair_proximity_docids: Database<StrStrU8Codec, RoaringBitmapCodec>,
|
||||||
|
/// Maps the document id to the document as a CSV line.
|
||||||
|
pub documents: Database<OwnedType<BEU32>, ByteSlice>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
@ -55,8 +55,8 @@ impl Index {
|
|||||||
main: env.create_poly_database(None)?,
|
main: env.create_poly_database(None)?,
|
||||||
word_docids: env.create_database(Some("word-docids"))?,
|
word_docids: env.create_database(Some("word-docids"))?,
|
||||||
docid_word_positions: env.create_database(Some("docid-word-positions"))?,
|
docid_word_positions: env.create_database(Some("docid-word-positions"))?,
|
||||||
documents: env.create_database(Some("documents"))?,
|
|
||||||
word_pair_proximity_docids: env.create_database(Some("word-pair-proximity-docids"))?,
|
word_pair_proximity_docids: env.create_database(Some("word-pair-proximity-docids"))?,
|
||||||
|
documents: env.create_database(Some("documents"))?,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user