mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-07 12:04:30 +01:00
Create a small tool to measure the size of internal databases
This commit is contained in:
parent
040b5a5b6f
commit
ef9875256b
44
Cargo.lock
generated
44
Cargo.lock
generated
@ -309,6 +309,15 @@ version = "0.1.6"
|
|||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
|
checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "ansi_term"
|
||||||
|
version = "0.12.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
|
||||||
|
dependencies = [
|
||||||
|
"winapi",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "anstream"
|
name = "anstream"
|
||||||
version = "0.3.2"
|
version = "0.3.2"
|
||||||
@ -778,9 +787,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap"
|
name = "clap"
|
||||||
version = "4.3.0"
|
version = "4.3.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "93aae7a4192245f70fe75dd9157fc7b4a5bf53e88d30bd4396f7d8f9284d5acc"
|
checksum = "6320c6d1c98b6981da7bb2dcecbd0be9dc98d42165fa8326b21000f7dbfde6d0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"clap_builder",
|
"clap_builder",
|
||||||
"clap_derive",
|
"clap_derive",
|
||||||
@ -789,9 +798,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap_builder"
|
name = "clap_builder"
|
||||||
version = "4.3.0"
|
version = "4.3.5"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "4f423e341edefb78c9caba2d9c7f7687d0e72e89df3ce3394554754393ac3990"
|
checksum = "2e53afce1efce6ed1f633cf0e57612fe51db54a1ee4fd8f8503d078fe02d69ae"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"anstream",
|
"anstream",
|
||||||
"anstyle",
|
"anstyle",
|
||||||
@ -802,9 +811,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "clap_derive"
|
name = "clap_derive"
|
||||||
version = "4.3.0"
|
version = "4.3.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "191d9573962933b4027f932c600cd252ce27a8ad5979418fe78e43c07996f27b"
|
checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"heck",
|
"heck",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
@ -1599,7 +1608,7 @@ name = "fuzzers"
|
|||||||
version = "1.2.0"
|
version = "1.2.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"arbitrary",
|
"arbitrary",
|
||||||
"clap 4.3.0",
|
"clap 4.3.6",
|
||||||
"fastrand",
|
"fastrand",
|
||||||
"milli",
|
"milli",
|
||||||
"serde",
|
"serde",
|
||||||
@ -1949,6 +1958,16 @@ dependencies = [
|
|||||||
"uuid 1.3.3",
|
"uuid 1.3.3",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "index-stats"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"anyhow",
|
||||||
|
"clap 4.3.6",
|
||||||
|
"milli",
|
||||||
|
"piechart",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "indexmap"
|
name = "indexmap"
|
||||||
version = "1.9.3"
|
version = "1.9.3"
|
||||||
@ -2534,7 +2553,7 @@ dependencies = [
|
|||||||
"byte-unit",
|
"byte-unit",
|
||||||
"bytes",
|
"bytes",
|
||||||
"cargo_toml",
|
"cargo_toml",
|
||||||
"clap 4.3.0",
|
"clap 4.3.6",
|
||||||
"crossbeam-channel",
|
"crossbeam-channel",
|
||||||
"deserr",
|
"deserr",
|
||||||
"dump",
|
"dump",
|
||||||
@ -3097,6 +3116,15 @@ dependencies = [
|
|||||||
"siphasher",
|
"siphasher",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "piechart"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "79730372879e285c066c9289e164f4033ff665a866396dfa478f58f5adcd4089"
|
||||||
|
dependencies = [
|
||||||
|
"ansi_term",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pin-project-lite"
|
name = "pin-project-lite"
|
||||||
version = "0.2.9"
|
version = "0.2.9"
|
||||||
|
@ -10,6 +10,7 @@ members = [
|
|||||||
"file-store",
|
"file-store",
|
||||||
"permissive-json-pointer",
|
"permissive-json-pointer",
|
||||||
"milli",
|
"milli",
|
||||||
|
"index-stats",
|
||||||
"filter-parser",
|
"filter-parser",
|
||||||
"flatten-serde-json",
|
"flatten-serde-json",
|
||||||
"json-depth-checker",
|
"json-depth-checker",
|
||||||
|
12
index-stats/Cargo.toml
Normal file
12
index-stats/Cargo.toml
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
[package]
|
||||||
|
name = "index-stats"
|
||||||
|
description = "A small program that computes internal stats of a Meilisearch index"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
publish = false
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.71"
|
||||||
|
clap = { version = "4.3.5", features = ["derive"] }
|
||||||
|
milli = { path = "../milli" }
|
||||||
|
piechart = "1.0.0"
|
200
index-stats/src/main.rs
Normal file
200
index-stats/src/main.rs
Normal file
@ -0,0 +1,200 @@
|
|||||||
|
use std::cmp::Reverse;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
|
||||||
|
use clap::Parser;
|
||||||
|
use milli::heed::{types::ByteSlice, EnvOpenOptions, PolyDatabase, RoTxn};
|
||||||
|
use milli::index::db_name::*;
|
||||||
|
use milli::index::Index;
|
||||||
|
use piechart::{Chart, Color, Data};
|
||||||
|
|
||||||
|
/// Simple program to greet a person
|
||||||
|
#[derive(Parser, Debug)]
|
||||||
|
#[command(author, version, about, long_about = None)]
|
||||||
|
struct Args {
|
||||||
|
/// The path to the LMDB Meilisearch index database.
|
||||||
|
path: PathBuf,
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() -> anyhow::Result<()> {
|
||||||
|
let Args { path } = Args::parse();
|
||||||
|
let env = EnvOpenOptions::new().max_dbs(24).open(path)?;
|
||||||
|
|
||||||
|
// TODO not sure to keep that...
|
||||||
|
// if removed put the pub(crate) back in the Index struct
|
||||||
|
matches!(
|
||||||
|
Option::<Index>::None,
|
||||||
|
Some(Index {
|
||||||
|
env: _,
|
||||||
|
main: _,
|
||||||
|
word_docids: _,
|
||||||
|
exact_word_docids: _,
|
||||||
|
word_prefix_docids: _,
|
||||||
|
exact_word_prefix_docids: _,
|
||||||
|
word_pair_proximity_docids: _,
|
||||||
|
word_prefix_pair_proximity_docids: _,
|
||||||
|
prefix_word_pair_proximity_docids: _,
|
||||||
|
word_position_docids: _,
|
||||||
|
word_fid_docids: _,
|
||||||
|
field_id_word_count_docids: _,
|
||||||
|
word_prefix_position_docids: _,
|
||||||
|
word_prefix_fid_docids: _,
|
||||||
|
script_language_docids: _,
|
||||||
|
facet_id_exists_docids: _,
|
||||||
|
facet_id_is_null_docids: _,
|
||||||
|
facet_id_is_empty_docids: _,
|
||||||
|
facet_id_f64_docids: _,
|
||||||
|
facet_id_string_docids: _,
|
||||||
|
field_id_docid_facet_f64s: _,
|
||||||
|
field_id_docid_facet_strings: _,
|
||||||
|
documents: _,
|
||||||
|
})
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut wtxn = env.write_txn()?;
|
||||||
|
let main = env.create_poly_database(&mut wtxn, Some(MAIN))?;
|
||||||
|
let word_docids = env.create_poly_database(&mut wtxn, Some(WORD_DOCIDS))?;
|
||||||
|
let exact_word_docids = env.create_poly_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?;
|
||||||
|
let word_prefix_docids = env.create_poly_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?;
|
||||||
|
let exact_word_prefix_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
|
||||||
|
let word_pair_proximity_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
|
||||||
|
let script_language_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?;
|
||||||
|
let word_prefix_pair_proximity_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?;
|
||||||
|
let prefix_word_pair_proximity_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?;
|
||||||
|
let word_position_docids = env.create_poly_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?;
|
||||||
|
let word_fid_docids = env.create_poly_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?;
|
||||||
|
let field_id_word_count_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?;
|
||||||
|
let word_prefix_position_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?;
|
||||||
|
let word_prefix_fid_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?;
|
||||||
|
let facet_id_f64_docids = env.create_poly_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?;
|
||||||
|
let facet_id_string_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?;
|
||||||
|
let facet_id_exists_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?;
|
||||||
|
let facet_id_is_null_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?;
|
||||||
|
let facet_id_is_empty_docids =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?;
|
||||||
|
let field_id_docid_facet_f64s =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?;
|
||||||
|
let field_id_docid_facet_strings =
|
||||||
|
env.create_poly_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?;
|
||||||
|
let documents = env.create_poly_database(&mut wtxn, Some(DOCUMENTS))?;
|
||||||
|
wtxn.commit()?;
|
||||||
|
|
||||||
|
let list = [
|
||||||
|
(main, MAIN),
|
||||||
|
(word_docids, WORD_DOCIDS),
|
||||||
|
(exact_word_docids, EXACT_WORD_DOCIDS),
|
||||||
|
(word_prefix_docids, WORD_PREFIX_DOCIDS),
|
||||||
|
(exact_word_prefix_docids, EXACT_WORD_PREFIX_DOCIDS),
|
||||||
|
(word_pair_proximity_docids, WORD_PAIR_PROXIMITY_DOCIDS),
|
||||||
|
(script_language_docids, SCRIPT_LANGUAGE_DOCIDS),
|
||||||
|
(word_prefix_pair_proximity_docids, WORD_PREFIX_PAIR_PROXIMITY_DOCIDS),
|
||||||
|
(prefix_word_pair_proximity_docids, PREFIX_WORD_PAIR_PROXIMITY_DOCIDS),
|
||||||
|
(word_position_docids, WORD_POSITION_DOCIDS),
|
||||||
|
(word_fid_docids, WORD_FIELD_ID_DOCIDS),
|
||||||
|
(field_id_word_count_docids, FIELD_ID_WORD_COUNT_DOCIDS),
|
||||||
|
(word_prefix_position_docids, WORD_PREFIX_POSITION_DOCIDS),
|
||||||
|
(word_prefix_fid_docids, WORD_PREFIX_FIELD_ID_DOCIDS),
|
||||||
|
(facet_id_f64_docids, FACET_ID_F64_DOCIDS),
|
||||||
|
(facet_id_string_docids, FACET_ID_STRING_DOCIDS),
|
||||||
|
(facet_id_exists_docids, FACET_ID_EXISTS_DOCIDS),
|
||||||
|
(facet_id_is_null_docids, FACET_ID_IS_NULL_DOCIDS),
|
||||||
|
(facet_id_is_empty_docids, FACET_ID_IS_EMPTY_DOCIDS),
|
||||||
|
(field_id_docid_facet_f64s, FIELD_ID_DOCID_FACET_F64S),
|
||||||
|
(field_id_docid_facet_strings, FIELD_ID_DOCID_FACET_STRINGS),
|
||||||
|
(documents, DOCUMENTS),
|
||||||
|
];
|
||||||
|
|
||||||
|
let rtxn = env.read_txn()?;
|
||||||
|
let result: Result<Vec<_>, _> =
|
||||||
|
list.into_iter().map(|(db, name)| compute_stats(&rtxn, db).map(|s| (s, name))).collect();
|
||||||
|
let mut stats = result?;
|
||||||
|
|
||||||
|
println!("{:>30} Number of Entries", "");
|
||||||
|
stats.sort_by_key(|(s, _)| Reverse(s.number_of_entries));
|
||||||
|
let data = compute_graph_data(stats.iter().map(|(s, n)| (s.number_of_entries as f32, *n)));
|
||||||
|
Chart::new().radius(20).aspect_ratio(6).legend(true).draw(&data);
|
||||||
|
print!("\r\n\r\n\r\n");
|
||||||
|
|
||||||
|
println!("{:>30} Size of Entries", "");
|
||||||
|
stats.sort_by_key(|(s, _)| Reverse(s.size_of_entries));
|
||||||
|
let data = compute_graph_data(stats.iter().map(|(s, n)| (s.size_of_entries as f32, *n)));
|
||||||
|
Chart::new().radius(20).aspect_ratio(6).legend(true).draw(&data);
|
||||||
|
print!("\r\n\r\n\r\n");
|
||||||
|
|
||||||
|
println!("{:>30} Size of Data", "");
|
||||||
|
stats.sort_by_key(|(s, _)| Reverse(s.size_of_data));
|
||||||
|
let data = compute_graph_data(stats.iter().map(|(s, n)| (s.size_of_data as f32, *n)));
|
||||||
|
Chart::new().radius(20).aspect_ratio(6).legend(true).draw(&data);
|
||||||
|
print!("\r\n\r\n\r\n");
|
||||||
|
|
||||||
|
println!("{:>30} Size of Keys", "");
|
||||||
|
stats.sort_by_key(|(s, _)| Reverse(s.size_of_keys));
|
||||||
|
let data = compute_graph_data(stats.iter().map(|(s, n)| (s.size_of_keys as f32, *n)));
|
||||||
|
Chart::new().radius(20).aspect_ratio(6).legend(true).draw(&data);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compute_graph_data<'a>(stats: impl IntoIterator<Item = (f32, &'a str)>) -> Vec<Data> {
|
||||||
|
let mut colors = [
|
||||||
|
Color::Red,
|
||||||
|
Color::Green,
|
||||||
|
Color::Yellow,
|
||||||
|
Color::Blue,
|
||||||
|
Color::Purple,
|
||||||
|
Color::Cyan,
|
||||||
|
Color::White,
|
||||||
|
]
|
||||||
|
.into_iter()
|
||||||
|
.cycle();
|
||||||
|
|
||||||
|
let mut characters = ['▴', '▵', '▾', '▿', '▪', '▫', '•', '◦'].into_iter().cycle();
|
||||||
|
|
||||||
|
stats
|
||||||
|
.into_iter()
|
||||||
|
.map(|(value, name)| Data {
|
||||||
|
label: (*name).into(),
|
||||||
|
value,
|
||||||
|
color: Some(colors.next().unwrap().into()),
|
||||||
|
fill: characters.next().unwrap(),
|
||||||
|
})
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Counters describing the content of a single database.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Stats {
    /// Number of key/value entries.
    pub number_of_entries: u64,
    /// Sum of the lengths, in bytes, of all the keys.
    pub size_of_keys: u64,
    /// Sum of the lengths, in bytes, of all the values.
    pub size_of_data: u64,
    /// Sum of the key and value sizes, in bytes.
    pub size_of_entries: u64,
}
|
||||||
|
|
||||||
|
fn compute_stats(rtxn: &RoTxn, db: PolyDatabase) -> anyhow::Result<Stats> {
|
||||||
|
let mut number_of_entries = 0;
|
||||||
|
let mut size_of_keys = 0;
|
||||||
|
let mut size_of_data = 0;
|
||||||
|
|
||||||
|
for result in db.iter::<_, ByteSlice, ByteSlice>(rtxn)? {
|
||||||
|
let (key, data) = result?;
|
||||||
|
number_of_entries += 1;
|
||||||
|
size_of_keys += key.len() as u64;
|
||||||
|
size_of_data += data.len() as u64;
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(Stats {
|
||||||
|
number_of_entries,
|
||||||
|
size_of_keys,
|
||||||
|
size_of_data,
|
||||||
|
size_of_entries: size_of_keys + size_of_data,
|
||||||
|
})
|
||||||
|
}
|
@ -93,10 +93,10 @@ pub mod db_name {
|
|||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Index {
|
pub struct Index {
|
||||||
/// The LMDB environment which this index is associated with.
|
/// The LMDB environment which this index is associated with.
|
||||||
pub(crate) env: heed::Env,
|
pub env: heed::Env,
|
||||||
|
|
||||||
/// Contains many different types (e.g. the fields ids map).
|
/// Contains many different types (e.g. the fields ids map).
|
||||||
pub(crate) main: PolyDatabase,
|
pub main: PolyDatabase,
|
||||||
|
|
||||||
/// A word and all the documents ids containing the word.
|
/// A word and all the documents ids containing the word.
|
||||||
pub word_docids: Database<Str, RoaringBitmapCodec>,
|
pub word_docids: Database<Str, RoaringBitmapCodec>,
|
||||||
@ -150,7 +150,7 @@ pub struct Index {
|
|||||||
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
|
pub field_id_docid_facet_strings: Database<FieldDocIdFacetStringCodec, Str>,
|
||||||
|
|
||||||
/// Maps the document id to the document as an obkv store.
|
/// Maps the document id to the document as an obkv store.
|
||||||
pub(crate) documents: Database<OwnedType<BEU32>, ObkvCodec>,
|
pub documents: Database<OwnedType<BEU32>, ObkvCodec>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Index {
|
impl Index {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user