mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-04-18 07:57:59 +02:00
Merge pull request #5465 from meilisearch/improve-stats-perf
Improve documents stats performances
This commit is contained in:
commit
cf68713145
@ -518,7 +518,7 @@ impl From<index_scheduler::IndexStats> for IndexStats {
|
|||||||
.inner_stats
|
.inner_stats
|
||||||
.number_of_documents
|
.number_of_documents
|
||||||
.unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()),
|
.unwrap_or(stats.inner_stats.documents_database_stats.number_of_entries()),
|
||||||
raw_document_db_size: stats.inner_stats.documents_database_stats.total_value_size(),
|
raw_document_db_size: stats.inner_stats.documents_database_stats.total_size(),
|
||||||
avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(),
|
avg_document_size: stats.inner_stats.documents_database_stats.average_value_size(),
|
||||||
is_indexing: stats.is_indexing,
|
is_indexing: stats.is_indexing,
|
||||||
number_of_embeddings: stats.inner_stats.number_of_embeddings,
|
number_of_embeddings: stats.inner_stats.number_of_embeddings,
|
||||||
|
@ -157,11 +157,14 @@ async fn delete_document_by_filter() {
|
|||||||
index.wait_task(task.uid()).await.succeeded();
|
index.wait_task(task.uid()).await.succeeded();
|
||||||
|
|
||||||
let (stats, _) = index.stats().await;
|
let (stats, _) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 4,
|
"numberOfDocuments": 4,
|
||||||
"rawDocumentDbSize": 42,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 10,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -208,11 +211,14 @@ async fn delete_document_by_filter() {
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
let (stats, _) = index.stats().await;
|
let (stats, _) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 2,
|
"numberOfDocuments": 2,
|
||||||
"rawDocumentDbSize": 16,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 8,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -278,11 +284,14 @@ async fn delete_document_by_filter() {
|
|||||||
"###);
|
"###);
|
||||||
|
|
||||||
let (stats, _) = index.stats().await;
|
let (stats, _) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 1,
|
"numberOfDocuments": 1,
|
||||||
"rawDocumentDbSize": 12,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 12,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
|
@ -28,12 +28,15 @@ async fn import_dump_v1_movie_raw() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 21965,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 414,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -185,12 +188,15 @@ async fn import_dump_v1_movie_with_settings() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 21965,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 414,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -355,12 +361,15 @@ async fn import_dump_v1_rubygems_with_settings() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 8606,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 162,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -522,12 +531,15 @@ async fn import_dump_v2_movie_raw() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 21965,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 414,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -679,12 +691,15 @@ async fn import_dump_v2_movie_with_settings() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 21965,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 414,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -846,12 +861,15 @@ async fn import_dump_v2_rubygems_with_settings() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 8606,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 162,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -1010,12 +1028,15 @@ async fn import_dump_v3_movie_raw() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 21965,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 414,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -1167,12 +1188,15 @@ async fn import_dump_v3_movie_with_settings() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 21965,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 414,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -1334,12 +1358,15 @@ async fn import_dump_v3_rubygems_with_settings() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 8606,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 162,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -1498,12 +1525,15 @@ async fn import_dump_v4_movie_raw() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 21965,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 414,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -1655,12 +1685,15 @@ async fn import_dump_v4_movie_with_settings() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 21965,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 414,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -1822,12 +1855,15 @@ async fn import_dump_v4_rubygems_with_settings() {
|
|||||||
let (stats, code) = index.stats().await;
|
let (stats, code) = index.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 53,
|
"numberOfDocuments": 53,
|
||||||
"rawDocumentDbSize": 8606,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 162,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -1994,11 +2030,14 @@ async fn import_dump_v5() {
|
|||||||
|
|
||||||
let (stats, code) = index1.stats().await;
|
let (stats, code) = index1.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 10,
|
"numberOfDocuments": 10,
|
||||||
"rawDocumentDbSize": 6782,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 678,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -2031,12 +2070,15 @@ async fn import_dump_v5() {
|
|||||||
let (stats, code) = index2.stats().await;
|
let (stats, code) = index2.stats().await;
|
||||||
snapshot!(code, @"200 OK");
|
snapshot!(code, @"200 OK");
|
||||||
snapshot!(
|
snapshot!(
|
||||||
json_string!(stats),
|
json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}),
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 10,
|
"numberOfDocuments": 10,
|
||||||
"rawDocumentDbSize": 6782,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 678,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
|
@ -110,11 +110,14 @@ async fn add_remove_embeddings() {
|
|||||||
index.wait_task(response.uid()).await.succeeded();
|
index.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
let (stats, _code) = index.stats().await;
|
let (stats, _code) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 2,
|
"numberOfDocuments": 2,
|
||||||
"rawDocumentDbSize": 27,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 13,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 5,
|
"numberOfEmbeddings": 5,
|
||||||
"numberOfEmbeddedDocuments": 2,
|
"numberOfEmbeddedDocuments": 2,
|
||||||
@ -135,11 +138,14 @@ async fn add_remove_embeddings() {
|
|||||||
index.wait_task(response.uid()).await.succeeded();
|
index.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
let (stats, _code) = index.stats().await;
|
let (stats, _code) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 2,
|
"numberOfDocuments": 2,
|
||||||
"rawDocumentDbSize": 27,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 13,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 3,
|
"numberOfEmbeddings": 3,
|
||||||
"numberOfEmbeddedDocuments": 2,
|
"numberOfEmbeddedDocuments": 2,
|
||||||
@ -160,11 +166,14 @@ async fn add_remove_embeddings() {
|
|||||||
index.wait_task(response.uid()).await.succeeded();
|
index.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
let (stats, _code) = index.stats().await;
|
let (stats, _code) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 2,
|
"numberOfDocuments": 2,
|
||||||
"rawDocumentDbSize": 27,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 13,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 2,
|
"numberOfEmbeddings": 2,
|
||||||
"numberOfEmbeddedDocuments": 2,
|
"numberOfEmbeddedDocuments": 2,
|
||||||
@ -186,11 +195,14 @@ async fn add_remove_embeddings() {
|
|||||||
index.wait_task(response.uid()).await.succeeded();
|
index.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
let (stats, _code) = index.stats().await;
|
let (stats, _code) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 2,
|
"numberOfDocuments": 2,
|
||||||
"rawDocumentDbSize": 27,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 13,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 2,
|
"numberOfEmbeddings": 2,
|
||||||
"numberOfEmbeddedDocuments": 1,
|
"numberOfEmbeddedDocuments": 1,
|
||||||
@ -236,11 +248,14 @@ async fn add_remove_embedded_documents() {
|
|||||||
index.wait_task(response.uid()).await.succeeded();
|
index.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
let (stats, _code) = index.stats().await;
|
let (stats, _code) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 2,
|
"numberOfDocuments": 2,
|
||||||
"rawDocumentDbSize": 27,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 13,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 5,
|
"numberOfEmbeddings": 5,
|
||||||
"numberOfEmbeddedDocuments": 2,
|
"numberOfEmbeddedDocuments": 2,
|
||||||
@ -257,11 +272,14 @@ async fn add_remove_embedded_documents() {
|
|||||||
index.wait_task(response.uid()).await.succeeded();
|
index.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
let (stats, _code) = index.stats().await;
|
let (stats, _code) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 1,
|
"numberOfDocuments": 1,
|
||||||
"rawDocumentDbSize": 13,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 13,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 3,
|
"numberOfEmbeddings": 3,
|
||||||
"numberOfEmbeddedDocuments": 1,
|
"numberOfEmbeddedDocuments": 1,
|
||||||
@ -290,11 +308,14 @@ async fn update_embedder_settings() {
|
|||||||
index.wait_task(response.uid()).await.succeeded();
|
index.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
let (stats, _code) = index.stats().await;
|
let (stats, _code) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 2,
|
"numberOfDocuments": 2,
|
||||||
"rawDocumentDbSize": 108,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 54,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -326,11 +347,14 @@ async fn update_embedder_settings() {
|
|||||||
server.wait_task(response.uid()).await.succeeded();
|
server.wait_task(response.uid()).await.succeeded();
|
||||||
|
|
||||||
let (stats, _code) = index.stats().await;
|
let (stats, _code) = index.stats().await;
|
||||||
snapshot!(json_string!(stats), @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[size]",
|
||||||
|
".avgDocumentSize" => "[size]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 2,
|
"numberOfDocuments": 2,
|
||||||
"rawDocumentDbSize": 108,
|
"rawDocumentDbSize": "[size]",
|
||||||
"avgDocumentSize": 54,
|
"avgDocumentSize": "[size]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 3,
|
"numberOfEmbeddings": 3,
|
||||||
"numberOfEmbeddedDocuments": 2,
|
"numberOfEmbeddedDocuments": 2,
|
||||||
|
@ -133,7 +133,9 @@ async fn check_the_index_scheduler(server: &Server) {
|
|||||||
let (stats, _) = server.stats().await;
|
let (stats, _) = server.stats().await;
|
||||||
assert_json_snapshot!(stats, {
|
assert_json_snapshot!(stats, {
|
||||||
".databaseSize" => "[bytes]",
|
".databaseSize" => "[bytes]",
|
||||||
".usedDatabaseSize" => "[bytes]"
|
".usedDatabaseSize" => "[bytes]",
|
||||||
|
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
|
||||||
|
".indexes.kefir.avgDocumentSize" => "[bytes]",
|
||||||
},
|
},
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
@ -143,8 +145,8 @@ async fn check_the_index_scheduler(server: &Server) {
|
|||||||
"indexes": {
|
"indexes": {
|
||||||
"kefir": {
|
"kefir": {
|
||||||
"numberOfDocuments": 1,
|
"numberOfDocuments": 1,
|
||||||
"rawDocumentDbSize": 109,
|
"rawDocumentDbSize": "[bytes]",
|
||||||
"avgDocumentSize": 109,
|
"avgDocumentSize": "[bytes]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -217,7 +219,9 @@ async fn check_the_index_scheduler(server: &Server) {
|
|||||||
let (stats, _) = server.stats().await;
|
let (stats, _) = server.stats().await;
|
||||||
assert_json_snapshot!(stats, {
|
assert_json_snapshot!(stats, {
|
||||||
".databaseSize" => "[bytes]",
|
".databaseSize" => "[bytes]",
|
||||||
".usedDatabaseSize" => "[bytes]"
|
".usedDatabaseSize" => "[bytes]",
|
||||||
|
".indexes.kefir.rawDocumentDbSize" => "[bytes]",
|
||||||
|
".indexes.kefir.avgDocumentSize" => "[bytes]",
|
||||||
},
|
},
|
||||||
@r###"
|
@r###"
|
||||||
{
|
{
|
||||||
@ -227,8 +231,8 @@ async fn check_the_index_scheduler(server: &Server) {
|
|||||||
"indexes": {
|
"indexes": {
|
||||||
"kefir": {
|
"kefir": {
|
||||||
"numberOfDocuments": 1,
|
"numberOfDocuments": 1,
|
||||||
"rawDocumentDbSize": 109,
|
"rawDocumentDbSize": "[bytes]",
|
||||||
"avgDocumentSize": 109,
|
"avgDocumentSize": "[bytes]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
@ -245,11 +249,14 @@ async fn check_the_index_scheduler(server: &Server) {
|
|||||||
"###);
|
"###);
|
||||||
let index = server.index("kefir");
|
let index = server.index("kefir");
|
||||||
let (stats, _) = index.stats().await;
|
let (stats, _) = index.stats().await;
|
||||||
snapshot!(stats, @r###"
|
snapshot!(json_string!(stats, {
|
||||||
|
".rawDocumentDbSize" => "[bytes]",
|
||||||
|
".avgDocumentSize" => "[bytes]",
|
||||||
|
}), @r###"
|
||||||
{
|
{
|
||||||
"numberOfDocuments": 1,
|
"numberOfDocuments": 1,
|
||||||
"rawDocumentDbSize": 109,
|
"rawDocumentDbSize": "[bytes]",
|
||||||
"avgDocumentSize": 109,
|
"avgDocumentSize": "[bytes]",
|
||||||
"isIndexing": false,
|
"isIndexing": false,
|
||||||
"numberOfEmbeddings": 0,
|
"numberOfEmbeddings": 0,
|
||||||
"numberOfEmbeddedDocuments": 0,
|
"numberOfEmbeddedDocuments": 0,
|
||||||
|
@ -1,8 +1,13 @@
|
|||||||
use heed::types::Bytes;
|
use std::mem;
|
||||||
|
|
||||||
use heed::Database;
|
use heed::Database;
|
||||||
|
use heed::DatabaseStat;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
|
use heed::Unspecified;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use crate::BEU32;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
|
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
/// The stats of a database.
|
/// The stats of a database.
|
||||||
@ -20,58 +25,24 @@ impl DatabaseStats {
|
|||||||
///
|
///
|
||||||
/// This function iterates over the whole database and computes the stats.
|
/// This function iterates over the whole database and computes the stats.
|
||||||
/// It is not efficient and should be cached somewhere.
|
/// It is not efficient and should be cached somewhere.
|
||||||
pub(crate) fn new(database: Database<Bytes, Bytes>, rtxn: &RoTxn<'_>) -> heed::Result<Self> {
|
pub(crate) fn new(
|
||||||
let mut database_stats =
|
database: Database<BEU32, Unspecified>,
|
||||||
Self { number_of_entries: 0, total_key_size: 0, total_value_size: 0 };
|
rtxn: &RoTxn<'_>,
|
||||||
|
) -> heed::Result<Self> {
|
||||||
|
let DatabaseStat { page_size, depth: _, branch_pages, leaf_pages, overflow_pages, entries } =
|
||||||
|
database.stat(rtxn)?;
|
||||||
|
|
||||||
let mut iter = database.iter(rtxn)?;
|
// We first take the total size without overflow pages as the overflow pages contains the values and only that.
|
||||||
while let Some((key, value)) = iter.next().transpose()? {
|
let total_size = (branch_pages + leaf_pages + overflow_pages) * page_size as usize;
|
||||||
let key_size = key.len() as u64;
|
// We compute an estimated size for the keys.
|
||||||
let value_size = value.len() as u64;
|
let total_key_size = entries * (mem::size_of::<u32>() + 4);
|
||||||
database_stats.total_key_size += key_size;
|
let total_value_size = total_size - total_key_size;
|
||||||
database_stats.total_value_size += value_size;
|
|
||||||
}
|
|
||||||
|
|
||||||
database_stats.number_of_entries = database.len(rtxn)?;
|
Ok(Self {
|
||||||
|
number_of_entries: entries as u64,
|
||||||
Ok(database_stats)
|
total_key_size: total_key_size as u64,
|
||||||
}
|
total_value_size: total_value_size as u64,
|
||||||
|
})
|
||||||
/// Recomputes the stats of the database and returns the new stats.
|
|
||||||
///
|
|
||||||
/// This function is used to update the stats of the database when some keys are modified.
|
|
||||||
/// It is more efficient than the `new` function because it does not iterate over the whole database but only the modified keys comparing the before and after states.
|
|
||||||
pub(crate) fn recompute<I, K>(
|
|
||||||
mut stats: Self,
|
|
||||||
database: Database<Bytes, Bytes>,
|
|
||||||
before_rtxn: &RoTxn<'_>,
|
|
||||||
after_rtxn: &RoTxn<'_>,
|
|
||||||
modified_keys: I,
|
|
||||||
) -> heed::Result<Self>
|
|
||||||
where
|
|
||||||
I: IntoIterator<Item = K>,
|
|
||||||
K: AsRef<[u8]>,
|
|
||||||
{
|
|
||||||
for key in modified_keys {
|
|
||||||
let key = key.as_ref();
|
|
||||||
if let Some(value) = database.get(after_rtxn, key)? {
|
|
||||||
let key_size = key.len() as u64;
|
|
||||||
let value_size = value.len() as u64;
|
|
||||||
stats.total_key_size = stats.total_key_size.saturating_add(key_size);
|
|
||||||
stats.total_value_size = stats.total_value_size.saturating_add(value_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
if let Some(value) = database.get(before_rtxn, key)? {
|
|
||||||
let key_size = key.len() as u64;
|
|
||||||
let value_size = value.len() as u64;
|
|
||||||
stats.total_key_size = stats.total_key_size.saturating_sub(key_size);
|
|
||||||
stats.total_value_size = stats.total_value_size.saturating_sub(value_size);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
stats.number_of_entries = database.len(after_rtxn)?;
|
|
||||||
|
|
||||||
Ok(stats)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn average_key_size(&self) -> u64 {
|
pub fn average_key_size(&self) -> u64 {
|
||||||
@ -86,6 +57,10 @@ impl DatabaseStats {
|
|||||||
self.number_of_entries
|
self.number_of_entries
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn total_size(&self) -> u64 {
|
||||||
|
self.total_key_size + self.total_value_size
|
||||||
|
}
|
||||||
|
|
||||||
pub fn total_key_size(&self) -> u64 {
|
pub fn total_key_size(&self) -> u64 {
|
||||||
self.total_key_size
|
self.total_key_size
|
||||||
}
|
}
|
||||||
|
@ -411,38 +411,6 @@ impl Index {
|
|||||||
Ok(count.unwrap_or_default())
|
Ok(count.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Updates the stats of the documents database based on the previous stats and the modified docids.
|
|
||||||
pub fn update_documents_stats(
|
|
||||||
&self,
|
|
||||||
wtxn: &mut RwTxn<'_>,
|
|
||||||
modified_docids: roaring::RoaringBitmap,
|
|
||||||
) -> Result<()> {
|
|
||||||
let before_rtxn = self.read_txn()?;
|
|
||||||
let document_stats = match self.documents_stats(&before_rtxn)? {
|
|
||||||
Some(before_stats) => DatabaseStats::recompute(
|
|
||||||
before_stats,
|
|
||||||
self.documents.remap_types(),
|
|
||||||
&before_rtxn,
|
|
||||||
wtxn,
|
|
||||||
modified_docids.iter().map(|docid| docid.to_be_bytes()),
|
|
||||||
)?,
|
|
||||||
None => {
|
|
||||||
// This should never happen when there are already documents in the index, the documents stats should be present.
|
|
||||||
// If it happens, it means that the index was not properly initialized/upgraded.
|
|
||||||
debug_assert_eq!(
|
|
||||||
self.documents.len(&before_rtxn)?,
|
|
||||||
0,
|
|
||||||
"The documents stats should be present when there are documents in the index"
|
|
||||||
);
|
|
||||||
tracing::warn!("No documents stats found, creating new ones");
|
|
||||||
DatabaseStats::new(self.documents.remap_types(), &*wtxn)?
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
self.put_documents_stats(wtxn, document_stats)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Writes the stats of the documents database.
|
/// Writes the stats of the documents database.
|
||||||
pub fn put_documents_stats(
|
pub fn put_documents_stats(
|
||||||
&self,
|
&self,
|
||||||
|
@ -28,6 +28,7 @@ pub use self::helpers::*;
|
|||||||
pub use self::transform::{Transform, TransformOutput};
|
pub use self::transform::{Transform, TransformOutput};
|
||||||
use super::facet::clear_facet_levels_based_on_settings_diff;
|
use super::facet::clear_facet_levels_based_on_settings_diff;
|
||||||
use super::new::StdResult;
|
use super::new::StdResult;
|
||||||
|
use crate::database_stats::DatabaseStats;
|
||||||
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
use crate::documents::{obkv_to_object, DocumentsBatchReader};
|
||||||
use crate::error::{Error, InternalError};
|
use crate::error::{Error, InternalError};
|
||||||
use crate::index::{PrefixSearch, PrefixSettings};
|
use crate::index::{PrefixSearch, PrefixSettings};
|
||||||
@ -476,7 +477,8 @@ where
|
|||||||
|
|
||||||
if !settings_diff.settings_update_only {
|
if !settings_diff.settings_update_only {
|
||||||
// Update the stats of the documents database when there is a document update.
|
// Update the stats of the documents database when there is a document update.
|
||||||
self.index.update_documents_stats(self.wtxn, modified_docids)?;
|
let stats = DatabaseStats::new(self.index.documents.remap_data_type(), self.wtxn)?;
|
||||||
|
self.index.put_documents_stats(self.wtxn, stats)?;
|
||||||
}
|
}
|
||||||
// We write the field distribution into the main database
|
// We write the field distribution into the main database
|
||||||
self.index.put_field_distribution(self.wtxn, &field_distribution)?;
|
self.index.put_field_distribution(self.wtxn, &field_distribution)?;
|
||||||
|
@ -234,7 +234,6 @@ where
|
|||||||
embedders,
|
embedders,
|
||||||
field_distribution,
|
field_distribution,
|
||||||
document_ids,
|
document_ids,
|
||||||
modified_docids,
|
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok(congestion)
|
Ok(congestion)
|
||||||
|
@ -7,6 +7,7 @@ use rand::SeedableRng as _;
|
|||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use super::super::channel::*;
|
use super::super::channel::*;
|
||||||
|
use crate::database_stats::DatabaseStats;
|
||||||
use crate::documents::PrimaryKey;
|
use crate::documents::PrimaryKey;
|
||||||
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
use crate::fields_ids_map::metadata::FieldIdMapWithMetadata;
|
||||||
use crate::index::IndexEmbeddingConfig;
|
use crate::index::IndexEmbeddingConfig;
|
||||||
@ -142,7 +143,6 @@ pub(super) fn update_index(
|
|||||||
embedders: EmbeddingConfigs,
|
embedders: EmbeddingConfigs,
|
||||||
field_distribution: std::collections::BTreeMap<String, u64>,
|
field_distribution: std::collections::BTreeMap<String, u64>,
|
||||||
document_ids: roaring::RoaringBitmap,
|
document_ids: roaring::RoaringBitmap,
|
||||||
modified_docids: roaring::RoaringBitmap,
|
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?;
|
index.put_fields_ids_map(wtxn, new_fields_ids_map.as_fields_ids_map())?;
|
||||||
if let Some(new_primary_key) = new_primary_key {
|
if let Some(new_primary_key) = new_primary_key {
|
||||||
@ -153,7 +153,8 @@ pub(super) fn update_index(
|
|||||||
index.put_field_distribution(wtxn, &field_distribution)?;
|
index.put_field_distribution(wtxn, &field_distribution)?;
|
||||||
index.put_documents_ids(wtxn, &document_ids)?;
|
index.put_documents_ids(wtxn, &document_ids)?;
|
||||||
index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
|
index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
|
||||||
index.update_documents_stats(wtxn, modified_docids)?;
|
let stats = DatabaseStats::new(index.documents.remap_data_type(), wtxn)?;
|
||||||
|
index.put_documents_stats(wtxn, stats)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user