Merge #5341

5341: Embeddings stats r=ManyTheFish a=ManyTheFish

# Pull Request

## Related issue
Fixes #5321

## What does this PR do?
- Add embedding stats
- Force the dumpless upgrade to recompute the stats
- Add tests

Co-authored-by: ManyTheFish <many@meilisearch.com>

Commit 885710a07b
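For context, the two new counters are exposed in camelCase on the stats payloads and are optional for backward compatibility. A minimal client-side sketch (not part of this PR; the `StatsView` type and the serde/serde_json dependencies are assumptions) of how that retro-compatibility plays out:

```rust
use serde::Deserialize;

// Hypothetical client-side view of a stats payload; the two new fields are
// optional so that responses from Meilisearch < v1.13.0 still deserialize.
#[derive(Deserialize)]
#[serde(rename_all = "camelCase")]
struct StatsView {
    number_of_documents: u64,
    is_indexing: bool,
    #[serde(default)]
    number_of_embeddings: Option<u64>,
    #[serde(default)]
    number_of_embedded_documents: Option<u64>,
}

fn main() {
    // Pre-v1.13.0 payload: the embedding fields are simply absent.
    let old: StatsView =
        serde_json::from_str(r#"{"numberOfDocuments":10,"isIndexing":false}"#).unwrap();
    assert_eq!(old.number_of_embeddings, None);

    // v1.13.0 payload: the new counters are present.
    let new: StatsView = serde_json::from_str(
        r#"{"numberOfDocuments":10,"isIndexing":false,"numberOfEmbeddings":5,"numberOfEmbeddedDocuments":2}"#,
    )
    .unwrap();
    assert_eq!(new.number_of_embeddings, Some(5));
}
```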
@@ -106,6 +106,12 @@ pub struct IndexStats {
     /// are not returned to the disk after a deletion, this number is typically larger than
     /// `used_database_size` that only includes the size of the used pages.
     pub database_size: u64,
+    /// Number of embeddings in the index.
+    /// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
+    pub number_of_embeddings: Option<u64>,
+    /// Number of embedded documents in the index.
+    /// Option: retrocompatible with the stats of the pre-v1.13.0 versions of meilisearch
+    pub number_of_embedded_documents: Option<u64>,
     /// Size taken by the used pages of the index' DB, in bytes.
     ///
     /// As the DB backend does not return to the disk the pages that are not currently used by the DB,
@@ -130,8 +136,11 @@ impl IndexStats {
     ///
     /// - rtxn: a RO transaction for the index, obtained from `Index::read_txn()`.
     pub fn new(index: &Index, rtxn: &RoTxn) -> milli::Result<Self> {
+        let arroy_stats = index.arroy_stats(rtxn)?;
         Ok(IndexStats {
             number_of_documents: index.number_of_documents(rtxn)?,
+            number_of_embeddings: Some(arroy_stats.number_of_embeddings),
+            number_of_embedded_documents: Some(arroy_stats.documents.len()),
             database_size: index.on_disk_size()?,
             used_database_size: index.used_size()?,
             primary_key: index.primary_key(rtxn)?.map(|s| s.to_string()),
@@ -903,7 +903,7 @@ fn create_and_list_index() {

    index_scheduler.index("kefir").unwrap();
    let list = index_scheduler.get_paginated_indexes_stats(&AuthFilter::default(), 0, 20).unwrap();
-    snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r#"
+    snapshot!(json_string!(list, { "[1][0][1].created_at" => "[date]", "[1][0][1].updated_at" => "[date]", "[1][0][1].used_database_size" => "[bytes]", "[1][0][1].database_size" => "[bytes]" }), @r###"
    [
      1,
      [
@@ -912,6 +912,8 @@ fn create_and_list_index() {
        {
          "number_of_documents": 0,
          "database_size": "[bytes]",
+          "number_of_embeddings": 0,
+          "number_of_embedded_documents": 0,
          "used_database_size": "[bytes]",
          "primary_key": null,
          "field_distribution": {},
@@ -921,5 +923,5 @@ fn create_and_list_index() {
        ]
      ]
    ]
-    "#);
+    "###);
}
@@ -496,6 +496,12 @@ pub struct IndexStats {
     pub number_of_documents: u64,
     /// Whether or not the index is currently ingesting document
     pub is_indexing: bool,
+    /// Number of embeddings in the index
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub number_of_embeddings: Option<u64>,
+    /// Number of embedded documents in the index
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub number_of_embedded_documents: Option<u64>,
     /// Association of every field name with the number of times it occurs in the documents.
     #[schema(value_type = HashMap<String, u64>)]
     pub field_distribution: FieldDistribution,
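With `skip_serializing_if = "Option::is_none"`, the two counters disappear from the HTTP payload entirely when they are unknown, rather than being serialized as `null`. A minimal sketch of that serde behavior (the `Payload` type is illustrative; serde and serde_json are assumed dependencies):

```rust
use serde::Serialize;

#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
struct Payload {
    number_of_documents: u64,
    // Omitted from the JSON output when `None`, mirroring the attribute above.
    #[serde(skip_serializing_if = "Option::is_none")]
    number_of_embeddings: Option<u64>,
}

fn main() {
    let p = Payload { number_of_documents: 10, number_of_embeddings: None };
    // The key is absent, not `null`.
    assert_eq!(serde_json::to_string(&p).unwrap(), r#"{"numberOfDocuments":10}"#);
}
```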
@@ -506,6 +512,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
         IndexStats {
             number_of_documents: stats.inner_stats.number_of_documents,
             is_indexing: stats.is_indexing,
+            number_of_embeddings: stats.inner_stats.number_of_embeddings,
+            number_of_embedded_documents: stats.inner_stats.number_of_embedded_documents,
             field_distribution: stats.inner_stats.field_distribution,
         }
     }
@@ -524,6 +532,8 @@ impl From<index_scheduler::IndexStats> for IndexStats {
     (status = OK, description = "The stats of the index", body = IndexStats, content_type = "application/json", example = json!(
         {
             "numberOfDocuments": 10,
+            "numberOfEmbeddings": 10,
+            "numberOfEmbeddedDocuments": 10,
             "isIndexing": true,
             "fieldDistribution": {
                 "genre": 10,
@@ -161,6 +161,8 @@ async fn delete_document_by_filter() {
    {
      "numberOfDocuments": 4,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "color": 3,
        "id": 4
@@ -208,6 +210,8 @@ async fn delete_document_by_filter() {
    {
      "numberOfDocuments": 2,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "color": 1,
        "id": 2
@@ -274,6 +278,8 @@ async fn delete_document_by_filter() {
    {
      "numberOfDocuments": 1,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "color": 1,
        "id": 1
@@ -27,9 +27,24 @@ async fn import_dump_v1_movie_raw() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -173,6 +188,8 @@ async fn import_dump_v1_movie_with_settings() {
    {
      "numberOfDocuments": 53,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "genres": 53,
        "id": 53,
@@ -333,9 +350,24 @@ async fn import_dump_v1_rubygems_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "description": 53,
+        "id": 53,
+        "name": 53,
+        "summary": 53,
+        "total_downloads": 53,
+        "version": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -483,9 +515,24 @@ async fn import_dump_v2_movie_raw() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -623,9 +670,24 @@ async fn import_dump_v2_movie_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -773,9 +835,24 @@ async fn import_dump_v2_rubygems_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "description": 53,
+        "id": 53,
+        "name": 53,
+        "summary": 53,
+        "total_downloads": 53,
+        "version": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -920,9 +997,24 @@ async fn import_dump_v3_movie_raw() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1060,9 +1152,24 @@ async fn import_dump_v3_movie_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1210,9 +1317,24 @@ async fn import_dump_v3_rubygems_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "description": 53,
+        "id": 53,
+        "name": 53,
+        "summary": 53,
+        "total_downloads": 53,
+        "version": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1357,9 +1479,24 @@ async fn import_dump_v4_movie_raw() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1497,9 +1634,24 @@ async fn import_dump_v4_movie_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"genres": 53, "id": 53, "overview": 53, "poster": 53, "release_date": 53, "title": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "genres": 53,
+        "id": 53,
+        "overview": 53,
+        "poster": 53,
+        "release_date": 53,
+        "title": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1647,9 +1799,24 @@ async fn import_dump_v4_rubygems_with_settings() {

    let (stats, code) = index.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(
-        stats,
-        json!({ "numberOfDocuments": 53, "isIndexing": false, "fieldDistribution": {"description": 53, "id": 53, "name": 53, "summary": 53, "total_downloads": 53, "version": 53 }})
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 53,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "description": 53,
+        "id": 53,
+        "name": 53,
+        "summary": 53,
+        "total_downloads": 53,
+        "version": 53
+      }
+    }
+    "###
    );

    let (settings, code) = index.settings().await;
@@ -1798,33 +1965,35 @@ async fn import_dump_v5() {
        server.wait_task(task["uid"].as_u64().unwrap()).await;
    }

-    let expected_stats = json!({
-        "numberOfDocuments": 10,
-        "isIndexing": false,
-        "fieldDistribution": {
-            "cast": 10,
-            "director": 10,
-            "genres": 10,
-            "id": 10,
-            "overview": 10,
-            "popularity": 10,
-            "poster_path": 10,
-            "producer": 10,
-            "production_companies": 10,
-            "release_date": 10,
-            "tagline": 10,
-            "title": 10,
-            "vote_average": 10,
-            "vote_count": 10
-        }
-    });
-
    let index1 = server.index("test");
    let index2 = server.index("test2");

    let (stats, code) = index1.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(stats, expected_stats);
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 10,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "cast": 10,
+        "director": 10,
+        "genres": 10,
+        "id": 10,
+        "overview": 10,
+        "popularity": 10,
+        "poster_path": 10,
+        "producer": 10,
+        "production_companies": 10,
+        "release_date": 10,
+        "tagline": 10,
+        "title": 10,
+        "vote_average": 10,
+        "vote_count": 10
+      }
+    }
+    "###);

    let (docs, code) = index2.get_all_documents(GetAllDocumentsOptions::default()).await;
    snapshot!(code, @"200 OK");
@@ -1835,7 +2004,32 @@ async fn import_dump_v5() {

    let (stats, code) = index2.stats().await;
    snapshot!(code, @"200 OK");
-    assert_eq!(stats, expected_stats);
+    snapshot!(
+        json_string!(stats),
+        @r###"
+    {
+      "numberOfDocuments": 10,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "cast": 10,
+        "director": 10,
+        "genres": 10,
+        "id": 10,
+        "overview": 10,
+        "popularity": 10,
+        "poster_path": 10,
+        "producer": 10,
+        "production_companies": 10,
+        "release_date": 10,
+        "tagline": 10,
+        "title": 10,
+        "vote_average": 10,
+        "vote_count": 10
+      }
+    }
+    "###);

    let (keys, code) = server.list_api_keys("").await;
    snapshot!(code, @"200 OK");
@@ -1,3 +1,4 @@
+use meili_snap::{json_string, snapshot};
 use time::format_description::well_known::Rfc3339;
 use time::OffsetDateTime;

@@ -74,3 +75,253 @@ async fn stats() {
    assert_eq!(response["indexes"]["test"]["fieldDistribution"]["name"], 1);
    assert_eq!(response["indexes"]["test"]["fieldDistribution"]["age"], 1);
}
+
+#[actix_rt::test]
+async fn add_remove_embeddings() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+
+    let (response, code) = index
+        .update_settings(json!({
+            "embedders": {
+                "manual": {
+                    "source": "userProvided",
+                    "dimensions": 3,
+                },
+                "handcrafted": {
+                    "source": "userProvided",
+                    "dimensions": 3,
+                },
+            },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    // 2 embedded documents for 5 embeddings in total
+    let documents = json!([
+        {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
+        {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
+    ]);
+
+    let (response, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 5,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // 2 embedded documents for 3 embeddings in total
+    let documents = json!([
+        {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
+    ]);
+
+    let (response, code) = index.update_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 3,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // 2 embedded documents for 2 embeddings in total
+    let documents = json!([
+        {"id": 0, "name": "kefir", "_vectors": { "manual": null, "handcrafted": [0, 0, 0] }},
+    ]);
+
+    let (response, code) = index.update_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 2,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // 1 embedded documents for 2 embeddings in total
+    let documents = json!([
+        {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
+        {"id": 1, "name": "echo", "_vectors": { "manual": null, "handcrafted": null }},
+    ]);
+
+    let (response, code) = index.update_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 2,
+      "numberOfEmbeddedDocuments": 1,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn add_remove_embedded_documents() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+
+    let (response, code) = index
+        .update_settings(json!({
+            "embedders": {
+                "manual": {
+                    "source": "userProvided",
+                    "dimensions": 3,
+                },
+                "handcrafted": {
+                    "source": "userProvided",
+                    "dimensions": 3,
+                },
+            },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    // 2 embedded documents for 5 embeddings in total
+    let documents = json!([
+        {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
+        {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": [[1, 1, 1], [2, 2, 2]] }},
+    ]);
+
+    let (response, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 5,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // delete one embedded document, remaining 1 embedded documents for 3 embeddings in total
+    let (response, code) = index.delete_document(0).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 1,
+      "isIndexing": false,
+      "numberOfEmbeddings": 3,
+      "numberOfEmbeddedDocuments": 1,
+      "fieldDistribution": {
+        "id": 1,
+        "name": 1
+      }
+    }
+    "###);
+}
+
+#[actix_rt::test]
+async fn update_embedder_settings() {
+    let server = Server::new().await;
+    let index = server.index("doggo");
+
+    // 2 embedded documents for 3 embeddings in total
+    // but no embedders are added in the settings yet so we expect 0 embedded documents for 0 embeddings in total
+    let documents = json!([
+        {"id": 0, "name": "kefir", "_vectors": { "manual": [0, 0, 0], "handcrafted": [0, 0, 0] }},
+        {"id": 1, "name": "echo", "_vectors": { "manual": [1, 1, 1], "handcrafted": null }},
+    ]);
+
+    let (response, code) = index.add_documents(documents, None).await;
+    snapshot!(code, @"202 Accepted");
+    index.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+
+    // add embedders to the settings
+    // 2 embedded documents for 3 embeddings in total
+    let (response, code) = index
+        .update_settings(json!({
+            "embedders": {
+                "manual": {
+                    "source": "userProvided",
+                    "dimensions": 3,
+                },
+                "handcrafted": {
+                    "source": "userProvided",
+                    "dimensions": 3,
+                },
+            },
+        }))
+        .await;
+    snapshot!(code, @"202 Accepted");
+    server.wait_task(response.uid()).await.succeeded();
+
+    let (stats, _code) = index.stats().await;
+    snapshot!(json_string!(stats), @r###"
+    {
+      "numberOfDocuments": 2,
+      "isIndexing": false,
+      "numberOfEmbeddings": 3,
+      "numberOfEmbeddedDocuments": 2,
+      "fieldDistribution": {
+        "id": 2,
+        "name": 2
+      }
+    }
+    "###);
+}
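The counts asserted by the first snapshot above can be checked by hand: `numberOfEmbeddings` counts vectors, while `numberOfEmbeddedDocuments` counts distinct documents that own at least one vector. A quick hand-check of the `add_remove_embeddings` numbers (illustrative only):

```rust
fn main() {
    // Vectors per document in the first payload:
    // doc 0 has one "manual" and one "handcrafted" vector,
    // doc 1 has one "manual" and two "handcrafted" vectors ([[1,1,1],[2,2,2]]).
    let vectors_per_doc = [2u32, 3];
    let number_of_embeddings: u32 = vectors_per_doc.iter().sum();
    let number_of_embedded_documents = vectors_per_doc.iter().filter(|&&n| n > 0).count();
    assert_eq!(number_of_embeddings, 5);
    assert_eq!(number_of_embedded_documents, 2);
}
```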
@@ -135,6 +135,8 @@ async fn check_the_index_scheduler(server: &Server) {
    "kefir": {
      "numberOfDocuments": 1,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "age": 1,
        "description": 1,
@@ -215,6 +217,8 @@ async fn check_the_index_scheduler(server: &Server) {
    "kefir": {
      "numberOfDocuments": 1,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "age": 1,
        "description": 1,
@@ -228,10 +232,12 @@ async fn check_the_index_scheduler(server: &Server) {
    "###);
    let index = server.index("kefir");
    let (stats, _) = index.stats().await;
-    snapshot!(stats, @r#"
+    snapshot!(stats, @r###"
    {
      "numberOfDocuments": 1,
      "isIndexing": false,
+      "numberOfEmbeddings": 0,
+      "numberOfEmbeddedDocuments": 0,
      "fieldDistribution": {
        "age": 1,
        "description": 1,
@@ -240,7 +246,7 @@ async fn check_the_index_scheduler(server: &Server) {
        "surname": 1
      }
    }
-    "#);
+    "###);

    // Delete all the tasks of a specific batch
    let (task, _) = server.delete_tasks("batchUids=10").await;
@@ -22,7 +22,7 @@ use crate::heed_codec::version::VersionCodec;
 use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
 use crate::order_by_map::OrderByMap;
 use crate::proximity::ProximityPrecision;
-use crate::vector::{ArroyWrapper, Embedding, EmbeddingConfig};
+use crate::vector::{ArroyStats, ArroyWrapper, Embedding, EmbeddingConfig};
 use crate::{
     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
@@ -1731,6 +1731,18 @@ impl Index {
         let compute_prefixes = self.prefix_search(rtxn)?.unwrap_or_default();
         Ok(PrefixSettings { compute_prefixes, max_prefix_length: 4, prefix_count_threshold: 100 })
     }
+
+    pub fn arroy_stats(&self, rtxn: &RoTxn<'_>) -> Result<ArroyStats> {
+        let mut stats = ArroyStats::default();
+        let embedding_configs = self.embedding_configs(rtxn)?;
+        for config in embedding_configs {
+            let embedder_id = self.embedder_category_id.get(rtxn, &config.name)?.unwrap();
+            let reader =
+                ArroyWrapper::new(self.vector_arroy, embedder_id, config.config.quantized());
+            reader.aggregate_stats(rtxn, &mut stats)?;
+        }
+        Ok(stats)
+    }
 }

 #[derive(Debug, Deserialize, Serialize)]
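A hypothetical caller of the new method, mirroring what `IndexStats::new` does in the index scheduler (a sketch only, assuming the milli `Index` API shown in this diff):

```rust
// Returns (number of embeddings, number of embedded documents) for one index.
fn embedding_counts(index: &milli::Index) -> milli::Result<(u64, u64)> {
    let rtxn = index.read_txn()?;
    let stats = index.arroy_stats(&rtxn)?;
    // `stats.documents` is a RoaringBitmap of document ids, so its
    // cardinality is the number of distinct embedded documents.
    Ok((stats.number_of_embeddings, stats.documents.len()))
}
```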
@@ -1,7 +1,9 @@
 mod v1_12;
+mod v1_13;

 use heed::RwTxn;
-use v1_12::{V1_12_3_To_Current, V1_12_To_V1_12_3};
+use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
+use v1_13::V1_13_0_To_Current;

 use crate::progress::{Progress, VariableNameStep};
 use crate::{Index, InternalError, Result};
@@ -26,11 +28,13 @@ pub fn upgrade(
     progress: Progress,
 ) -> Result<bool> {
     let from = index.get_version(wtxn)?.unwrap_or(db_version);
-    let upgrade_functions: &[&dyn UpgradeIndex] = &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_Current()];
+    let upgrade_functions: &[&dyn UpgradeIndex] =
+        &[&V1_12_To_V1_12_3 {}, &V1_12_3_To_V1_13_0 {}, &V1_13_0_To_Current()];

     let start = match from {
         (1, 12, 0..=2) => 0,
         (1, 12, 3..) => 1,
+        (1, 13, 0) => 2,
         // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other.
         (1, 13, _) => return Ok(false),
         (major, minor, patch) => {
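The dispatch above reads as: `start` picks the first upgrade to apply and everything after it runs in order, so a v1.12.3 database goes through `V1_12_3_To_V1_13_0` (whose `Ok(true)` forces the stats recomputation) and then `V1_13_0_To_Current`. A standalone sketch of just that selection step (the helper name is illustrative):

```rust
// Index of the first upgrade function to run, or None when already current.
fn first_upgrade(from: (u32, u32, u32)) -> Option<usize> {
    match from {
        (1, 12, 0..=2) => Some(0), // run all three upgrades
        (1, 12, 3..) => Some(1),   // skip V1_12_To_V1_12_3
        (1, 13, 0) => Some(2),     // only V1_13_0_To_Current remains
        (1, 13, _) => None,        // nothing to do
        _ => None,                 // unsupported; the real code returns an error
    }
}

fn main() {
    assert_eq!(first_upgrade((1, 12, 3)), Some(1));
    assert_eq!(first_upgrade((1, 13, 0)), Some(2));
}
```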
@@ -1,7 +1,6 @@
 use heed::RwTxn;

 use super::UpgradeIndex;
-use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
 use crate::progress::Progress;
 use crate::{make_enum_progress, Index, Result};

@@ -32,9 +31,9 @@ impl UpgradeIndex for V1_12_To_V1_12_3 {
 }

 #[allow(non_camel_case_types)]
-pub(super) struct V1_12_3_To_Current();
+pub(super) struct V1_12_3_To_V1_13_0 {}

-impl UpgradeIndex for V1_12_3_To_Current {
+impl UpgradeIndex for V1_12_3_To_V1_13_0 {
     fn upgrade(
         &self,
         _wtxn: &mut RwTxn,
@@ -42,14 +41,11 @@ impl UpgradeIndex for V1_12_3_To_Current {
         _original: (u32, u32, u32),
         _progress: Progress,
     ) -> Result<bool> {
-        Ok(false)
+        // recompute the indexes stats
+        Ok(true)
     }

     fn target_version(&self) -> (u32, u32, u32) {
-        (
-            VERSION_MAJOR.parse().unwrap(),
-            VERSION_MINOR.parse().unwrap(),
-            VERSION_PATCH.parse().unwrap(),
-        )
+        (1, 13, 0)
     }
 }
crates/milli/src/update/upgrade/v1_13.rs (new file, 29 lines)
@@ -0,0 +1,29 @@
+use heed::RwTxn;
+
+use super::UpgradeIndex;
+use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
+use crate::progress::Progress;
+use crate::{Index, Result};
+
+#[allow(non_camel_case_types)]
+pub(super) struct V1_13_0_To_Current();
+
+impl UpgradeIndex for V1_13_0_To_Current {
+    fn upgrade(
+        &self,
+        _wtxn: &mut RwTxn,
+        _index: &Index,
+        _original: (u32, u32, u32),
+        _progress: Progress,
+    ) -> Result<bool> {
+        Ok(false)
+    }
+
+    fn target_version(&self) -> (u32, u32, u32) {
+        (
+            VERSION_MAJOR.parse().unwrap(),
+            VERSION_MINOR.parse().unwrap(),
+            VERSION_PATCH.parse().unwrap(),
+        )
+    }
+}
@@ -410,8 +410,43 @@ impl ArroyWrapper {
     fn quantized_db(&self) -> arroy::Database<BinaryQuantizedCosine> {
         self.database.remap_data_type()
     }
+
+    pub fn aggregate_stats(
+        &self,
+        rtxn: &RoTxn,
+        stats: &mut ArroyStats,
+    ) -> Result<(), arroy::Error> {
+        if self.quantized {
+            for reader in self.readers(rtxn, self.quantized_db()) {
+                let reader = reader?;
+                let documents = reader.item_ids();
+                if documents.is_empty() {
+                    break;
+                }
+                stats.documents |= documents;
+                stats.number_of_embeddings += documents.len();
+            }
+        } else {
+            for reader in self.readers(rtxn, self.angular_db()) {
+                let reader = reader?;
+                let documents = reader.item_ids();
+                if documents.is_empty() {
+                    break;
+                }
+                stats.documents |= documents;
+                stats.number_of_embeddings += documents.len();
+            }
+        }
+
+        Ok(())
+    }
 }
+
+#[derive(Debug, Default, Clone)]
+pub struct ArroyStats {
+    pub number_of_embeddings: u64,
+    pub documents: RoaringBitmap,
+}
 /// One or multiple embeddings stored consecutively in a flat vector.
 pub struct Embeddings<F> {
     data: Vec<F>,
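Note how `aggregate_stats` accumulates: `number_of_embeddings` sums the item count of every arroy reader, while `stats.documents |= documents` unions the document ids, so a document embedded by several embedders (or with several vectors) is still counted once in `numberOfEmbeddedDocuments`. A quick check of that bitmap-union semantics (assuming the roaring crate that milli already depends on; the embedder names are taken from the tests above):

```rust
use roaring::RoaringBitmap;

fn main() {
    let mut number_of_embeddings: u64 = 0;
    let mut documents = RoaringBitmap::new();

    // Reader for the "manual" embedder: docs 0 and 1 each store one vector.
    let manual: RoaringBitmap = [0u32, 1].into_iter().collect();
    documents |= &manual;
    number_of_embeddings += manual.len();

    // Reader for the "handcrafted" embedder: only doc 0 stores a vector.
    let handcrafted: RoaringBitmap = [0u32].into_iter().collect();
    documents |= &handcrafted;
    number_of_embeddings += handcrafted.len();

    assert_eq!(number_of_embeddings, 3); // vectors are summed
    assert_eq!(documents.len(), 2); // document ids are deduplicated by the union
}
```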