From 80caa8b60d71b5e9530d41f8b5a2d1912037159a Mon Sep 17 00:00:00 2001
From: Quentin de Quelen
Date: Tue, 17 Sep 2019 15:41:20 +0200
Subject: [PATCH] feat: add the documents fields repartition into stats

---
 NOTE(review): this patch text was recovered from a copy whose line breaks
 were collapsed and whose `<...>` spans had been stripped. Hunk sizes and the
 diffstat below were used to rebuild the layout. The author e-mail address
 could not be recovered. `Option<Self::Item>` and the `RocksDbResult<u64>`
 return type on the `len` context line are presumed, not confirmed -- verify
 them against the repository before applying.

 meilidb-data/Cargo.toml                       |  1 +
 .../src/database/index/documents_index.rs     | 34 +++++++++-
 meilidb-data/src/database/index/mod.rs        |  7 +-
 meilidb-data/tests/updates.rs                 | 66 +++++++++++++++++++
 4 files changed, 105 insertions(+), 3 deletions(-)

diff --git a/meilidb-data/Cargo.toml b/meilidb-data/Cargo.toml
index 6535b56ae..d08f3664d 100644
--- a/meilidb-data/Cargo.toml
+++ b/meilidb-data/Cargo.toml
@@ -37,3 +37,4 @@ branch = "arc-byte-slice"
 
 [dev-dependencies]
 tempfile = "3.1.0"
+maplit = "1.0.2"
diff --git a/meilidb-data/src/database/index/documents_index.rs b/meilidb-data/src/database/index/documents_index.rs
index b209f1ed0..bde8531c7 100644
--- a/meilidb-data/src/database/index/documents_index.rs
+++ b/meilidb-data/src/database/index/documents_index.rs
@@ -1,7 +1,8 @@
 use std::convert::TryInto;
+use std::collections::HashMap;
 
 use meilidb_core::DocumentId;
-use meilidb_schema::SchemaAttr;
+use meilidb_schema::{Schema, SchemaAttr};
 use rocksdb::DBVector;
 
 use crate::document_attr_key::DocumentAttrKey;
@@ -54,6 +55,20 @@ impl DocumentsIndex {
         Ok(DocumentFieldsIter(iter))
     }
 
+    pub fn documents_fields_repartition(&self, schema: Schema) -> RocksDbResult<HashMap<String, u64>> {
+        let iter = self.0.iter()?;
+        let mut repartition_attributes_id = HashMap::new();
+        for key in DocumentsKeysIter(iter) {
+            let counter = repartition_attributes_id.entry(key.attribute).or_insert(0);
+            *counter += 1u64;
+        }
+        let mut repartition_with_attribute_name = HashMap::new();
+        for (key, val) in repartition_attributes_id {
+            repartition_with_attribute_name.insert(schema.attribute_name(key).to_owned(), val);
+        }
+        Ok(repartition_with_attribute_name)
+    }
+
     pub fn len(&self) -> RocksDbResult<u64> {
         let mut last_document_id = None;
         let mut count = 0;
@@ -88,3 +103,20 @@ impl Iterator for DocumentFieldsIter<'_> {
         }
     }
 }
+
+pub struct DocumentsKeysIter<'a>(crate::CfIter<'a>);
+
+impl Iterator for DocumentsKeysIter<'_> {
+    type Item = DocumentAttrKey;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.0.next() {
+            Some((key, _)) => {
+                let array = key.as_ref().try_into().unwrap();
+                let key = DocumentAttrKey::from_be_bytes(array);
+                Some(key)
+            },
+            None => None,
+        }
+    }
+}
diff --git a/meilidb-data/src/database/index/mod.rs b/meilidb-data/src/database/index/mod.rs
index 1f04fe934..1bcfaec95 100644
--- a/meilidb-data/src/database/index/mod.rs
+++ b/meilidb-data/src/database/index/mod.rs
@@ -1,4 +1,4 @@
-use std::collections::{HashSet, BTreeMap};
+use std::collections::{HashMap, HashSet, BTreeMap};
 use std::convert::TryInto;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicU64, Ordering};
@@ -155,11 +155,12 @@ fn last_update_id(
     Ok(uikey.max(urikey).unwrap_or(0))
 }
 
-#[derive(Copy, Clone)]
+#[derive(Clone)]
 pub struct IndexStats {
     pub number_of_words: usize,
     pub number_of_documents: u64,
     pub number_attrs_in_ranked_map: usize,
+    pub documents_fields_repartition: HashMap<String, u64>,
 }
 
 #[derive(Clone)]
@@ -275,10 +276,12 @@ impl Index {
 
     pub fn stats(&self) -> RocksDbResult<IndexStats> {
         let cache = self.cache.load();
+        let documents_fields_repartition = self.documents_index.documents_fields_repartition(cache.schema.clone())?;
         Ok(IndexStats {
             number_of_words: cache.words.len(),
             number_of_documents: cache.number_of_documents,
             number_attrs_in_ranked_map: cache.ranked_map.len(),
+            documents_fields_repartition,
         })
     }
 
diff --git a/meilidb-data/tests/updates.rs b/meilidb-data/tests/updates.rs
index 576e11583..7afbbc343 100644
--- a/meilidb-data/tests/updates.rs
+++ b/meilidb-data/tests/updates.rs
@@ -1,3 +1,5 @@
+#[macro_use] extern crate maplit;
+
 use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
 use std::sync::Arc;
 
@@ -94,3 +96,67 @@ fn replace_document() {
     assert_eq!(docs.len(), 1);
     assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc2));
 }
+
+#[test]
+fn database_stats() {
+    let tmp_dir = tempfile::tempdir().unwrap();
+    let database = Database::open(&tmp_dir).unwrap();
+
+    let as_been_updated = Arc::new(AtomicBool::new(false));
+
+    let schema = simple_schema();
+    let index = database.create_index("hello", schema).unwrap();
+
+    let as_been_updated_clone = as_been_updated.clone();
+    index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
+
+    let doc1 = json!({ "objectId": 123, "title": "hello" });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc1);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(as_been_updated.swap(false, Relaxed));
+    assert!(status.result.is_ok());
+    let stats = index.stats().unwrap();
+    let repartition = hashmap!{
+        "objectId".to_string() => 1u64,
+        "title".to_string() => 1u64,
+    };
+    assert_eq!(stats.number_of_documents, 1);
+    assert_eq!(stats.documents_fields_repartition, repartition);
+
+    let doc2 = json!({ "objectId": 456, "title": "world" });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc2);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(as_been_updated.swap(false, Relaxed));
+    assert!(status.result.is_ok());
+    let stats = index.stats().unwrap();
+    let repartition = hashmap!{
+        "objectId".to_string() => 2u64,
+        "title".to_string() => 2u64,
+    };
+    assert_eq!(stats.number_of_documents, 2);
+    assert_eq!(stats.documents_fields_repartition, repartition);
+
+
+    let doc3 = json!({ "objectId": 789 });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc3);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(as_been_updated.swap(false, Relaxed));
+    assert!(status.result.is_ok());
+    let stats = index.stats().unwrap();
+    let repartition = hashmap!{
+        "objectId".to_string() => 3u64,
+        "title".to_string() => 2u64,
+    };
+    assert_eq!(stats.number_of_documents, 3);
+    assert_eq!(stats.documents_fields_repartition, repartition);
+
+}