mirror of https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00

feat: add the documents fields repartition into stats

This commit is contained in:
parent 97cf5cca2a
commit 80caa8b60d
@@ -37,3 +37,4 @@ branch = "arc-byte-slice"
 
 [dev-dependencies]
 tempfile = "3.1.0"
+maplit = "1.0.2"
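`maplit` is added under `[dev-dependencies]` only: it provides the `hashmap!` macro that the new `database_stats` test below uses to build its expected repartition maps.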
@@ -1,7 +1,8 @@
 use std::convert::TryInto;
+use std::collections::HashMap;
 
 use meilidb_core::DocumentId;
-use meilidb_schema::SchemaAttr;
+use meilidb_schema::{Schema, SchemaAttr};
 use rocksdb::DBVector;
 
 use crate::document_attr_key::DocumentAttrKey;
@@ -54,6 +55,20 @@ impl DocumentsIndex {
         Ok(DocumentFieldsIter(iter))
     }
 
+    pub fn documents_fields_repartition(&self, schema: Schema) -> RocksDbResult<HashMap<String, u64>> {
+        let iter = self.0.iter()?;
+        let mut repartition_attributes_id = HashMap::new();
+        for key in DocumentsKeysIter(iter) {
+            let counter = repartition_attributes_id.entry(key.attribute).or_insert(0);
+            *counter += 1u64;
+        }
+        let mut repartition_with_attribute_name = HashMap::new();
+        for (key, val) in repartition_attributes_id {
+            repartition_with_attribute_name.insert(schema.attribute_name(key).to_owned(), val);
+        }
+        Ok(repartition_with_attribute_name)
+    }
+
     pub fn len(&self) -> RocksDbResult<u64> {
         let mut last_document_id = None;
         let mut count = 0;
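The new method makes two passes: a first pass counts stored keys per attribute id with the `HashMap` entry API, and a second pass swaps the ids for schema names. A minimal standalone sketch of the same counting technique, where `Attr`, `repartition`, and the `names` lookup are hypothetical stand-ins for `SchemaAttr`, the method above, and `Schema::attribute_name`:

use std::collections::HashMap;

// Hypothetical stand-in for SchemaAttr: a small integer attribute id.
type Attr = u16;

// Count how many stored (document, attribute) keys carry each attribute id,
// then replace the numeric ids with their human-readable names.
fn repartition(keys: impl Iterator<Item = Attr>, names: &HashMap<Attr, &str>) -> HashMap<String, u64> {
    let mut by_id: HashMap<Attr, u64> = HashMap::new();
    for attr in keys {
        // `entry(..).or_insert(0)` creates the counter on first sight,
        // so every attribute ends up with an exact occurrence count.
        *by_id.entry(attr).or_insert(0) += 1;
    }
    by_id.into_iter()
        .map(|(id, count)| (names[&id].to_string(), count))
        .collect()
}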
@@ -88,3 +103,20 @@ impl Iterator for DocumentFieldsIter<'_> {
         }
     }
 }
+
+pub struct DocumentsKeysIter<'a>(crate::CfIter<'a>);
+
+impl Iterator for DocumentsKeysIter<'_> {
+    type Item = DocumentAttrKey;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.0.next() {
+            Some((key, _)) => {
+                let array = key.as_ref().try_into().unwrap();
+                let key = DocumentAttrKey::from_be_bytes(array);
+                Some(key)
+            },
+            None => None,
+        }
+    }
+}
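`DocumentsKeysIter` decodes only the key of each entry and discards the value, which is all the repartition count needs. It relies on every key being a fixed-width big-endian byte array: `try_into` turns the borrowed byte slice into that array, and `from_be_bytes` decodes it. The exact layout of `DocumentAttrKey` is not shown in this diff; a hypothetical sketch of such a key, assuming a `u64` document id followed by a `u16` attribute id:

use std::convert::TryInto;

// Hypothetical layout; the real DocumentAttrKey fields are an assumption here.
#[derive(Debug, PartialEq)]
struct Key {
    document: u64,
    attribute: u16,
}

impl Key {
    // Big-endian encoding keeps RocksDB's lexicographic byte order in sync
    // with the numeric (document, attribute) order of the keys.
    fn to_be_bytes(&self) -> [u8; 10] {
        let mut buf = [0u8; 10];
        buf[..8].copy_from_slice(&self.document.to_be_bytes());
        buf[8..].copy_from_slice(&self.attribute.to_be_bytes());
        buf
    }

    fn from_be_bytes(buf: [u8; 10]) -> Key {
        let document = u64::from_be_bytes(buf[..8].try_into().unwrap());
        let attribute = u16::from_be_bytes(buf[8..].try_into().unwrap());
        Key { document, attribute }
    }
}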
@@ -1,4 +1,4 @@
-use std::collections::{HashSet, BTreeMap};
+use std::collections::{HashMap, HashSet, BTreeMap};
 use std::convert::TryInto;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicU64, Ordering};
@@ -155,11 +155,12 @@ fn last_update_id(
     Ok(uikey.max(urikey).unwrap_or(0))
 }
 
-#[derive(Copy, Clone)]
+#[derive(Clone)]
 pub struct IndexStats {
     pub number_of_words: usize,
     pub number_of_documents: u64,
     pub number_attrs_in_ranked_map: usize,
+    pub documents_fields_repartition: HashMap<String, u64>,
 }
 
 #[derive(Clone)]
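Dropping `Copy` here is forced, not stylistic: `HashMap` does not implement `Copy`, so once `documents_fields_repartition` is a field, `IndexStats` can only derive `Clone`.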
@@ -275,10 +276,12 @@ impl Index {
 
     pub fn stats(&self) -> RocksDbResult<IndexStats> {
         let cache = self.cache.load();
+        let documents_fields_repartition = self.documents_index.documents_fields_repartition(cache.schema.clone())?;
         Ok(IndexStats {
             number_of_words: cache.words.len(),
             number_of_documents: cache.number_of_documents,
             number_attrs_in_ranked_map: cache.ranked_map.len(),
+            documents_fields_repartition,
         })
     }
 
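With the repartition wired into `stats()`, callers get it alongside the other counters; a hypothetical usage sketch built only on the types visible in this diff (`print_repartition` is an illustrative helper, not part of the commit):

// Hypothetical helper; `Index` and `RocksDbResult` are the types used above.
fn print_repartition(index: &Index) -> RocksDbResult<()> {
    let stats = index.stats()?;
    for (field, count) in &stats.documents_fields_repartition {
        println!("field {:?} appears in {} documents", field, count);
    }
    Ok(())
}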
@@ -1,3 +1,5 @@
+#[macro_use] extern crate maplit;
+
 use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
 use std::sync::Arc;
 
@@ -94,3 +96,67 @@ fn replace_document() {
     assert_eq!(docs.len(), 1);
     assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc2));
 }
+
+#[test]
+fn database_stats() {
+    let tmp_dir = tempfile::tempdir().unwrap();
+    let database = Database::open(&tmp_dir).unwrap();
+
+    let as_been_updated = Arc::new(AtomicBool::new(false));
+
+    let schema = simple_schema();
+    let index = database.create_index("hello", schema).unwrap();
+
+    let as_been_updated_clone = as_been_updated.clone();
+    index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
+
+    let doc1 = json!({ "objectId": 123, "title": "hello" });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc1);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(as_been_updated.swap(false, Relaxed));
+    assert!(status.result.is_ok());
+    let stats = index.stats().unwrap();
+    let repartition = hashmap!{
+        "objectId".to_string() => 1u64,
+        "title".to_string() => 1u64,
+    };
+    assert_eq!(stats.number_of_documents, 1);
+    assert_eq!(stats.documents_fields_repartition, repartition);
+
+    let doc2 = json!({ "objectId": 456, "title": "world" });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc2);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(as_been_updated.swap(false, Relaxed));
+    assert!(status.result.is_ok());
+    let stats = index.stats().unwrap();
+    let repartition = hashmap!{
+        "objectId".to_string() => 2u64,
+        "title".to_string() => 2u64,
+    };
+    assert_eq!(stats.number_of_documents, 2);
+    assert_eq!(stats.documents_fields_repartition, repartition);
+
+
+    let doc3 = json!({ "objectId": 789 });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc3);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(as_been_updated.swap(false, Relaxed));
+    assert!(status.result.is_ok());
+    let stats = index.stats().unwrap();
+    let repartition = hashmap!{
+        "objectId".to_string() => 3u64,
+        "title".to_string() => 2u64,
+    };
+    assert_eq!(stats.number_of_documents, 3);
+    assert_eq!(stats.documents_fields_repartition, repartition);
+
+}
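The third update is the interesting case: `doc3` carries no `title` field, so `objectId` climbs to 3 while `title` stays at 2, confirming that the repartition counts fields actually present in documents rather than attributes declared in the schema.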