Merge pull request #189 from meilisearch/documents-fields-repartition
Add the documents fields repartition to the index stats
Commit c10b701b9a
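In short, `IndexStats` gains a `documents_fields_repartition` map reporting, for each schema attribute, how many documents contain that field. A minimal, self-contained sketch of how a caller might consume the new stat; the map is filled by hand here so the example runs on its own, but in practice it comes from `Index::stats()` as shown in the diff below:

```rust
use std::collections::HashMap;

fn main() {
    // Stand-in for `index.stats().unwrap().documents_fields_repartition`;
    // the values mirror the final state of the `database_stats` test below.
    let mut repartition: HashMap<String, u64> = HashMap::new();
    repartition.insert("objectId".to_string(), 3);
    repartition.insert("title".to_string(), 2);

    for (field, count) in &repartition {
        println!("{} documents contain the `{}` field", count, field);
    }
}
```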
@@ -37,3 +37,4 @@ branch = "arc-byte-slice"
 
 [dev-dependencies]
 tempfile = "3.1.0"
+maplit = "1.0.2"
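`maplit` is added as a dev-dependency for its `hashmap!` literal macro, which the new `database_stats` test at the bottom of this diff uses to build the expected repartition maps.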
@@ -1,7 +1,8 @@
 use std::convert::TryInto;
+use std::collections::HashMap;
 
 use meilidb_core::DocumentId;
-use meilidb_schema::SchemaAttr;
+use meilidb_schema::{Schema, SchemaAttr};
 use rocksdb::DBVector;
 
 use crate::document_attr_key::DocumentAttrKey;
@@ -54,6 +55,20 @@ impl DocumentsIndex {
         Ok(DocumentFieldsIter(iter))
     }
 
+    pub fn documents_fields_repartition(&self, schema: Schema) -> RocksDbResult<HashMap<String, u64>> {
+        let iter = self.0.iter()?;
+        let mut repartition_attributes_id = HashMap::new();
+        for key in DocumentsKeysIter(iter) {
+            let counter = repartition_attributes_id.entry(key.attribute).or_insert(0);
+            *counter += 1u64;
+        }
+
+        let mut repartition_with_attribute_name = HashMap::new();
+        for (key, val) in repartition_attributes_id {
+            repartition_with_attribute_name.insert(schema.attribute_name(key).to_owned(), val);
+        }
+        Ok(repartition_with_attribute_name)
+    }
+
     pub fn len(&self) -> RocksDbResult<u64> {
         let mut last_document_id = None;
         let mut count = 0;
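`documents_fields_repartition` is a two-pass aggregation: a first loop tallies key occurrences per attribute id with the `HashMap` entry API, and a second loop swaps ids for their schema names. A self-contained sketch of the same pattern, with a closure standing in for `Schema::attribute_name` (all ids and names here are illustrative):

```rust
use std::collections::HashMap;

fn main() {
    // Stand-in for the DocumentAttrKey stream: (document id, attribute id).
    let keys = [(1u64, 0u16), (1, 1), (2, 0), (2, 1), (3, 0)];

    // Pass 1: count stored fields per attribute id.
    let mut by_id: HashMap<u16, u64> = HashMap::new();
    for (_doc, attr) in keys {
        *by_id.entry(attr).or_insert(0) += 1;
    }

    // Pass 2: translate attribute ids into human-readable schema names.
    let attribute_name = |attr: u16| if attr == 0 { "objectId" } else { "title" };
    let mut by_name: HashMap<String, u64> = HashMap::new();
    for (attr, count) in by_id {
        by_name.insert(attribute_name(attr).to_owned(), count);
    }

    assert_eq!(by_name["objectId"], 3);
    assert_eq!(by_name["title"], 2);
}
```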
@@ -88,3 +103,20 @@ impl Iterator for DocumentFieldsIter<'_> {
         }
     }
 }
+
+pub struct DocumentsKeysIter<'a>(crate::CfIter<'a>);
+
+impl Iterator for DocumentsKeysIter<'_> {
+    type Item = DocumentAttrKey;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.0.next() {
+            Some((key, _)) => {
+                let array = key.as_ref().try_into().unwrap();
+                let key = DocumentAttrKey::from_be_bytes(array);
+                Some(key)
+            },
+            None => None,
+        }
+    }
+}
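`DocumentsKeysIter` mirrors `DocumentFieldsIter` but decodes only the key half of each RocksDB entry, turning the raw byte slice into a fixed-size array with `try_into` before parsing it big-endian. A sketch of that decoding step; the 10-byte layout (8-byte document id followed by a 2-byte attribute id) is an assumption for illustration, not the confirmed `DocumentAttrKey` layout:

```rust
use std::convert::TryInto;

// Assumed key layout for illustration: 8-byte big-endian document id
// followed by a 2-byte big-endian attribute id (10 bytes total).
fn decode_key(bytes: &[u8]) -> Option<(u64, u16)> {
    let array: [u8; 10] = bytes.try_into().ok()?;
    let id = u64::from_be_bytes(array[..8].try_into().ok()?);
    let attr = u16::from_be_bytes(array[8..].try_into().ok()?);
    Some((id, attr))
}

fn main() {
    let mut key = [0u8; 10];
    key[..8].copy_from_slice(&42u64.to_be_bytes());
    key[8..].copy_from_slice(&7u16.to_be_bytes());
    assert_eq!(decode_key(&key), Some((42, 7)));
}
```

Big-endian encoding keeps the byte order of keys aligned with their numeric order, so RocksDB's sorted iteration naturally groups all fields of a document together.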
@@ -1,4 +1,4 @@
-use std::collections::{HashSet, BTreeMap};
+use std::collections::{HashMap, HashSet, BTreeMap};
 use std::convert::TryInto;
 use std::sync::Arc;
 use std::sync::atomic::{AtomicU64, Ordering};
@@ -155,11 +155,12 @@ fn last_update_id(
     Ok(uikey.max(urikey).unwrap_or(0))
 }
 
-#[derive(Copy, Clone)]
+#[derive(Clone)]
 pub struct IndexStats {
     pub number_of_words: usize,
     pub number_of_documents: u64,
     pub number_attrs_in_ranked_map: usize,
+    pub documents_fields_repartition: HashMap<String, u64>,
 }
 
 #[derive(Clone)]
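The derive loses `Copy` because `HashMap` does not implement it; with the new field, `IndexStats` owns heap-allocated data and can only be `Clone`.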
@@ -275,10 +276,12 @@ impl Index {
 
     pub fn stats(&self) -> RocksDbResult<IndexStats> {
         let cache = self.cache.load();
+        let documents_fields_repartition = self.documents_index.documents_fields_repartition(cache.schema.clone())?;
         Ok(IndexStats {
             number_of_words: cache.words.len(),
             number_of_documents: cache.number_of_documents,
             number_attrs_in_ranked_map: cache.ranked_map.len(),
+            documents_fields_repartition,
        })
     }
 
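Note that `stats()` now scans the whole documents column family through `documents_fields_repartition`, so its cost grows with the total number of stored document fields instead of coming entirely from cached counters.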
@@ -1,3 +1,5 @@
+#[macro_use] extern crate maplit;
+
 use std::sync::atomic::{AtomicBool, Ordering::Relaxed};
 use std::sync::Arc;
 
@@ -94,3 +96,67 @@ fn replace_document() {
     assert_eq!(docs.len(), 1);
     assert_eq!(index.document(None, docs[0].id).unwrap().as_ref(), Some(&doc2));
 }
+
+#[test]
+fn database_stats() {
+    let tmp_dir = tempfile::tempdir().unwrap();
+    let database = Database::open(&tmp_dir).unwrap();
+
+    let as_been_updated = Arc::new(AtomicBool::new(false));
+
+    let schema = simple_schema();
+    let index = database.create_index("hello", schema).unwrap();
+
+    let as_been_updated_clone = as_been_updated.clone();
+    index.set_update_callback(move |_| as_been_updated_clone.store(true, Relaxed));
+
+    let doc1 = json!({ "objectId": 123, "title": "hello" });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc1);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(as_been_updated.swap(false, Relaxed));
+    assert!(status.result.is_ok());
+    let stats = index.stats().unwrap();
+    let repartition = hashmap!{
+        "objectId".to_string() => 1u64,
+        "title".to_string() => 1u64,
+    };
+    assert_eq!(stats.number_of_documents, 1);
+    assert_eq!(stats.documents_fields_repartition, repartition);
+
+    let doc2 = json!({ "objectId": 456, "title": "world" });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc2);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(as_been_updated.swap(false, Relaxed));
+    assert!(status.result.is_ok());
+    let stats = index.stats().unwrap();
+    let repartition = hashmap!{
+        "objectId".to_string() => 2u64,
+        "title".to_string() => 2u64,
+    };
+    assert_eq!(stats.number_of_documents, 2);
+    assert_eq!(stats.documents_fields_repartition, repartition);
+
+
+    let doc3 = json!({ "objectId": 789 });
+
+    let mut addition = index.documents_addition();
+    addition.update_document(&doc3);
+    let update_id = addition.finalize().unwrap();
+    let status = index.update_status_blocking(update_id).unwrap();
+    assert!(as_been_updated.swap(false, Relaxed));
+    assert!(status.result.is_ok());
+    let stats = index.stats().unwrap();
+    let repartition = hashmap!{
+        "objectId".to_string() => 3u64,
+        "title".to_string() => 2u64,
+    };
+    assert_eq!(stats.number_of_documents, 3);
+    assert_eq!(stats.documents_fields_repartition, repartition);
+
+}
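The test tracks the counts as documents accumulate: `doc3` carries no `title` field, so after the third update `objectId` reaches 3 while `title` stays at 2.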