mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
feat(index): store fields distribution in index
This commit is contained in:
parent
67e25f8724
commit
27c7ab6e00
5 changed files with 45 additions and 26 deletions
|
@ -23,6 +23,7 @@ pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
|||
pub const FACETED_DOCUMENTS_IDS_PREFIX: &str = "faceted-documents-ids";
|
||||
pub const FACETED_FIELDS_KEY: &str = "faceted-fields";
|
||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||
pub const PRIMARY_KEY_KEY: &str = "primary-key";
|
||||
pub const SEARCHABLE_FIELDS_KEY: &str = "searchable-fields";
|
||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||
|
@ -33,6 +34,8 @@ pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
|
|||
const CREATED_AT_KEY: &str = "created-at";
|
||||
const UPDATED_AT_KEY: &str = "updated-at";
|
||||
|
||||
pub type FieldsDistribution = HashMap<String, u64>;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Index {
|
||||
/// The LMDB environment which this index is associated with.
|
||||
|
@ -204,23 +207,18 @@ impl Index {
|
|||
Ok(self.main.get::<_, Str, SerdeJson<FieldsIdsMap>>(rtxn, FIELDS_IDS_MAP_KEY)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/* fields ids distribution */
|
||||
/* fields distribution */
|
||||
|
||||
/// Returns the fields ids distribution which associate the internal field ids
|
||||
/// with the number of times it occurs in the obkv documents.
|
||||
// TODO store in the index itself and change only within updates that modify the documents
|
||||
pub fn fields_ids_distribution(&self, rtxn: &RoTxn) -> anyhow::Result<HashMap<FieldId, u64>> {
|
||||
let mut distribution = HashMap::new();
|
||||
/// Writes the fields distribution which associate the field with the number of times
|
||||
/// it occurs in the obkv documents.
|
||||
pub fn put_fields_distribution(&self, wtxn: &mut RwTxn, distribution: &FieldsDistribution) -> heed::Result<()> {
|
||||
self.main.put::<_, Str, SerdeJson<FieldsDistribution>>(wtxn, FIELDS_DISTRIBUTION_KEY, &distribution)
|
||||
}
|
||||
|
||||
for document in self.documents.iter(rtxn)? {
|
||||
let (_, obkv) = document?;
|
||||
|
||||
for (field_id, _) in obkv.iter() {
|
||||
*distribution.entry(field_id).or_default() += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(distribution)
|
||||
/// Returns the fields distribution which associate the field with the number of times
|
||||
/// it occurs in the obkv documents.
|
||||
pub fn fields_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldsDistribution> {
|
||||
Ok(self.main.get::<_, Str, SerdeJson<FieldsDistribution>>(rtxn, FIELDS_DISTRIBUTION_KEY)?.unwrap_or_default())
|
||||
}
|
||||
|
||||
/* displayed fields */
|
||||
|
@ -469,6 +467,7 @@ impl Index {
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashmap;
|
||||
|
||||
use crate::Index;
|
||||
use crate::update::{IndexDocuments, UpdateFormat};
|
||||
|
@ -493,16 +492,15 @@ mod tests {
|
|||
}
|
||||
|
||||
#[test]
|
||||
fn fields_ids_distribution() {
|
||||
fn initial_fields_distribution() {
|
||||
let index = prepare_index();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
|
||||
|
||||
let fields_ids_distribution = index.fields_ids_distribution(&rtxn).unwrap();
|
||||
assert_eq!(fields_ids_distribution.len(), 2);
|
||||
assert_eq!(fields_ids_distribution.get(&fields_ids_map.id("age").unwrap()), Some(&1));
|
||||
assert_eq!(fields_ids_distribution.get(&fields_ids_map.id("name").unwrap()), Some(&2));
|
||||
let fields_distribution = index.fields_distribution(&rtxn).unwrap();
|
||||
assert_eq!(fields_distribution, hashmap!{
|
||||
"age".to_string() => 1,
|
||||
"name".to_string() => 2
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue