mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Rename fields_distribution to field_distribution
This commit is contained in:
parent
bb89ef9fc0
commit
969adaefdf
@ -26,7 +26,7 @@ pub mod main_key {
|
|||||||
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
|
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
|
||||||
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
|
||||||
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
|
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
|
||||||
pub const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
|
pub const FIELD_DISTRIBUTION_KEY: &str = "fields-distribution";
|
||||||
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
pub const FIELDS_IDS_MAP_KEY: &str = "fields-ids-map";
|
||||||
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
pub const HARD_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "hard-external-documents-ids";
|
||||||
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
pub const NUMBER_FACETED_DOCUMENTS_IDS_PREFIX: &str = "number-faceted-documents-ids";
|
||||||
@ -290,28 +290,28 @@ impl Index {
|
|||||||
.unwrap_or_default())
|
.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
/* fields distribution */
|
/* field distribution */
|
||||||
|
|
||||||
/// Writes the fields distribution which associates every field name with
|
/// Writes the field distribution which associates every field name with
|
||||||
/// the number of times it occurs in the documents.
|
/// the number of times it occurs in the documents.
|
||||||
pub(crate) fn put_fields_distribution(
|
pub(crate) fn put_field_distribution(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
distribution: &FieldsDistribution,
|
distribution: &FieldsDistribution,
|
||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
self.main.put::<_, Str, SerdeJson<FieldsDistribution>>(
|
self.main.put::<_, Str, SerdeJson<FieldsDistribution>>(
|
||||||
wtxn,
|
wtxn,
|
||||||
main_key::FIELDS_DISTRIBUTION_KEY,
|
main_key::FIELD_DISTRIBUTION_KEY,
|
||||||
distribution,
|
distribution,
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the fields distribution which associates every field name with
|
/// Returns the field distribution which associates every field name with
|
||||||
/// the number of times it occurs in the documents.
|
/// the number of times it occurs in the documents.
|
||||||
pub fn fields_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldsDistribution> {
|
pub fn field_distribution(&self, rtxn: &RoTxn) -> heed::Result<FieldsDistribution> {
|
||||||
Ok(self
|
Ok(self
|
||||||
.main
|
.main
|
||||||
.get::<_, Str, SerdeJson<FieldsDistribution>>(rtxn, main_key::FIELDS_DISTRIBUTION_KEY)?
|
.get::<_, Str, SerdeJson<FieldsDistribution>>(rtxn, main_key::FIELD_DISTRIBUTION_KEY)?
|
||||||
.unwrap_or_default())
|
.unwrap_or_default())
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -823,7 +823,7 @@ pub(crate) mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn initial_fields_distribution() {
|
fn initial_field_distribution() {
|
||||||
let path = tempfile::tempdir().unwrap();
|
let path = tempfile::tempdir().unwrap();
|
||||||
let mut options = EnvOpenOptions::new();
|
let mut options = EnvOpenOptions::new();
|
||||||
options.map_size(10 * 1024 * 1024); // 10 MB
|
options.map_size(10 * 1024 * 1024); // 10 MB
|
||||||
@ -842,9 +842,9 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let fields_distribution = index.fields_distribution(&rtxn).unwrap();
|
let field_distribution = index.field_distribution(&rtxn).unwrap();
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
fields_distribution,
|
field_distribution,
|
||||||
hashmap! {
|
hashmap! {
|
||||||
"id".to_string() => 2,
|
"id".to_string() => 2,
|
||||||
"name".to_string() => 2,
|
"name".to_string() => 2,
|
||||||
|
@ -22,7 +22,9 @@ use fxhash::{FxHasher32, FxHasher64};
|
|||||||
use serde_json::{Map, Value};
|
use serde_json::{Map, Value};
|
||||||
|
|
||||||
pub use self::criterion::{default_criteria, Criterion};
|
pub use self::criterion::{default_criteria, Criterion};
|
||||||
pub use self::error::{Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError};
|
pub use self::error::{
|
||||||
|
Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
|
||||||
|
};
|
||||||
pub use self::external_documents_ids::ExternalDocumentsIds;
|
pub use self::external_documents_ids::ExternalDocumentsIds;
|
||||||
pub use self::fields_ids_map::FieldsIdsMap;
|
pub use self::fields_ids_map::FieldsIdsMap;
|
||||||
pub use self::heed_codec::{
|
pub use self::heed_codec::{
|
||||||
|
@ -47,7 +47,7 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
|
|||||||
self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
|
self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
|
||||||
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
|
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
|
||||||
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
self.index.put_documents_ids(self.wtxn, &RoaringBitmap::default())?;
|
||||||
self.index.put_fields_distribution(self.wtxn, &FieldsDistribution::default())?;
|
self.index.put_field_distribution(self.wtxn, &FieldsDistribution::default())?;
|
||||||
|
|
||||||
// We clean all the faceted documents ids.
|
// We clean all the faceted documents ids.
|
||||||
let empty = RoaringBitmap::default();
|
let empty = RoaringBitmap::default();
|
||||||
@ -113,7 +113,7 @@ mod tests {
|
|||||||
assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
|
assert!(index.words_prefixes_fst(&rtxn).unwrap().is_empty());
|
||||||
assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
|
assert!(index.external_documents_ids(&rtxn).unwrap().is_empty());
|
||||||
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
|
assert!(index.documents_ids(&rtxn).unwrap().is_empty());
|
||||||
assert!(index.fields_distribution(&rtxn).unwrap().is_empty());
|
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
|
||||||
|
|
||||||
assert!(index.word_docids.is_empty(&rtxn).unwrap());
|
assert!(index.word_docids.is_empty(&rtxn).unwrap());
|
||||||
assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
|
assert!(index.word_prefix_docids.is_empty(&rtxn).unwrap());
|
||||||
|
@ -147,7 +147,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut fields_distribution = self.index.fields_distribution(self.wtxn)?;
|
let mut field_distribution = self.index.field_distribution(self.wtxn)?;
|
||||||
|
|
||||||
// We use the pre-calculated number of field occurrences that need to be deleted
|
// We use the pre-calculated number of field occurrences that need to be deleted
|
||||||
// to reflect deleted documents.
|
// to reflect deleted documents.
|
||||||
@ -155,7 +155,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// Otherwise, insert new number of occurrences (current_count - count_diff).
|
// Otherwise, insert new number of occurrences (current_count - count_diff).
|
||||||
for (field_id, count_diff) in fields_ids_distribution_diff {
|
for (field_id, count_diff) in fields_ids_distribution_diff {
|
||||||
let field_name = fields_ids_map.name(field_id).unwrap();
|
let field_name = fields_ids_map.name(field_id).unwrap();
|
||||||
if let Entry::Occupied(mut entry) = fields_distribution.entry(field_name.to_string()) {
|
if let Entry::Occupied(mut entry) = field_distribution.entry(field_name.to_string()) {
|
||||||
match entry.get().checked_sub(count_diff) {
|
match entry.get().checked_sub(count_diff) {
|
||||||
Some(0) | None => entry.remove(),
|
Some(0) | None => entry.remove(),
|
||||||
Some(count) => entry.insert(count),
|
Some(count) => entry.insert(count),
|
||||||
@ -163,7 +163,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
self.index.put_fields_distribution(self.wtxn, &fields_distribution)?;
|
self.index.put_field_distribution(self.wtxn, &field_distribution)?;
|
||||||
|
|
||||||
// We create the FST map of the external ids that we must delete.
|
// We create the FST map of the external ids that we must delete.
|
||||||
external_ids.sort_unstable();
|
external_ids.sort_unstable();
|
||||||
@ -479,7 +479,7 @@ mod tests {
|
|||||||
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
assert!(index.fields_distribution(&rtxn).unwrap().is_empty());
|
assert!(index.field_distribution(&rtxn).unwrap().is_empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
@ -378,7 +378,7 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
let TransformOutput {
|
let TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
fields_distribution,
|
field_distribution,
|
||||||
external_documents_ids,
|
external_documents_ids,
|
||||||
new_documents_ids,
|
new_documents_ids,
|
||||||
replaced_documents_ids,
|
replaced_documents_ids,
|
||||||
@ -594,8 +594,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
// We write the fields ids map into the main database
|
// We write the fields ids map into the main database
|
||||||
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?;
|
||||||
|
|
||||||
// We write the fields distribution into the main database
|
// We write the field distribution into the main database
|
||||||
self.index.put_fields_distribution(self.wtxn, &fields_distribution)?;
|
self.index.put_field_distribution(self.wtxn, &field_distribution)?;
|
||||||
|
|
||||||
// We write the primary key field id into the main database
|
// We write the primary key field id into the main database
|
||||||
self.index.put_primary_key(self.wtxn, &primary_key)?;
|
self.index.put_primary_key(self.wtxn, &primary_key)?;
|
||||||
|
@ -25,7 +25,7 @@ const DEFAULT_PRIMARY_KEY_NAME: &str = "id";
|
|||||||
pub struct TransformOutput {
|
pub struct TransformOutput {
|
||||||
pub primary_key: String,
|
pub primary_key: String,
|
||||||
pub fields_ids_map: FieldsIdsMap,
|
pub fields_ids_map: FieldsIdsMap,
|
||||||
pub fields_distribution: FieldsDistribution,
|
pub field_distribution: FieldsDistribution,
|
||||||
pub external_documents_ids: ExternalDocumentsIds<'static>,
|
pub external_documents_ids: ExternalDocumentsIds<'static>,
|
||||||
pub new_documents_ids: RoaringBitmap,
|
pub new_documents_ids: RoaringBitmap,
|
||||||
pub replaced_documents_ids: RoaringBitmap,
|
pub replaced_documents_ids: RoaringBitmap,
|
||||||
@ -127,7 +127,7 @@ impl Transform<'_, '_> {
|
|||||||
return Ok(TransformOutput {
|
return Ok(TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
fields_distribution: self.index.fields_distribution(self.rtxn)?,
|
field_distribution: self.index.field_distribution(self.rtxn)?,
|
||||||
external_documents_ids: ExternalDocumentsIds::default(),
|
external_documents_ids: ExternalDocumentsIds::default(),
|
||||||
new_documents_ids: RoaringBitmap::new(),
|
new_documents_ids: RoaringBitmap::new(),
|
||||||
replaced_documents_ids: RoaringBitmap::new(),
|
replaced_documents_ids: RoaringBitmap::new(),
|
||||||
@ -385,7 +385,7 @@ impl Transform<'_, '_> {
|
|||||||
Error: From<E>,
|
Error: From<E>,
|
||||||
{
|
{
|
||||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
||||||
let mut fields_distribution = self.index.fields_distribution(self.rtxn)?;
|
let mut field_distribution = self.index.field_distribution(self.rtxn)?;
|
||||||
let mut available_documents_ids = AvailableDocumentsIds::from_documents_ids(&documents_ids);
|
let mut available_documents_ids = AvailableDocumentsIds::from_documents_ids(&documents_ids);
|
||||||
|
|
||||||
// Once we have sorted and deduplicated the documents we write them into a final file.
|
// Once we have sorted and deduplicated the documents we write them into a final file.
|
||||||
@ -455,7 +455,7 @@ impl Transform<'_, '_> {
|
|||||||
let reader = obkv::KvReader::new(obkv);
|
let reader = obkv::KvReader::new(obkv);
|
||||||
for (field_id, _) in reader.iter() {
|
for (field_id, _) in reader.iter() {
|
||||||
let field_name = fields_ids_map.name(field_id).unwrap();
|
let field_name = fields_ids_map.name(field_id).unwrap();
|
||||||
*fields_distribution.entry(field_name.to_string()).or_default() += 1;
|
*field_distribution.entry(field_name.to_string()).or_default() += 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -485,7 +485,7 @@ impl Transform<'_, '_> {
|
|||||||
Ok(TransformOutput {
|
Ok(TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map,
|
fields_ids_map,
|
||||||
fields_distribution,
|
field_distribution,
|
||||||
external_documents_ids: external_documents_ids.into_static(),
|
external_documents_ids: external_documents_ids.into_static(),
|
||||||
new_documents_ids,
|
new_documents_ids,
|
||||||
replaced_documents_ids,
|
replaced_documents_ids,
|
||||||
@ -503,7 +503,7 @@ impl Transform<'_, '_> {
|
|||||||
old_fields_ids_map: FieldsIdsMap,
|
old_fields_ids_map: FieldsIdsMap,
|
||||||
new_fields_ids_map: FieldsIdsMap,
|
new_fields_ids_map: FieldsIdsMap,
|
||||||
) -> Result<TransformOutput> {
|
) -> Result<TransformOutput> {
|
||||||
let fields_distribution = self.index.fields_distribution(self.rtxn)?;
|
let field_distribution = self.index.field_distribution(self.rtxn)?;
|
||||||
let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
|
let external_documents_ids = self.index.external_documents_ids(self.rtxn)?;
|
||||||
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
let documents_ids = self.index.documents_ids(self.rtxn)?;
|
||||||
let documents_count = documents_ids.len() as usize;
|
let documents_count = documents_ids.len() as usize;
|
||||||
@ -540,7 +540,7 @@ impl Transform<'_, '_> {
|
|||||||
Ok(TransformOutput {
|
Ok(TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map: new_fields_ids_map,
|
fields_ids_map: new_fields_ids_map,
|
||||||
fields_distribution,
|
field_distribution,
|
||||||
external_documents_ids: external_documents_ids.into_static(),
|
external_documents_ids: external_documents_ids.into_static(),
|
||||||
new_documents_ids: documents_ids,
|
new_documents_ids: documents_ids,
|
||||||
replaced_documents_ids: RoaringBitmap::default(),
|
replaced_documents_ids: RoaringBitmap::default(),
|
||||||
|
Loading…
Reference in New Issue
Block a user