mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
Refactor Settings Indexing process
**Changes:** The transform structure now relies on FieldIdMapWithMetadata and AttributePatterns to prepare the obkv documents during a settings reindexing. The InnerIndexSettingsDiff and InnerIndexSettings structs now rely on FieldIdMapWithMetadata, FilterableAttributesRule and AttributePatterns to define the fields and the databases that should be reindexed. The faceted_fields_ids, localized_searchable_fields_ids and localized_faceted_fields_ids have been removed in favor of the FieldIdMapWithMetadata. We now rely on the FieldIdMapWithMetadata to retain vectors_fids from the facets and the searchables. The searchable database computing now relies on the FieldIdMapWithMetadata to know whether a field is searchable and to retrieve the locales. The facet database computing now relies on the FieldIdMapWithMetadata to compute the facet databases and the facet-search, and to retrieve the locales. The facet level database computing now relies on the FieldIdMapWithMetadata, and the facet level databases are cleared depending on the settings differences (clear_facet_levels_based_on_settings_diff). The vector point extraction uses the FieldIdMapWithMetadata instead of FieldsIdsMapWithMetadata. **Impact:** - Dump import - Settings update
This commit is contained in:
parent
286d310287
commit
659855c88e
12 changed files with 375 additions and 272 deletions
|
@ -6,7 +6,7 @@ use heed::types::Bytes;
|
|||
use heed::{BytesDecode, BytesEncode, Error, PutFlags, RoTxn, RwTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||
use super::{clear_facet_levels, FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
|
@ -97,9 +97,7 @@ pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
|
|||
impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
pub fn update(mut self, wtxn: &mut RwTxn<'_>, field_ids: &[u16]) -> Result<()> {
|
||||
self.update_level0(wtxn)?;
|
||||
for &field_id in field_ids.iter() {
|
||||
self.clear_levels(wtxn, field_id)?;
|
||||
}
|
||||
clear_facet_levels(wtxn, &self.db.remap_data_type(), field_ids)?;
|
||||
|
||||
for &field_id in field_ids.iter() {
|
||||
let level_readers = self.compute_levels_for_field_id(field_id, wtxn)?;
|
||||
|
@ -114,14 +112,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Deletes from `self.db` the entries of `field_id` whose key lies in the inclusive
/// range going from (level 1, empty left bound) to (level `u8::MAX`, empty left bound).
///
/// Any error returned by `delete_range` is propagated to the caller.
fn clear_levels(&self, wtxn: &mut heed::RwTxn<'_>, field_id: FieldId) -> Result<()> {
|
||||
// NOTE(review): the bounds assume keys sort by field id, then level, then left bound — confirm against the key codec.
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
|
||||
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
|
||||
let range = left..=right;
|
||||
// The success value of `delete_range` is discarded; only a failure matters here.
self.db.delete_range(wtxn, &range).map(drop)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn update_level0(&mut self, wtxn: &mut RwTxn<'_>) -> Result<()> {
|
||||
let delta_data = match self.delta_data.take() {
|
||||
Some(x) => x,
|
||||
|
@ -365,8 +355,6 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
|||
mod tests {
|
||||
use std::iter::once;
|
||||
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::documents::mmap_from_objects;
|
||||
|
@ -374,7 +362,7 @@ mod tests {
|
|||
use crate::heed_codec::StrRefCodec;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::update::facet::test_helpers::{ordered_string, FacetIndex};
|
||||
use crate::{db_snap, milli_snap};
|
||||
use crate::{db_snap, milli_snap, FilterableAttributesRule};
|
||||
|
||||
#[test]
|
||||
fn insert() {
|
||||
|
@ -474,7 +462,8 @@ mod tests {
|
|||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_owned());
|
||||
settings.set_filterable_fields(hashset! { S("id") });
|
||||
settings
|
||||
.set_filterable_fields(vec![FilterableAttributesRule::Field("id".to_string())]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
|
|
|
@ -89,6 +89,7 @@ use time::OffsetDateTime;
|
|||
use tracing::debug;
|
||||
|
||||
use self::incremental::FacetsUpdateIncremental;
|
||||
use super::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
|
||||
use super::{FacetsUpdateBulk, MergeDeladdBtreesetString, MergeDeladdCboRoaringBitmaps};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{
|
||||
|
@ -147,7 +148,11 @@ impl<'i> FacetsUpdate<'i> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn execute(self, wtxn: &mut heed::RwTxn<'_>) -> Result<()> {
|
||||
pub fn execute(
|
||||
self,
|
||||
wtxn: &mut heed::RwTxn<'_>,
|
||||
new_settings: &InnerIndexSettings,
|
||||
) -> Result<()> {
|
||||
if self.data_size == 0 {
|
||||
return Ok(());
|
||||
}
|
||||
|
@ -156,8 +161,7 @@ impl<'i> FacetsUpdate<'i> {
|
|||
|
||||
// See self::comparison_bench::benchmark_facet_indexing
|
||||
if self.data_size >= (self.database.len(wtxn)? / 500) {
|
||||
let field_ids =
|
||||
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
|
||||
let field_ids = facet_levels_field_ids(new_settings);
|
||||
let bulk_update = FacetsUpdateBulk::new(
|
||||
self.index,
|
||||
field_ids,
|
||||
|
@ -291,6 +295,53 @@ fn index_facet_search(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
/// Clear all the levels greater than 0 for given field ids.
///
/// For every id yielded by `field_ids`, deletes from `db` the inclusive key range
/// going from (field id, level 1, empty left bound) to
/// (field id, level `u8::MAX`, empty left bound). The range starts at level 1, so
/// level 0 is meant to stay untouched.
///
/// The first error returned by `delete_range` is propagated and stops the loop.
|
||||
pub fn clear_facet_levels<'a, I>(
|
||||
wtxn: &mut heed::RwTxn<'_>,
|
||||
db: &heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DecodeIgnore>,
|
||||
field_ids: I,
|
||||
) -> Result<()>
|
||||
where
|
||||
I: IntoIterator<Item = &'a FieldId>,
|
||||
{
|
||||
for field_id in field_ids {
|
||||
let field_id = *field_id;
|
||||
// NOTE(review): the bounds assume keys sort by field id, then level, then left bound — confirm against FacetGroupKeyCodec.
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
|
||||
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
|
||||
let range = left..=right;
|
||||
// The success value of `delete_range` is discarded; only a failure matters here.
db.delete_range(wtxn, &range).map(drop)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Clears the facet levels of the fields selected by `facet_levels_field_ids` for the
/// old settings but not for the new ones.
///
/// The levels are cleared in both `index.facet_id_string_docids` and
/// `index.facet_id_f64_docids`. Fields that are still selected under the new settings
/// are not touched by this function.
pub fn clear_facet_levels_based_on_settings_diff(
|
||||
wtxn: &mut heed::RwTxn<'_>,
|
||||
index: &Index,
|
||||
settings_diff: &InnerIndexSettingsDiff,
|
||||
) -> Result<()> {
|
||||
let new_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.new);
|
||||
let old_field_ids: BTreeSet<_> = facet_levels_field_ids(&settings_diff.old);
|
||||

||||
// Keep only the ids present in the old set and absent from the new one.
let field_ids_to_clear: Vec<_> = old_field_ids.difference(&new_field_ids).copied().collect();
|
||||
clear_facet_levels(wtxn, &index.facet_id_string_docids.remap_types(), &field_ids_to_clear)?;
|
||||
clear_facet_levels(wtxn, &index.facet_id_f64_docids.remap_types(), &field_ids_to_clear)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Collects the ids of the fields whose metadata reports, through
/// `require_facet_level_database`, that a facet level database is required under
/// `settings.filterable_attributes_rules`.
///
/// The ids come from `settings.fields_ids_map.iter_id_metadata()` and are gathered
/// into whichever collection `B` the caller asks for.
fn facet_levels_field_ids<B>(settings: &InnerIndexSettings) -> B
|
||||
where
|
||||
B: FromIterator<FieldId>,
|
||||
{
|
||||
settings
|
||||
.fields_ids_map
|
||||
.iter_id_metadata()
|
||||
.filter(|(_, metadata)| {
|
||||
metadata.require_facet_level_database(&settings.filterable_attributes_rules)
|
||||
})
|
||||
// Drop the metadata and keep only the field id.
|
||||
.map(|(id, _)| id)
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test_helpers {
|
||||
use std::cell::Cell;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue