Try to make facet indexing incremental

This commit is contained in:
Loïc Lecrenier 2022-09-01 08:17:27 +02:00 committed by Loïc Lecrenier
parent d30c89e345
commit 85824ee203
6 changed files with 55 additions and 35 deletions

View file

@@ -34,7 +34,6 @@ use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput};
use crate::documents::{obkv_to_object, DocumentsBatchReader};
use crate::error::UserError;
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
self, FacetsUpdateBulk, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
@@ -431,23 +430,6 @@ where
// Merged databases have already been indexed, so we start from this count.
let mut databases_seen = MERGED_DATABASE_COUNT;
// Run the facets update operation.
for facet_db in [
(&self.index.facet_id_string_docids).remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
(&self.index.facet_id_f64_docids).remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
] {
let mut builder = FacetsUpdateBulk::new(self.index, facet_db);
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
if let Some(value) = self.config.facet_level_group_size {
builder.level_group_size(value);
}
if let Some(value) = self.config.facet_min_level_size {
builder.min_level_size(value);
}
builder.execute(self.wtxn)?;
}
databases_seen += 1;
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
databases_seen,