Reorganise facets database indexing code

This commit is contained in:
Loïc Lecrenier 2022-08-31 13:03:36 +02:00 committed by Loïc Lecrenier
parent fb8d23deb3
commit e8a156d682
9 changed files with 33 additions and 37 deletions

1
http-ui/src/main.rs Normal file
View File

@ -0,0 +1 @@

View File

@ -4,8 +4,6 @@ use heed::{BytesDecode, RoTxn};
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
// pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange};
// pub use self::facet_string::FacetStringIter;
pub use self::filter::Filter; pub use self::filter::Filter;
mod facet_distribution; mod facet_distribution;
@ -14,7 +12,6 @@ mod facet_range_search;
pub mod facet_sort_ascending; pub mod facet_sort_ascending;
pub mod facet_sort_descending; pub mod facet_sort_descending;
mod filter; mod filter;
mod incremental_update;
pub(crate) fn get_first_facet_value<'t, BoundCodec>( pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn, txn: &'t RoTxn,

View File

@ -32,7 +32,7 @@ static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
mod criteria; mod criteria;
mod distinct; mod distinct;
mod facet; pub mod facet;
mod fst_utils; mod fst_utils;
mod matches; mod matches;
mod query_tree; mod query_tree;

View File

@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
use serde_json::Value; use serde_json::Value;
use time::OffsetDateTime; use time::OffsetDateTime;
use super::{ClearDocuments, Facets}; use super::{ClearDocuments, FacetsUpdateBulk};
use crate::error::{InternalError, UserError}; use crate::error::{InternalError, UserError};
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::CboRoaringBitmapCodec;
@ -643,7 +643,7 @@ fn remove_docids_from_facet_id_docids<'a>(
if !modified { if !modified {
return Ok(()); return Ok(());
} }
let builder = Facets::new(index, db); let builder = FacetsUpdateBulk::new(index, db);
builder.execute(wtxn)?; builder.execute(wtxn)?;
Ok(()) Ok(())

View File

@ -1,23 +1,20 @@
use std::cmp; use crate::error::InternalError;
use std::fs::File; use crate::heed_codec::facet::new::{
use std::num::NonZeroUsize; FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
};
use crate::update::index_documents::{create_writer, write_into_lmdb_database, writer_into_reader};
use crate::{FieldId, Index, Result};
use grenad::CompressionType; use grenad::CompressionType;
use heed::types::ByteSlice; use heed::types::ByteSlice;
use heed::{BytesEncode, Error, RoTxn}; use heed::{BytesEncode, Error, RoTxn};
use log::debug; use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use std::cmp;
use std::fs::File;
use std::num::NonZeroUsize;
use time::OffsetDateTime; use time::OffsetDateTime;
use crate::error::InternalError; pub struct FacetsUpdateBulk<'i> {
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
};
// use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::index_documents::{create_writer, write_into_lmdb_database, writer_into_reader};
use crate::{FieldId, Index, Result};
pub struct Facets<'i> {
index: &'i Index, index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
pub(crate) chunk_compression_type: CompressionType, pub(crate) chunk_compression_type: CompressionType,
@ -26,12 +23,12 @@ pub struct Facets<'i> {
min_level_size: usize, min_level_size: usize,
} }
impl<'i> Facets<'i> { impl<'i> FacetsUpdateBulk<'i> {
pub fn new( pub fn new(
index: &'i Index, index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
) -> Facets<'i> { ) -> FacetsUpdateBulk<'i> {
Facets { FacetsUpdateBulk {
index, index,
database, database,
chunk_compression_type: CompressionType::None, chunk_compression_type: CompressionType::None,
@ -63,7 +60,7 @@ impl<'i> Facets<'i> {
Ok(()) Ok(())
} }
#[logging_timer::time("Facets::{}")] #[logging_timer::time("FacetsUpdateBulk::{}")]
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> { pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?; self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
// We get the faceted fields to be able to create the facet levels. // We get the faceted fields to be able to create the facet levels.
@ -105,7 +102,7 @@ impl<'i> Facets<'i> {
field_id: FieldId, field_id: FieldId,
txn: &RoTxn, txn: &RoTxn,
) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> { ) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
let algo = CreateFacetsAlgo { let algo = FacetsUpdateBulkAlgorithm {
rtxn: txn, rtxn: txn,
db: &self.database, db: &self.database,
field_id, field_id,
@ -129,7 +126,7 @@ impl<'i> Facets<'i> {
} }
} }
pub struct CreateFacetsAlgo<'t> { pub struct FacetsUpdateBulkAlgorithm<'t> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
chunk_compression_type: CompressionType, chunk_compression_type: CompressionType,
@ -138,7 +135,7 @@ pub struct CreateFacetsAlgo<'t> {
level_group_size: usize, level_group_size: usize,
min_level_size: usize, min_level_size: usize,
} }
impl<'t> CreateFacetsAlgo<'t> { impl<'t> FacetsUpdateBulkAlgorithm<'t> {
fn read_level_0( fn read_level_0(
&self, &self,
handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>, handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,

View File

@ -1,13 +1,12 @@
use crate::heed_codec::facet::new::{ use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
}; };
use crate::search::facet::get_highest_level;
use crate::Result; use crate::Result;
use heed::Error; use heed::Error;
use heed::{types::ByteSlice, BytesDecode, RoTxn, RwTxn}; use heed::{types::ByteSlice, BytesDecode, RoTxn, RwTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::get_highest_level;
enum InsertionResult { enum InsertionResult {
InPlace, InPlace,
Insert, Insert,
@ -18,14 +17,14 @@ enum DeletionResult {
Remove { prev: Option<Vec<u8>>, next: Option<Vec<u8>> }, Remove { prev: Option<Vec<u8>>, next: Option<Vec<u8>> },
} }
struct IncrementalFacetUpdate<'i> { struct FacetUpdateIncremental {
db: &'i heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
group_size: usize, group_size: usize,
min_level_size: usize, min_level_size: usize,
max_group_size: usize, max_group_size: usize,
} }
impl<'i> IncrementalFacetUpdate<'i> { impl FacetUpdateIncremental {
fn find_insertion_key_value<'a>( fn find_insertion_key_value(
&self, &self,
field_id: u16, field_id: u16,
level: u8, level: u8,

View File

@ -0,0 +1,2 @@
pub mod bulk;
pub mod incremental;

View File

@ -37,8 +37,8 @@ use crate::error::UserError;
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
pub use crate::update::index_documents::helpers::CursorClonableMmap; pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{ use crate::update::{
self, Facets, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep, self, FacetsUpdateBulk, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
WordPrefixDocids, WordPrefixPositionDocids, WordsPrefixesFst, PrefixWordPairsProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst,
}; };
use crate::{Index, Result, RoaringBitmapCodec}; use crate::{Index, Result, RoaringBitmapCodec};
@ -436,7 +436,7 @@ where
(&self.index.facet_id_string_docids).remap_key_type::<FacetKeyCodec<MyByteSlice>>(), (&self.index.facet_id_string_docids).remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
(&self.index.facet_id_f64_docids).remap_key_type::<FacetKeyCodec<MyByteSlice>>(), (&self.index.facet_id_f64_docids).remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
] { ] {
let mut builder = Facets::new(self.index, facet_db); let mut builder = FacetsUpdateBulk::new(self.index, facet_db);
builder.chunk_compression_type = self.indexer_config.chunk_compression_type; builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
builder.chunk_compression_level = self.indexer_config.chunk_compression_level; builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
if let Some(value) = self.config.facet_level_group_size { if let Some(value) = self.config.facet_level_group_size {

View File

@ -1,7 +1,7 @@
pub use self::available_documents_ids::AvailableDocumentsIds; pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments; pub use self::clear_documents::ClearDocuments;
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult}; pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult};
pub use self::facets::Facets; pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::index_documents::{ pub use self::index_documents::{
DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
}; };
@ -16,7 +16,7 @@ pub use self::words_prefixes_fst::WordsPrefixesFst;
mod available_documents_ids; mod available_documents_ids;
mod clear_documents; mod clear_documents;
mod delete_documents; mod delete_documents;
mod facets; mod facet;
mod index_documents; mod index_documents;
mod indexer_config; mod indexer_config;
mod prefix_word_pairs; mod prefix_word_pairs;