Reorganise facets database indexing code

This commit is contained in:
Loïc Lecrenier 2022-08-31 13:03:36 +02:00 committed by Loïc Lecrenier
parent fb8d23deb3
commit e8a156d682
9 changed files with 33 additions and 37 deletions

1
http-ui/src/main.rs Normal file
View File

@ -0,0 +1 @@

View File

@ -4,8 +4,6 @@ use heed::{BytesDecode, RoTxn};
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
// pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange};
// pub use self::facet_string::FacetStringIter;
pub use self::filter::Filter;
mod facet_distribution;
@ -14,7 +12,6 @@ mod facet_range_search;
pub mod facet_sort_ascending;
pub mod facet_sort_descending;
mod filter;
mod incremental_update;
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,

View File

@ -32,7 +32,7 @@ static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
mod criteria;
mod distinct;
mod facet;
pub mod facet;
mod fst_utils;
mod matches;
mod query_tree;

View File

@ -8,7 +8,7 @@ use serde::{Deserialize, Serialize};
use serde_json::Value;
use time::OffsetDateTime;
use super::{ClearDocuments, Facets};
use super::{ClearDocuments, FacetsUpdateBulk};
use crate::error::{InternalError, UserError};
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::CboRoaringBitmapCodec;
@ -643,7 +643,7 @@ fn remove_docids_from_facet_id_docids<'a>(
if !modified {
return Ok(());
}
let builder = Facets::new(index, db);
let builder = FacetsUpdateBulk::new(index, db);
builder.execute(wtxn)?;
Ok(())

View File

@ -1,23 +1,20 @@
use std::cmp;
use std::fs::File;
use std::num::NonZeroUsize;
use crate::error::InternalError;
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
};
use crate::update::index_documents::{create_writer, write_into_lmdb_database, writer_into_reader};
use crate::{FieldId, Index, Result};
use grenad::CompressionType;
use heed::types::ByteSlice;
use heed::{BytesEncode, Error, RoTxn};
use log::debug;
use roaring::RoaringBitmap;
use std::cmp;
use std::fs::File;
use std::num::NonZeroUsize;
use time::OffsetDateTime;
use crate::error::InternalError;
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
};
// use crate::heed_codec::CboRoaringBitmapCodec;
use crate::update::index_documents::{create_writer, write_into_lmdb_database, writer_into_reader};
use crate::{FieldId, Index, Result};
pub struct Facets<'i> {
pub struct FacetsUpdateBulk<'i> {
index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
pub(crate) chunk_compression_type: CompressionType,
@ -26,12 +23,12 @@ pub struct Facets<'i> {
min_level_size: usize,
}
impl<'i> Facets<'i> {
impl<'i> FacetsUpdateBulk<'i> {
pub fn new(
index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
) -> Facets<'i> {
Facets {
) -> FacetsUpdateBulk<'i> {
FacetsUpdateBulk {
index,
database,
chunk_compression_type: CompressionType::None,
@ -63,7 +60,7 @@ impl<'i> Facets<'i> {
Ok(())
}
#[logging_timer::time("Facets::{}")]
#[logging_timer::time("FacetsUpdateBulk::{}")]
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
// We get the faceted fields to be able to create the facet levels.
@ -105,7 +102,7 @@ impl<'i> Facets<'i> {
field_id: FieldId,
txn: &RoTxn,
) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
let algo = CreateFacetsAlgo {
let algo = FacetsUpdateBulkAlgorithm {
rtxn: txn,
db: &self.database,
field_id,
@ -129,7 +126,7 @@ impl<'i> Facets<'i> {
}
}
pub struct CreateFacetsAlgo<'t> {
pub struct FacetsUpdateBulkAlgorithm<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
chunk_compression_type: CompressionType,
@ -138,7 +135,7 @@ pub struct CreateFacetsAlgo<'t> {
level_group_size: usize,
min_level_size: usize,
}
impl<'t> CreateFacetsAlgo<'t> {
impl<'t> FacetsUpdateBulkAlgorithm<'t> {
fn read_level_0(
&self,
handle_group: &mut dyn FnMut(&[RoaringBitmap], &'t [u8]) -> Result<()>,

View File

@ -1,13 +1,12 @@
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
};
use crate::search::facet::get_highest_level;
use crate::Result;
use heed::Error;
use heed::{types::ByteSlice, BytesDecode, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use super::get_highest_level;
enum InsertionResult {
InPlace,
Insert,
@ -18,14 +17,14 @@ enum DeletionResult {
Remove { prev: Option<Vec<u8>>, next: Option<Vec<u8>> },
}
struct IncrementalFacetUpdate<'i> {
db: &'i heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
struct FacetUpdateIncremental {
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
group_size: usize,
min_level_size: usize,
max_group_size: usize,
}
impl<'i> IncrementalFacetUpdate<'i> {
fn find_insertion_key_value<'a>(
impl FacetUpdateIncremental {
fn find_insertion_key_value(
&self,
field_id: u16,
level: u8,

View File

@ -0,0 +1,2 @@
pub mod bulk;
pub mod incremental;

View File

@ -37,8 +37,8 @@ use crate::error::UserError;
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
pub use crate::update::index_documents::helpers::CursorClonableMmap;
use crate::update::{
self, Facets, IndexerConfig, PrefixWordPairsProximityDocids, UpdateIndexingStep,
WordPrefixDocids, WordPrefixPositionDocids, WordsPrefixesFst,
self, FacetsUpdateBulk, IndexerConfig, UpdateIndexingStep, WordPrefixDocids,
PrefixWordPairsProximityDocids, WordPrefixPositionDocids, WordsPrefixesFst,
};
use crate::{Index, Result, RoaringBitmapCodec};
@ -436,7 +436,7 @@ where
(&self.index.facet_id_string_docids).remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
(&self.index.facet_id_f64_docids).remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
] {
let mut builder = Facets::new(self.index, facet_db);
let mut builder = FacetsUpdateBulk::new(self.index, facet_db);
builder.chunk_compression_type = self.indexer_config.chunk_compression_type;
builder.chunk_compression_level = self.indexer_config.chunk_compression_level;
if let Some(value) = self.config.facet_level_group_size {

View File

@ -1,7 +1,7 @@
pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments;
pub use self::delete_documents::{DeleteDocuments, DocumentDeletionResult};
pub use self::facets::Facets;
pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::index_documents::{
DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
};
@ -16,7 +16,7 @@ pub use self::words_prefixes_fst::WordsPrefixesFst;
mod available_documents_ids;
mod clear_documents;
mod delete_documents;
mod facets;
mod facet;
mod index_documents;
mod indexer_config;
mod prefix_word_pairs;