Make the facet levels map to previous level groups and don't split them

Clément Renault 2020-11-28 12:43:43 +01:00
parent 276c87af68
commit ba4ba685f9
4 changed files with 41 additions and 125 deletions
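
In concrete terms, the group size of each facet level is now a power of `level_group_size`, and level building stops as soon as a level would hold fewer than `min_level_size` groups, so a group of level N always covers whole groups of level N-1 and never splits them. A minimal standalone sketch of that sizing logic, lifted from the new `compute_facet_levels` loop (the entry count below is made up; only the two defaults match the commit):

use std::num::NonZeroUsize;

// Standalone sketch: group sizes are successive powers of `level_group_size`,
// and we stop before a level would contain fewer than `min_level_size` groups.
fn main() {
    let level_group_size = NonZeroUsize::new(4).unwrap(); // new default
    let min_level_size = NonZeroUsize::new(5).unwrap();   // new default
    let first_level_size = 1000usize; // hypothetical number of level-0 entries

    let group_size_iter = (1u8..)
        .map(|l| (l, level_group_size.get().pow(l as u32)))
        .take_while(|(_, s)| first_level_size / *s >= min_level_size.get());

    for (level, group_size) in group_size_iter {
        println!("level {} groups {} level-0 entries per group", level, group_size);
    }
    // With 1000 level-0 entries this prints levels 1, 2 and 3 with group sizes
    // 4, 16 and 64; a group size of 256 would only yield 3 groups, which is
    // below `min_level_size`, so that level is not built.
}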


@@ -28,7 +28,7 @@ use warp::{Filter, http::Response};
 use milli::tokenizer::{simple_tokenizer, TokenType};
 use milli::update::UpdateIndexingStep::*;
-use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat, EasingName};
+use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
 use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};

 static GLOBAL_THREAD_POOL: OnceCell<ThreadPool> = OnceCell::new();
@@ -237,9 +237,8 @@ struct Settings {
 #[serde(deny_unknown_fields)]
 #[serde(rename_all = "camelCase")]
 struct Facets {
-    last_level_size: Option<NonZeroUsize>,
-    number_of_levels: Option<NonZeroUsize>,
-    easing_function: Option<String>,
+    level_group_size: Option<NonZeroUsize>,
+    min_level_size: Option<NonZeroUsize>,
 }

 // Any value that is present is considered Some value, including null.
@@ -415,27 +414,12 @@ async fn main() -> anyhow::Result<()> {
                 // We must use the write transaction of the update here.
                 let mut wtxn = index_cloned.write_txn()?;
                 let mut builder = update_builder.facets(&mut wtxn, &index_cloned);
-                if let Some(value) = levels.last_level_size {
-                    builder.last_level_size(value);
-                }
-                if let Some(value) = levels.number_of_levels {
-                    builder.number_of_levels(value);
-                }
-                if let Some(value) = levels.easing_function {
-                    let easing_name = if value.eq_ignore_ascii_case("expo") {
-                        EasingName::Expo
-                    } else if value.eq_ignore_ascii_case("quart") {
-                        EasingName::Quart
-                    } else if value.eq_ignore_ascii_case("circ") {
-                        EasingName::Circ
-                    } else if value.eq_ignore_ascii_case("linear") {
-                        EasingName::Linear
-                    } else {
-                        panic!("Invalid easing function name")
-                    };
-                    builder.easing_function(easing_name);
-                }
+                if let Some(value) = levels.level_group_size {
+                    builder.level_group_size(value);
+                }
+                if let Some(value) = levels.min_level_size {
+                    builder.min_level_size(value);
+                }
                 match builder.execute() {
                     Ok(()) => wtxn.commit().map_err(Into::into),
                     Err(e) => Err(e.into())
@@ -804,7 +788,7 @@ async fn main() -> anyhow::Result<()> {
    let update_store_cloned = update_store.clone();
    let update_status_sender_cloned = update_status_sender.clone();
    let change_facet_levels_route = warp::filters::method::post()
-        .and(warp::path!("facet-levels"))
+        .and(warp::path!("facet-level-sizes"))
        .and(warp::body::json())
        .map(move |levels: Facets| {
            let meta = UpdateMeta::Facets(levels);
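
Clients of the http-ui therefore post the two renamed fields to the renamed route. A hedged, standalone sketch of what the new `facet-level-sizes` body deserializes into, mirroring the `Facets` settings struct above (the payload values are made up; `serde` and `serde_json` are assumed available as dependencies for this snippet):

use std::num::NonZeroUsize;
use serde::Deserialize;

// Same shape as the http-ui `Facets` settings struct shown in the diff above.
#[derive(Debug, Deserialize)]
#[serde(deny_unknown_fields)]
#[serde(rename_all = "camelCase")]
struct Facets {
    level_group_size: Option<NonZeroUsize>,
    min_level_size: Option<NonZeroUsize>,
}

fn main() -> serde_json::Result<()> {
    // Hypothetical body POSTed to the renamed `facet-level-sizes` route.
    let body = r#"{ "levelGroupSize": 8, "minLevelSize": 10 }"#;
    let levels: Facets = serde_json::from_str(body)?;
    println!("{:?}", levels);
    Ok(())
}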


@@ -1,10 +1,10 @@
+use std::cmp;
 use std::fs::File;
 use std::num::NonZeroUsize;

 use grenad::{CompressionType, Reader, Writer, FileFuse};
 use heed::types::{ByteSlice, DecodeIgnore};
 use heed::{BytesEncode, Error};
-use itertools::Itertools;
 use log::debug;
 use num_traits::{Bounded, Zero};
 use roaring::RoaringBitmap;
@@ -16,23 +16,14 @@ use crate::Index;
 use crate::update::index_documents::WriteMethod;
 use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};

-#[derive(Debug, Copy, Clone)]
-pub enum EasingName {
-    Expo,
-    Quart,
-    Circ,
-    Linear,
-}
-
 pub struct Facets<'t, 'u, 'i> {
     wtxn: &'t mut heed::RwTxn<'i, 'u>,
     index: &'i Index,
     pub(crate) chunk_compression_type: CompressionType,
     pub(crate) chunk_compression_level: Option<u32>,
     pub(crate) chunk_fusing_shrink_size: Option<u64>,
-    number_of_levels: NonZeroUsize,
-    last_level_size: NonZeroUsize,
-    easing_function: EasingName,
+    level_group_size: NonZeroUsize,
+    min_level_size: NonZeroUsize,
 }

 impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
@@ -43,24 +34,18 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
             chunk_compression_type: CompressionType::None,
             chunk_compression_level: None,
             chunk_fusing_shrink_size: None,
-            number_of_levels: NonZeroUsize::new(5).unwrap(),
-            last_level_size: NonZeroUsize::new(5).unwrap(),
-            easing_function: EasingName::Expo,
+            level_group_size: NonZeroUsize::new(4).unwrap(),
+            min_level_size: NonZeroUsize::new(5).unwrap(),
         }
     }

-    pub fn number_of_levels(&mut self, value: NonZeroUsize) -> &mut Self {
-        self.number_of_levels = value;
+    pub fn level_group_size(&mut self, value: NonZeroUsize) -> &mut Self {
+        self.level_group_size = NonZeroUsize::new(cmp::max(value.get(), 2)).unwrap();
         self
     }

-    pub fn last_level_size(&mut self, value: NonZeroUsize) -> &mut Self {
-        self.last_level_size = value;
-        self
-    }
-
-    pub fn easing_function(&mut self, value: EasingName) -> &mut Self {
-        self.easing_function = value;
+    pub fn min_level_size(&mut self, value: NonZeroUsize) -> &mut Self {
+        self.min_level_size = value;
         self
     }
@@ -90,9 +75,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
                 self.chunk_compression_type,
                 self.chunk_compression_level,
                 self.chunk_fusing_shrink_size,
-                self.last_level_size,
-                self.number_of_levels,
-                self.easing_function,
+                self.level_group_size,
+                self.min_level_size,
                 field_id,
             )?;
@@ -117,9 +101,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
                 self.chunk_compression_type,
                 self.chunk_compression_level,
                 self.chunk_fusing_shrink_size,
-                self.last_level_size,
-                self.number_of_levels,
-                self.easing_function,
+                self.level_group_size,
+                self.min_level_size,
                 field_id,
             )?;
@@ -175,9 +158,8 @@ fn compute_facet_levels<'t, T: 't, KC>(
     compression_type: CompressionType,
     compression_level: Option<u32>,
     shrink_size: Option<u64>,
-    last_level_size: NonZeroUsize,
-    number_of_levels: NonZeroUsize,
-    easing_function: EasingName,
+    level_group_size: NonZeroUsize,
+    min_level_size: NonZeroUsize,
     field_id: u8,
 ) -> anyhow::Result<Reader<FileFuse>>
 where
@@ -201,15 +183,13 @@ where
         left..=right
     };

-    let level_sizes_iter =
-        levels_iterator(first_level_size, last_level_size.get(), number_of_levels.get(), easing_function)
-            .map(|size| (first_level_size as f64 / size as f64).ceil() as usize)
-            .unique()
-            .enumerate()
-            .skip(1);
-
-    // TODO we must not create levels with identical group sizes.
-    for (level, level_entry_sizes) in level_sizes_iter {
+    // Groups sizes are always a power of the original level_group_size and therefore a group
+    // always maps groups of the previous level and never splits previous levels groups in half.
+    let group_size_iter = (1u8..)
+        .map(|l| (l, level_group_size.get().pow(l as u32)))
+        .take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
+
+    for (level, group_size) in group_size_iter {
         let mut left = T::zero();
         let mut right = T::zero();
         let mut group_docids = RoaringBitmap::new();
@@ -220,10 +200,10 @@ where
             if i == 0 {
                 left = value;
-            } else if i % level_entry_sizes == 0 {
+            } else if i % group_size == 0 {
                 // we found the first bound of the next group, we must store the left
                 // and right bounds associated with the docids.
-                write_entry::<T, KC>(&mut writer, field_id, level as u8, left, right, &group_docids)?;
+                write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?;

                 // We save the left bound for the new group and also reset the docids.
                 group_docids = RoaringBitmap::new();
@@ -236,7 +216,7 @@ where
         }

         if !group_docids.is_empty() {
-            write_entry::<T, KC>(&mut writer, field_id, level as u8, left, right, &group_docids)?;
+            write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?;
         }
     }
@@ -274,51 +254,3 @@ where
     writer.insert(&key, &data)?;

     Ok(())
 }
-
-fn levels_iterator(
-    first_level_size: usize, // biggest level
-    last_level_size: usize, // smallest level
-    number_of_levels: usize,
-    easing_function: EasingName,
-) -> impl Iterator<Item=usize>
-{
-    let easing_function = match easing_function {
-        EasingName::Expo => ease_out_expo,
-        EasingName::Quart => ease_out_quart,
-        EasingName::Circ => ease_out_circ,
-        EasingName::Linear => ease_out_linear,
-    };
-
-    let b = last_level_size as f64;
-    let end = first_level_size as f64;
-    let c = end - b;
-    let d = number_of_levels;
-    (0..=d).map(move |t| ((end + b) - easing_function(t as f64, b, c, d as f64)) as usize)
-}
-
-// Go look at the function definitions here:
-// https://docs.rs/easer/0.2.1/easer/index.html
-
-// https://easings.net/#easeOutExpo
-fn ease_out_expo(t: f64, b: f64, c: f64, d: f64) -> f64 {
-    if t == d {
-        b + c
-    } else {
-        c * (-2.0_f64.powf(-10.0 * t / d) + 1.0) + b
-    }
-}
-
-// https://easings.net/#easeOutCirc
-fn ease_out_circ(t: f64, b: f64, c: f64, d: f64) -> f64 {
-    let t = t / d - 1.0;
-    c * (1.0 - t * t).sqrt() + b
-}
-
-// https://easings.net/#easeOutQuart
-fn ease_out_quart(t: f64, b: f64, c: f64, d: f64) -> f64 {
-    let t = t / d - 1.0;
-    -c * ((t * t * t * t) - 1.0) + b
-}
-
-fn ease_out_linear(t: f64, b: f64, c: f64, d: f64) -> f64 {
-    c * t / d + b
-}
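
One detail of the renamed setters above worth calling out: `level_group_size` clamps its argument to at least 2, presumably because a group size of 1 would simply reproduce the previous level instead of grouping it. A small isolated sketch of that clamping (the helper name is mine, not the crate's):

use std::cmp;
use std::num::NonZeroUsize;

// Isolated illustration of the clamping done by the new `level_group_size`
// setter: any requested size below 2 is raised to 2.
fn clamp_level_group_size(value: NonZeroUsize) -> NonZeroUsize {
    NonZeroUsize::new(cmp::max(value.get(), 2)).unwrap()
}

fn main() {
    assert_eq!(clamp_level_group_size(NonZeroUsize::new(1).unwrap()).get(), 2);
    assert_eq!(clamp_level_group_size(NonZeroUsize::new(4).unwrap()).get(), 4);
}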


@@ -208,8 +208,8 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a> {
     pub(crate) chunk_compression_level: Option<u32>,
     pub(crate) chunk_fusing_shrink_size: Option<u64>,
     pub(crate) thread_pool: Option<&'a ThreadPool>,
-    facet_number_of_levels: Option<NonZeroUsize>,
-    facet_last_level_size: Option<NonZeroUsize>,
+    facet_level_group_size: Option<NonZeroUsize>,
+    facet_min_level_size: Option<NonZeroUsize>,
     update_method: IndexDocumentsMethod,
     update_format: UpdateFormat,
     autogenerate_docids: bool,
@@ -228,8 +228,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
             chunk_compression_level: None,
             chunk_fusing_shrink_size: None,
             thread_pool: None,
-            facet_number_of_levels: None,
-            facet_last_level_size: None,
+            facet_level_group_size: None,
+            facet_min_level_size: None,
             update_method: IndexDocumentsMethod::ReplaceDocuments,
             update_format: UpdateFormat::Json,
             autogenerate_docids: true,
@@ -588,11 +588,11 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
        builder.chunk_compression_type = self.chunk_compression_type;
        builder.chunk_compression_level = self.chunk_compression_level;
        builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
-        if let Some(value) = self.facet_number_of_levels {
-            builder.number_of_levels(value);
+        if let Some(value) = self.facet_level_group_size {
+            builder.level_group_size(value);
        }
-        if let Some(value) = self.facet_last_level_size {
-            builder.last_level_size(value);
+        if let Some(value) = self.facet_min_level_size {
+            builder.min_level_size(value);
        }

        builder.execute()?;


@@ -12,7 +12,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds;
 pub use self::clear_documents::ClearDocuments;
 pub use self::delete_documents::DeleteDocuments;
 pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat};
-pub use self::facets::{Facets, EasingName};
+pub use self::facets::Facets;
 pub use self::settings::Settings;
 pub use self::update_builder::UpdateBuilder;
 pub use self::update_step::UpdateIndexingStep;