mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 05:14:27 +01:00
Make the facet levels map to previous level groups without splitting them
This commit is contained in:
parent
276c87af68
commit
ba4ba685f9
@ -28,7 +28,7 @@ use warp::{Filter, http::Response};
|
||||
|
||||
use milli::tokenizer::{simple_tokenizer, TokenType};
|
||||
use milli::update::UpdateIndexingStep::*;
|
||||
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat, EasingName};
|
||||
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
|
||||
use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};
|
||||
|
||||
static GLOBAL_THREAD_POOL: OnceCell<ThreadPool> = OnceCell::new();
|
||||
@ -237,9 +237,8 @@ struct Settings {
|
||||
#[serde(deny_unknown_fields)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
struct Facets {
|
||||
last_level_size: Option<NonZeroUsize>,
|
||||
number_of_levels: Option<NonZeroUsize>,
|
||||
easing_function: Option<String>,
|
||||
level_group_size: Option<NonZeroUsize>,
|
||||
min_level_size: Option<NonZeroUsize>,
|
||||
}
|
||||
|
||||
// Any value that is present is considered Some value, including null.
|
||||
@ -415,27 +414,12 @@ async fn main() -> anyhow::Result<()> {
|
||||
// We must use the write transaction of the update here.
|
||||
let mut wtxn = index_cloned.write_txn()?;
|
||||
let mut builder = update_builder.facets(&mut wtxn, &index_cloned);
|
||||
if let Some(value) = levels.last_level_size {
|
||||
builder.last_level_size(value);
|
||||
if let Some(value) = levels.level_group_size {
|
||||
builder.level_group_size(value);
|
||||
}
|
||||
if let Some(value) = levels.number_of_levels {
|
||||
builder.number_of_levels(value);
|
||||
if let Some(value) = levels.min_level_size {
|
||||
builder.min_level_size(value);
|
||||
}
|
||||
if let Some(value) = levels.easing_function {
|
||||
let easing_name = if value.eq_ignore_ascii_case("expo") {
|
||||
EasingName::Expo
|
||||
} else if value.eq_ignore_ascii_case("quart") {
|
||||
EasingName::Quart
|
||||
} else if value.eq_ignore_ascii_case("circ") {
|
||||
EasingName::Circ
|
||||
} else if value.eq_ignore_ascii_case("linear") {
|
||||
EasingName::Linear
|
||||
} else {
|
||||
panic!("Invalid easing function name")
|
||||
};
|
||||
builder.easing_function(easing_name);
|
||||
}
|
||||
|
||||
match builder.execute() {
|
||||
Ok(()) => wtxn.commit().map_err(Into::into),
|
||||
Err(e) => Err(e.into())
|
||||
@ -804,7 +788,7 @@ async fn main() -> anyhow::Result<()> {
|
||||
let update_store_cloned = update_store.clone();
|
||||
let update_status_sender_cloned = update_status_sender.clone();
|
||||
let change_facet_levels_route = warp::filters::method::post()
|
||||
.and(warp::path!("facet-levels"))
|
||||
.and(warp::path!("facet-level-sizes"))
|
||||
.and(warp::body::json())
|
||||
.map(move |levels: Facets| {
|
||||
let meta = UpdateMeta::Facets(levels);
|
||||
|
@ -1,10 +1,10 @@
|
||||
use std::cmp;
|
||||
use std::fs::File;
|
||||
use std::num::NonZeroUsize;
|
||||
|
||||
use grenad::{CompressionType, Reader, Writer, FileFuse};
|
||||
use heed::types::{ByteSlice, DecodeIgnore};
|
||||
use heed::{BytesEncode, Error};
|
||||
use itertools::Itertools;
|
||||
use log::debug;
|
||||
use num_traits::{Bounded, Zero};
|
||||
use roaring::RoaringBitmap;
|
||||
@ -16,23 +16,14 @@ use crate::Index;
|
||||
use crate::update::index_documents::WriteMethod;
|
||||
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
|
||||
|
||||
/// Name of the easing curve used to space the sizes of the facet levels.
///
/// Each variant selects the matching `ease_out_*` function when the level
/// sizes are generated.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum EasingName {
    /// Exponential ease-out curve.
    Expo,
    /// Quartic ease-out curve.
    Quart,
    /// Circular ease-out curve.
    Circ,
    /// Straight-line interpolation.
    Linear,
}
|
||||
|
||||
pub struct Facets<'t, 'u, 'i> {
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
index: &'i Index,
|
||||
pub(crate) chunk_compression_type: CompressionType,
|
||||
pub(crate) chunk_compression_level: Option<u32>,
|
||||
pub(crate) chunk_fusing_shrink_size: Option<u64>,
|
||||
number_of_levels: NonZeroUsize,
|
||||
last_level_size: NonZeroUsize,
|
||||
easing_function: EasingName,
|
||||
level_group_size: NonZeroUsize,
|
||||
min_level_size: NonZeroUsize,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
||||
@ -43,24 +34,18 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
||||
chunk_compression_type: CompressionType::None,
|
||||
chunk_compression_level: None,
|
||||
chunk_fusing_shrink_size: None,
|
||||
number_of_levels: NonZeroUsize::new(5).unwrap(),
|
||||
last_level_size: NonZeroUsize::new(5).unwrap(),
|
||||
easing_function: EasingName::Expo,
|
||||
level_group_size: NonZeroUsize::new(4).unwrap(),
|
||||
min_level_size: NonZeroUsize::new(5).unwrap(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn number_of_levels(&mut self, value: NonZeroUsize) -> &mut Self {
|
||||
self.number_of_levels = value;
|
||||
pub fn level_group_size(&mut self, value: NonZeroUsize) -> &mut Self {
|
||||
self.level_group_size = NonZeroUsize::new(cmp::max(value.get(), 2)).unwrap();
|
||||
self
|
||||
}
|
||||
|
||||
pub fn last_level_size(&mut self, value: NonZeroUsize) -> &mut Self {
|
||||
self.last_level_size = value;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn easing_function(&mut self, value: EasingName) -> &mut Self {
|
||||
self.easing_function = value;
|
||||
pub fn min_level_size(&mut self, value: NonZeroUsize) -> &mut Self {
|
||||
self.min_level_size = value;
|
||||
self
|
||||
}
|
||||
|
||||
@ -90,9 +75,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
||||
self.chunk_compression_type,
|
||||
self.chunk_compression_level,
|
||||
self.chunk_fusing_shrink_size,
|
||||
self.last_level_size,
|
||||
self.number_of_levels,
|
||||
self.easing_function,
|
||||
self.level_group_size,
|
||||
self.min_level_size,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
@ -117,9 +101,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
||||
self.chunk_compression_type,
|
||||
self.chunk_compression_level,
|
||||
self.chunk_fusing_shrink_size,
|
||||
self.last_level_size,
|
||||
self.number_of_levels,
|
||||
self.easing_function,
|
||||
self.level_group_size,
|
||||
self.min_level_size,
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
@ -175,9 +158,8 @@ fn compute_facet_levels<'t, T: 't, KC>(
|
||||
compression_type: CompressionType,
|
||||
compression_level: Option<u32>,
|
||||
shrink_size: Option<u64>,
|
||||
last_level_size: NonZeroUsize,
|
||||
number_of_levels: NonZeroUsize,
|
||||
easing_function: EasingName,
|
||||
level_group_size: NonZeroUsize,
|
||||
min_level_size: NonZeroUsize,
|
||||
field_id: u8,
|
||||
) -> anyhow::Result<Reader<FileFuse>>
|
||||
where
|
||||
@ -201,15 +183,13 @@ where
|
||||
left..=right
|
||||
};
|
||||
|
||||
let level_sizes_iter =
|
||||
levels_iterator(first_level_size, last_level_size.get(), number_of_levels.get(), easing_function)
|
||||
.map(|size| (first_level_size as f64 / size as f64).ceil() as usize)
|
||||
.unique()
|
||||
.enumerate()
|
||||
.skip(1);
|
||||
// Group sizes are always a power of the original level_group_size, so a group
|
||||
// always covers whole groups of the previous level and never splits one of them in half.
|
||||
let group_size_iter = (1u8..)
|
||||
.map(|l| (l, level_group_size.get().pow(l as u32)))
|
||||
.take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
|
||||
|
||||
// TODO we must not create levels with identical group sizes.
|
||||
for (level, level_entry_sizes) in level_sizes_iter {
|
||||
for (level, group_size) in group_size_iter {
|
||||
let mut left = T::zero();
|
||||
let mut right = T::zero();
|
||||
let mut group_docids = RoaringBitmap::new();
|
||||
@ -220,10 +200,10 @@ where
|
||||
|
||||
if i == 0 {
|
||||
left = value;
|
||||
} else if i % level_entry_sizes == 0 {
|
||||
} else if i % group_size == 0 {
|
||||
// we found the first bound of the next group, we must store the left
|
||||
// and right bounds associated with the docids.
|
||||
write_entry::<T, KC>(&mut writer, field_id, level as u8, left, right, &group_docids)?;
|
||||
write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?;
|
||||
|
||||
// We save the left bound for the new group and also reset the docids.
|
||||
group_docids = RoaringBitmap::new();
|
||||
@ -236,7 +216,7 @@ where
|
||||
}
|
||||
|
||||
if !group_docids.is_empty() {
|
||||
write_entry::<T, KC>(&mut writer, field_id, level as u8, left, right, &group_docids)?;
|
||||
write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?;
|
||||
}
|
||||
}
|
||||
|
||||
@ -274,51 +254,3 @@ where
|
||||
writer.insert(&key, &data)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn levels_iterator(
|
||||
first_level_size: usize, // biggest level
|
||||
last_level_size: usize, // smallest level
|
||||
number_of_levels: usize,
|
||||
easing_function: EasingName,
|
||||
) -> impl Iterator<Item=usize>
|
||||
{
|
||||
let easing_function = match easing_function {
|
||||
EasingName::Expo => ease_out_expo,
|
||||
EasingName::Quart => ease_out_quart,
|
||||
EasingName::Circ => ease_out_circ,
|
||||
EasingName::Linear => ease_out_linear,
|
||||
};
|
||||
|
||||
let b = last_level_size as f64;
|
||||
let end = first_level_size as f64;
|
||||
let c = end - b;
|
||||
let d = number_of_levels;
|
||||
(0..=d).map(move |t| ((end + b) - easing_function(t as f64, b, c, d as f64)) as usize)
|
||||
}
|
||||
|
||||
// Go look at the function definitions here:
|
||||
// https://docs.rs/easer/0.2.1/easer/index.html
|
||||
// https://easings.net/#easeOutExpo
|
||||
/// Exponential ease-out.
///
/// Returns `b` at `t == 0` and exactly `b + c` at `t == d`; in between the
/// value is `c * (1 - 2^(-10 * t / d)) + b`.
///
/// * `t` - current step.
/// * `b` - value at the start of the curve.
/// * `c` - total change applied over the whole curve.
/// * `d` - step at which the curve ends.
fn ease_out_expo(t: f64, b: f64, c: f64, d: f64) -> f64 {
    // The formula only approaches `b + c` asymptotically, so the end point
    // is special-cased to land exactly on it.
    if t == d {
        return b + c;
    }

    // The power is computed before it is subtracted from one.
    let progress = 1.0 - 2.0_f64.powf(-10.0 * t / d);
    c * progress + b
}
|
||||
|
||||
// https://easings.net/#easeOutCirc
|
||||
/// Circular ease-out.
///
/// Returns `b` at `t == 0` and `b + c` at `t == d`, following
/// `c * sqrt(1 - (t / d - 1)^2) + b`. The square root is of a negative
/// number (hence NaN) once `t / d - 1` leaves `-1.0..=1.0`.
///
/// * `t` - current step.
/// * `b` - value at the start of the curve.
/// * `c` - total change applied over the whole curve.
/// * `d` - step at which the curve ends.
fn ease_out_circ(t: f64, b: f64, c: f64, d: f64) -> f64 {
    // Shift the normalized position so the curve ends at zero.
    let shifted = t / d - 1.0;
    let progress = (1.0 - shifted * shifted).sqrt();
    c * progress + b
}
|
||||
|
||||
// https://easings.net/#easeOutQuart
|
||||
/// Quartic ease-out.
///
/// Returns `b` at `t == 0` and `b + c` at `t == d`, following
/// `-c * ((t / d - 1)^4 - 1) + b`.
///
/// * `t` - current step.
/// * `b` - value at the start of the curve.
/// * `c` - total change applied over the whole curve.
/// * `d` - step at which the curve ends.
fn ease_out_quart(t: f64, b: f64, c: f64, d: f64) -> f64 {
    // Shift the normalized position so the curve ends at zero.
    let shifted = t / d - 1.0;
    let fourth_power = shifted * shifted * shifted * shifted;
    -c * (fourth_power - 1.0) + b
}
|
||||
|
||||
/// Linear interpolation: `c * t / d + b`.
///
/// Returns `b` at `t == 0` and `b + c` at `t == d`.
///
/// * `t` - current step.
/// * `b` - value at the start of the line.
/// * `c` - total change applied over the whole line.
/// * `d` - step at which the line ends.
fn ease_out_linear(t: f64, b: f64, c: f64, d: f64) -> f64 {
    let offset = c * t / d;
    offset + b
}
|
||||
|
@ -208,8 +208,8 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
pub(crate) chunk_compression_level: Option<u32>,
|
||||
pub(crate) chunk_fusing_shrink_size: Option<u64>,
|
||||
pub(crate) thread_pool: Option<&'a ThreadPool>,
|
||||
facet_number_of_levels: Option<NonZeroUsize>,
|
||||
facet_last_level_size: Option<NonZeroUsize>,
|
||||
facet_level_group_size: Option<NonZeroUsize>,
|
||||
facet_min_level_size: Option<NonZeroUsize>,
|
||||
update_method: IndexDocumentsMethod,
|
||||
update_format: UpdateFormat,
|
||||
autogenerate_docids: bool,
|
||||
@ -228,8 +228,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
chunk_compression_level: None,
|
||||
chunk_fusing_shrink_size: None,
|
||||
thread_pool: None,
|
||||
facet_number_of_levels: None,
|
||||
facet_last_level_size: None,
|
||||
facet_level_group_size: None,
|
||||
facet_min_level_size: None,
|
||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||
update_format: UpdateFormat::Json,
|
||||
autogenerate_docids: true,
|
||||
@ -588,11 +588,11 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
||||
builder.chunk_compression_type = self.chunk_compression_type;
|
||||
builder.chunk_compression_level = self.chunk_compression_level;
|
||||
builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
||||
if let Some(value) = self.facet_number_of_levels {
|
||||
builder.number_of_levels(value);
|
||||
if let Some(value) = self.facet_level_group_size {
|
||||
builder.level_group_size(value);
|
||||
}
|
||||
if let Some(value) = self.facet_last_level_size {
|
||||
builder.last_level_size(value);
|
||||
if let Some(value) = self.facet_min_level_size {
|
||||
builder.min_level_size(value);
|
||||
}
|
||||
builder.execute()?;
|
||||
|
||||
|
@ -12,7 +12,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds;
|
||||
pub use self::clear_documents::ClearDocuments;
|
||||
pub use self::delete_documents::DeleteDocuments;
|
||||
pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat};
|
||||
pub use self::facets::{Facets, EasingName};
|
||||
pub use self::facets::Facets;
|
||||
pub use self::settings::Settings;
|
||||
pub use self::update_builder::UpdateBuilder;
|
||||
pub use self::update_step::UpdateIndexingStep;
|
||||
|
Loading…
Reference in New Issue
Block a user