mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Make the facet levels map to previous level groups and don't split them
This commit is contained in:
parent
276c87af68
commit
ba4ba685f9
@ -28,7 +28,7 @@ use warp::{Filter, http::Response};
|
|||||||
|
|
||||||
use milli::tokenizer::{simple_tokenizer, TokenType};
|
use milli::tokenizer::{simple_tokenizer, TokenType};
|
||||||
use milli::update::UpdateIndexingStep::*;
|
use milli::update::UpdateIndexingStep::*;
|
||||||
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat, EasingName};
|
use milli::update::{UpdateBuilder, IndexDocumentsMethod, UpdateFormat};
|
||||||
use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};
|
use milli::{obkv_to_json, Index, UpdateStore, SearchResult, FacetCondition};
|
||||||
|
|
||||||
static GLOBAL_THREAD_POOL: OnceCell<ThreadPool> = OnceCell::new();
|
static GLOBAL_THREAD_POOL: OnceCell<ThreadPool> = OnceCell::new();
|
||||||
@ -237,9 +237,8 @@ struct Settings {
|
|||||||
#[serde(deny_unknown_fields)]
|
#[serde(deny_unknown_fields)]
|
||||||
#[serde(rename_all = "camelCase")]
|
#[serde(rename_all = "camelCase")]
|
||||||
struct Facets {
|
struct Facets {
|
||||||
last_level_size: Option<NonZeroUsize>,
|
level_group_size: Option<NonZeroUsize>,
|
||||||
number_of_levels: Option<NonZeroUsize>,
|
min_level_size: Option<NonZeroUsize>,
|
||||||
easing_function: Option<String>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Any value that is present is considered Some value, including null.
|
// Any value that is present is considered Some value, including null.
|
||||||
@ -415,27 +414,12 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
// We must use the write transaction of the update here.
|
// We must use the write transaction of the update here.
|
||||||
let mut wtxn = index_cloned.write_txn()?;
|
let mut wtxn = index_cloned.write_txn()?;
|
||||||
let mut builder = update_builder.facets(&mut wtxn, &index_cloned);
|
let mut builder = update_builder.facets(&mut wtxn, &index_cloned);
|
||||||
if let Some(value) = levels.last_level_size {
|
if let Some(value) = levels.level_group_size {
|
||||||
builder.last_level_size(value);
|
builder.level_group_size(value);
|
||||||
}
|
}
|
||||||
if let Some(value) = levels.number_of_levels {
|
if let Some(value) = levels.min_level_size {
|
||||||
builder.number_of_levels(value);
|
builder.min_level_size(value);
|
||||||
}
|
}
|
||||||
if let Some(value) = levels.easing_function {
|
|
||||||
let easing_name = if value.eq_ignore_ascii_case("expo") {
|
|
||||||
EasingName::Expo
|
|
||||||
} else if value.eq_ignore_ascii_case("quart") {
|
|
||||||
EasingName::Quart
|
|
||||||
} else if value.eq_ignore_ascii_case("circ") {
|
|
||||||
EasingName::Circ
|
|
||||||
} else if value.eq_ignore_ascii_case("linear") {
|
|
||||||
EasingName::Linear
|
|
||||||
} else {
|
|
||||||
panic!("Invalid easing function name")
|
|
||||||
};
|
|
||||||
builder.easing_function(easing_name);
|
|
||||||
}
|
|
||||||
|
|
||||||
match builder.execute() {
|
match builder.execute() {
|
||||||
Ok(()) => wtxn.commit().map_err(Into::into),
|
Ok(()) => wtxn.commit().map_err(Into::into),
|
||||||
Err(e) => Err(e.into())
|
Err(e) => Err(e.into())
|
||||||
@ -804,7 +788,7 @@ async fn main() -> anyhow::Result<()> {
|
|||||||
let update_store_cloned = update_store.clone();
|
let update_store_cloned = update_store.clone();
|
||||||
let update_status_sender_cloned = update_status_sender.clone();
|
let update_status_sender_cloned = update_status_sender.clone();
|
||||||
let change_facet_levels_route = warp::filters::method::post()
|
let change_facet_levels_route = warp::filters::method::post()
|
||||||
.and(warp::path!("facet-levels"))
|
.and(warp::path!("facet-level-sizes"))
|
||||||
.and(warp::body::json())
|
.and(warp::body::json())
|
||||||
.map(move |levels: Facets| {
|
.map(move |levels: Facets| {
|
||||||
let meta = UpdateMeta::Facets(levels);
|
let meta = UpdateMeta::Facets(levels);
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
|
use std::cmp;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::num::NonZeroUsize;
|
use std::num::NonZeroUsize;
|
||||||
|
|
||||||
use grenad::{CompressionType, Reader, Writer, FileFuse};
|
use grenad::{CompressionType, Reader, Writer, FileFuse};
|
||||||
use heed::types::{ByteSlice, DecodeIgnore};
|
use heed::types::{ByteSlice, DecodeIgnore};
|
||||||
use heed::{BytesEncode, Error};
|
use heed::{BytesEncode, Error};
|
||||||
use itertools::Itertools;
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use num_traits::{Bounded, Zero};
|
use num_traits::{Bounded, Zero};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -16,23 +16,14 @@ use crate::Index;
|
|||||||
use crate::update::index_documents::WriteMethod;
|
use crate::update::index_documents::WriteMethod;
|
||||||
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
|
use crate::update::index_documents::{create_writer, writer_into_reader, write_into_lmdb_database};
|
||||||
|
|
||||||
#[derive(Debug, Copy, Clone)]
|
|
||||||
pub enum EasingName {
|
|
||||||
Expo,
|
|
||||||
Quart,
|
|
||||||
Circ,
|
|
||||||
Linear,
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct Facets<'t, 'u, 'i> {
|
pub struct Facets<'t, 'u, 'i> {
|
||||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
pub(crate) chunk_compression_type: CompressionType,
|
pub(crate) chunk_compression_type: CompressionType,
|
||||||
pub(crate) chunk_compression_level: Option<u32>,
|
pub(crate) chunk_compression_level: Option<u32>,
|
||||||
pub(crate) chunk_fusing_shrink_size: Option<u64>,
|
pub(crate) chunk_fusing_shrink_size: Option<u64>,
|
||||||
number_of_levels: NonZeroUsize,
|
level_group_size: NonZeroUsize,
|
||||||
last_level_size: NonZeroUsize,
|
min_level_size: NonZeroUsize,
|
||||||
easing_function: EasingName,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
||||||
@ -43,24 +34,18 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
chunk_compression_type: CompressionType::None,
|
chunk_compression_type: CompressionType::None,
|
||||||
chunk_compression_level: None,
|
chunk_compression_level: None,
|
||||||
chunk_fusing_shrink_size: None,
|
chunk_fusing_shrink_size: None,
|
||||||
number_of_levels: NonZeroUsize::new(5).unwrap(),
|
level_group_size: NonZeroUsize::new(4).unwrap(),
|
||||||
last_level_size: NonZeroUsize::new(5).unwrap(),
|
min_level_size: NonZeroUsize::new(5).unwrap(),
|
||||||
easing_function: EasingName::Expo,
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn number_of_levels(&mut self, value: NonZeroUsize) -> &mut Self {
|
pub fn level_group_size(&mut self, value: NonZeroUsize) -> &mut Self {
|
||||||
self.number_of_levels = value;
|
self.level_group_size = NonZeroUsize::new(cmp::max(value.get(), 2)).unwrap();
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn last_level_size(&mut self, value: NonZeroUsize) -> &mut Self {
|
pub fn min_level_size(&mut self, value: NonZeroUsize) -> &mut Self {
|
||||||
self.last_level_size = value;
|
self.min_level_size = value;
|
||||||
self
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn easing_function(&mut self, value: EasingName) -> &mut Self {
|
|
||||||
self.easing_function = value;
|
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -90,9 +75,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
self.chunk_fusing_shrink_size,
|
self.chunk_fusing_shrink_size,
|
||||||
self.last_level_size,
|
self.level_group_size,
|
||||||
self.number_of_levels,
|
self.min_level_size,
|
||||||
self.easing_function,
|
|
||||||
field_id,
|
field_id,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -117,9 +101,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
self.chunk_fusing_shrink_size,
|
self.chunk_fusing_shrink_size,
|
||||||
self.last_level_size,
|
self.level_group_size,
|
||||||
self.number_of_levels,
|
self.min_level_size,
|
||||||
self.easing_function,
|
|
||||||
field_id,
|
field_id,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -175,9 +158,8 @@ fn compute_facet_levels<'t, T: 't, KC>(
|
|||||||
compression_type: CompressionType,
|
compression_type: CompressionType,
|
||||||
compression_level: Option<u32>,
|
compression_level: Option<u32>,
|
||||||
shrink_size: Option<u64>,
|
shrink_size: Option<u64>,
|
||||||
last_level_size: NonZeroUsize,
|
level_group_size: NonZeroUsize,
|
||||||
number_of_levels: NonZeroUsize,
|
min_level_size: NonZeroUsize,
|
||||||
easing_function: EasingName,
|
|
||||||
field_id: u8,
|
field_id: u8,
|
||||||
) -> anyhow::Result<Reader<FileFuse>>
|
) -> anyhow::Result<Reader<FileFuse>>
|
||||||
where
|
where
|
||||||
@ -201,15 +183,13 @@ where
|
|||||||
left..=right
|
left..=right
|
||||||
};
|
};
|
||||||
|
|
||||||
let level_sizes_iter =
|
// Group sizes are always a power of the original level_group_size and therefore a group
|
||||||
levels_iterator(first_level_size, last_level_size.get(), number_of_levels.get(), easing_function)
|
// always maps groups of the previous level and never splits the previous level's groups in half.
|
||||||
.map(|size| (first_level_size as f64 / size as f64).ceil() as usize)
|
let group_size_iter = (1u8..)
|
||||||
.unique()
|
.map(|l| (l, level_group_size.get().pow(l as u32)))
|
||||||
.enumerate()
|
.take_while(|(_, s)| first_level_size / *s >= min_level_size.get());
|
||||||
.skip(1);
|
|
||||||
|
|
||||||
// TODO we must not create levels with identical group sizes.
|
for (level, group_size) in group_size_iter {
|
||||||
for (level, level_entry_sizes) in level_sizes_iter {
|
|
||||||
let mut left = T::zero();
|
let mut left = T::zero();
|
||||||
let mut right = T::zero();
|
let mut right = T::zero();
|
||||||
let mut group_docids = RoaringBitmap::new();
|
let mut group_docids = RoaringBitmap::new();
|
||||||
@ -220,10 +200,10 @@ where
|
|||||||
|
|
||||||
if i == 0 {
|
if i == 0 {
|
||||||
left = value;
|
left = value;
|
||||||
} else if i % level_entry_sizes == 0 {
|
} else if i % group_size == 0 {
|
||||||
// we found the first bound of the next group, we must store the left
|
// we found the first bound of the next group, we must store the left
|
||||||
// and right bounds associated with the docids.
|
// and right bounds associated with the docids.
|
||||||
write_entry::<T, KC>(&mut writer, field_id, level as u8, left, right, &group_docids)?;
|
write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?;
|
||||||
|
|
||||||
// We save the left bound for the new group and also reset the docids.
|
// We save the left bound for the new group and also reset the docids.
|
||||||
group_docids = RoaringBitmap::new();
|
group_docids = RoaringBitmap::new();
|
||||||
@ -236,7 +216,7 @@ where
|
|||||||
}
|
}
|
||||||
|
|
||||||
if !group_docids.is_empty() {
|
if !group_docids.is_empty() {
|
||||||
write_entry::<T, KC>(&mut writer, field_id, level as u8, left, right, &group_docids)?;
|
write_entry::<T, KC>(&mut writer, field_id, level, left, right, &group_docids)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,51 +254,3 @@ where
|
|||||||
writer.insert(&key, &data)?;
|
writer.insert(&key, &data)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn levels_iterator(
|
|
||||||
first_level_size: usize, // biggest level
|
|
||||||
last_level_size: usize, // smallest level
|
|
||||||
number_of_levels: usize,
|
|
||||||
easing_function: EasingName,
|
|
||||||
) -> impl Iterator<Item=usize>
|
|
||||||
{
|
|
||||||
let easing_function = match easing_function {
|
|
||||||
EasingName::Expo => ease_out_expo,
|
|
||||||
EasingName::Quart => ease_out_quart,
|
|
||||||
EasingName::Circ => ease_out_circ,
|
|
||||||
EasingName::Linear => ease_out_linear,
|
|
||||||
};
|
|
||||||
|
|
||||||
let b = last_level_size as f64;
|
|
||||||
let end = first_level_size as f64;
|
|
||||||
let c = end - b;
|
|
||||||
let d = number_of_levels;
|
|
||||||
(0..=d).map(move |t| ((end + b) - easing_function(t as f64, b, c, d as f64)) as usize)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Go look at the function definitions here:
|
|
||||||
// https://docs.rs/easer/0.2.1/easer/index.html
|
|
||||||
// https://easings.net/#easeOutExpo
|
|
||||||
fn ease_out_expo(t: f64, b: f64, c: f64, d: f64) -> f64 {
|
|
||||||
if t == d {
|
|
||||||
b + c
|
|
||||||
} else {
|
|
||||||
c * (-2.0_f64.powf(-10.0 * t / d) + 1.0) + b
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// https://easings.net/#easeOutCirc
|
|
||||||
fn ease_out_circ(t: f64, b: f64, c: f64, d: f64) -> f64 {
|
|
||||||
let t = t / d - 1.0;
|
|
||||||
c * (1.0 - t * t).sqrt() + b
|
|
||||||
}
|
|
||||||
|
|
||||||
// https://easings.net/#easeOutQuart
|
|
||||||
fn ease_out_quart(t: f64, b: f64, c: f64, d: f64) -> f64 {
|
|
||||||
let t = t / d - 1.0;
|
|
||||||
-c * ((t * t * t * t) - 1.0) + b
|
|
||||||
}
|
|
||||||
|
|
||||||
fn ease_out_linear(t: f64, b: f64, c: f64, d: f64) -> f64 {
|
|
||||||
c * t / d + b
|
|
||||||
}
|
|
||||||
|
@ -208,8 +208,8 @@ pub struct IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
pub(crate) chunk_compression_level: Option<u32>,
|
pub(crate) chunk_compression_level: Option<u32>,
|
||||||
pub(crate) chunk_fusing_shrink_size: Option<u64>,
|
pub(crate) chunk_fusing_shrink_size: Option<u64>,
|
||||||
pub(crate) thread_pool: Option<&'a ThreadPool>,
|
pub(crate) thread_pool: Option<&'a ThreadPool>,
|
||||||
facet_number_of_levels: Option<NonZeroUsize>,
|
facet_level_group_size: Option<NonZeroUsize>,
|
||||||
facet_last_level_size: Option<NonZeroUsize>,
|
facet_min_level_size: Option<NonZeroUsize>,
|
||||||
update_method: IndexDocumentsMethod,
|
update_method: IndexDocumentsMethod,
|
||||||
update_format: UpdateFormat,
|
update_format: UpdateFormat,
|
||||||
autogenerate_docids: bool,
|
autogenerate_docids: bool,
|
||||||
@ -228,8 +228,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
chunk_compression_level: None,
|
chunk_compression_level: None,
|
||||||
chunk_fusing_shrink_size: None,
|
chunk_fusing_shrink_size: None,
|
||||||
thread_pool: None,
|
thread_pool: None,
|
||||||
facet_number_of_levels: None,
|
facet_level_group_size: None,
|
||||||
facet_last_level_size: None,
|
facet_min_level_size: None,
|
||||||
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
update_method: IndexDocumentsMethod::ReplaceDocuments,
|
||||||
update_format: UpdateFormat::Json,
|
update_format: UpdateFormat::Json,
|
||||||
autogenerate_docids: true,
|
autogenerate_docids: true,
|
||||||
@ -588,11 +588,11 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> {
|
|||||||
builder.chunk_compression_type = self.chunk_compression_type;
|
builder.chunk_compression_type = self.chunk_compression_type;
|
||||||
builder.chunk_compression_level = self.chunk_compression_level;
|
builder.chunk_compression_level = self.chunk_compression_level;
|
||||||
builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
builder.chunk_fusing_shrink_size = self.chunk_fusing_shrink_size;
|
||||||
if let Some(value) = self.facet_number_of_levels {
|
if let Some(value) = self.facet_level_group_size {
|
||||||
builder.number_of_levels(value);
|
builder.level_group_size(value);
|
||||||
}
|
}
|
||||||
if let Some(value) = self.facet_last_level_size {
|
if let Some(value) = self.facet_min_level_size {
|
||||||
builder.last_level_size(value);
|
builder.min_level_size(value);
|
||||||
}
|
}
|
||||||
builder.execute()?;
|
builder.execute()?;
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ pub use self::available_documents_ids::AvailableDocumentsIds;
|
|||||||
pub use self::clear_documents::ClearDocuments;
|
pub use self::clear_documents::ClearDocuments;
|
||||||
pub use self::delete_documents::DeleteDocuments;
|
pub use self::delete_documents::DeleteDocuments;
|
||||||
pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat};
|
pub use self::index_documents::{IndexDocuments, IndexDocumentsMethod, UpdateFormat};
|
||||||
pub use self::facets::{Facets, EasingName};
|
pub use self::facets::Facets;
|
||||||
pub use self::settings::Settings;
|
pub use self::settings::Settings;
|
||||||
pub use self::update_builder::UpdateBuilder;
|
pub use self::update_builder::UpdateBuilder;
|
||||||
pub use self::update_step::UpdateIndexingStep;
|
pub use self::update_step::UpdateIndexingStep;
|
||||||
|
Loading…
Reference in New Issue
Block a user