Add the max_values_per_facet setting to the database

This commit is contained in:
Kerollmops 2022-06-08 17:28:23 +02:00
parent 52a494bd3b
commit 69931e50d2
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
6 changed files with 52 additions and 20 deletions

View File

@ -56,6 +56,8 @@ pub mod main_key {
pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len";
pub const EXACT_WORDS: &str = "exact-words";
pub const EXACT_ATTRIBUTES: &str = "exact-attributes";
pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet";
pub const PAGINATION_LIMITED_TO: &str = "pagination-limited-to";
}
pub mod db_name {
@ -1087,6 +1089,18 @@ impl Index {
self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES)?;
Ok(())
}
/// Reads the value stored under the `max-values-per-facet` key of the main database.
///
/// The stored entry is decoded as a `usize`; the `Option` is whatever the
/// underlying `get` call yields (presumably `None` when the key has never
/// been written or was deleted — confirm against heed's documentation).
pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result<Option<usize>> {
    let key = main_key::MAX_VALUES_PER_FACET;
    self.main.get::<_, Str, OwnedType<usize>>(txn, key)
}
/// Writes `val` under the `max-values-per-facet` key of the main database,
/// using the given write transaction.
pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> {
    let key = main_key::MAX_VALUES_PER_FACET;
    self.main.put::<_, Str, OwnedType<usize>>(txn, key, &val)
}
/// Deletes the `max-values-per-facet` key from the main database.
///
/// The returned boolean is forwarded unchanged from the underlying `delete`
/// call (presumably `true` when an entry was actually removed — confirm
/// against heed's documentation).
pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result<bool> {
    let key = main_key::MAX_VALUES_PER_FACET;
    self.main.delete::<_, Str>(txn, key)
}
}
#[cfg(test)]

View File

@ -38,7 +38,7 @@ pub use self::heed_codec::{
pub use self::index::Index;
pub use self::search::{
FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord,
MatchingWords, Search, SearchResult,
MatchingWords, Search, SearchResult, DEFAULT_VALUES_PER_FACET,
};
pub type Result<T> = std::result::Result<T, error::Error>;

View File

@ -15,7 +15,7 @@ use crate::{FieldId, Index, Result};
/// The default number of values per facet that will
/// be fetched from the key-value store.
const DEFAULT_VALUES_BY_FACET: usize = 100;
pub const DEFAULT_VALUES_PER_FACET: usize = 100;
/// Threshold on the number of candidates that will make
/// the system choose between one algorithm or another.
@ -24,7 +24,7 @@ const CANDIDATES_THRESHOLD: u64 = 3000;
pub struct FacetDistribution<'a> {
facets: Option<HashSet<String>>,
candidates: Option<RoaringBitmap>,
max_values_by_facet: usize,
max_values_per_facet: usize,
rtxn: &'a heed::RoTxn<'a>,
index: &'a Index,
}
@ -34,7 +34,7 @@ impl<'a> FacetDistribution<'a> {
FacetDistribution {
facets: None,
candidates: None,
max_values_by_facet: DEFAULT_VALUES_BY_FACET,
max_values_per_facet: DEFAULT_VALUES_PER_FACET,
rtxn,
index,
}
@ -45,8 +45,8 @@ impl<'a> FacetDistribution<'a> {
self
}
pub fn max_values_by_facet(&mut self, max: usize) -> &mut Self {
self.max_values_by_facet = max;
pub fn max_values_per_facet(&mut self, max: usize) -> &mut Self {
self.max_values_per_facet = max;
self
}
@ -82,7 +82,8 @@ impl<'a> FacetDistribution<'a> {
let ((_, _, value), ()) = result?;
*distribution.entry(value.to_string()).or_insert(0) += 1;
if distribution.len() - distribution_prelength == self.max_values_by_facet {
if distribution.len() - distribution_prelength == self.max_values_per_facet
{
break;
}
}
@ -108,7 +109,7 @@ impl<'a> FacetDistribution<'a> {
.or_insert_with(|| (original_value, 0));
*count += 1;
if normalized_distribution.len() == self.max_values_by_facet {
if normalized_distribution.len() == self.max_values_per_facet {
break;
}
}
@ -141,7 +142,7 @@ impl<'a> FacetDistribution<'a> {
if !docids.is_empty() {
distribution.insert(value.to_string(), docids.len());
}
if distribution.len() == self.max_values_by_facet {
if distribution.len() == self.max_values_per_facet {
break;
}
}
@ -164,7 +165,7 @@ impl<'a> FacetDistribution<'a> {
if !docids.is_empty() {
distribution.insert(original.to_string(), docids.len());
}
if distribution.len() == self.max_values_by_facet {
if distribution.len() == self.max_values_per_facet {
break;
}
}
@ -186,7 +187,7 @@ impl<'a> FacetDistribution<'a> {
for result in range {
let ((_, _, value, _), docids) = result?;
distribution.insert(value.to_string(), docids.len());
if distribution.len() == self.max_values_by_facet {
if distribution.len() == self.max_values_per_facet {
break;
}
}
@ -202,7 +203,7 @@ impl<'a> FacetDistribution<'a> {
for result in iter {
let ((_, normalized_value), (original_value, docids)) = result?;
normalized_distribution.insert(normalized_value, (original_value, docids.len()));
if normalized_distribution.len() == self.max_values_by_facet {
if normalized_distribution.len() == self.max_values_per_facet {
break;
}
}
@ -290,12 +291,13 @@ impl<'a> FacetDistribution<'a> {
impl fmt::Debug for FacetDistribution<'_> {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _ } = self;
let FacetDistribution { facets, candidates, max_values_per_facet, rtxn: _, index: _ } =
self;
f.debug_struct("FacetDistribution")
.field("facets", facets)
.field("candidates", candidates)
.field("max_values_by_facet", max_values_by_facet)
.field("max_values_per_facet", max_values_per_facet)
.finish()
}
}

View File

@ -1,4 +1,4 @@
pub use self::facet_distribution::FacetDistribution;
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange};
pub use self::facet_string::FacetStringIter;
pub use self::filter::Filter;

View File

@ -15,7 +15,7 @@ use log::debug;
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter};
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter, DEFAULT_VALUES_PER_FACET};
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
pub use self::matches::{
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,

View File

@ -105,7 +105,7 @@ pub struct Settings<'a, 't, 'u, 'i> {
/// Attributes on which typo tolerance is disabled.
exact_attributes: Setting<HashSet<String>>,
max_values_per_facet: Setting<usize>,
limit_pagination_to: Setting<usize>,
pagination_limited_to: Setting<usize>,
}
impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
@ -132,7 +132,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
min_word_len_one_typo: Setting::NotSet,
exact_attributes: Setting::NotSet,
max_values_per_facet: Setting::NotSet,
limit_pagination_to: Setting::NotSet,
pagination_limited_to: Setting::NotSet,
indexer_config,
}
}
@ -632,6 +632,20 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
Ok(())
}
/// Applies the pending `max_values_per_facet` setting to the index.
///
/// * `Setting::Set(limit)` stores `limit` in the index,
/// * `Setting::Reset` deletes the stored value,
/// * `Setting::NotSet` leaves the index untouched.
///
/// Any error raised by the index write is propagated to the caller.
fn update_max_values_per_facet(&mut self) -> Result<()> {
    match self.max_values_per_facet {
        // Nothing was requested for this setting: keep the index as it is.
        Setting::NotSet => {}
        Setting::Reset => {
            self.index.delete_max_values_per_facet(&mut self.wtxn)?;
        }
        Setting::Set(limit) => {
            self.index.put_max_values_per_facet(&mut self.wtxn, limit)?;
        }
    }

    Ok(())
}
pub fn execute<F>(mut self, progress_callback: F) -> Result<()>
where
F: Fn(UpdateIndexingStep) + Sync,
@ -650,6 +664,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
self.update_authorize_typos()?;
self.update_min_typo_word_len()?;
self.update_exact_words()?;
self.update_max_values_per_facet()?;
self.update_pagination_limited_to()?;
// If there is new faceted fields we indicate that we must reindex as we must
// index new fields as facets. It means that the distinct attribute,
@ -1546,7 +1562,7 @@ mod tests {
exact_words,
exact_attributes,
max_values_per_facet,
limit_pagination_to,
pagination_limited_to,
} = builder;
assert!(matches!(searchable_fields, Setting::NotSet));
@ -1564,6 +1580,6 @@ mod tests {
assert!(matches!(exact_words, Setting::NotSet));
assert!(matches!(exact_attributes, Setting::NotSet));
assert!(matches!(max_values_per_facet, Setting::NotSet));
assert!(matches!(limit_pagination_to, Setting::NotSet));
assert!(matches!(pagination_limited_to, Setting::NotSet));
}
}