diff --git a/milli/src/index.rs b/milli/src/index.rs index 41bd85b93..28c870592 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -56,6 +56,8 @@ pub mod main_key { pub const TWO_TYPOS_WORD_LEN: &str = "two-typos-word-len"; pub const EXACT_WORDS: &str = "exact-words"; pub const EXACT_ATTRIBUTES: &str = "exact-attributes"; + pub const MAX_VALUES_PER_FACET: &str = "max-values-per-facet"; + pub const PAGINATION_LIMITED_TO: &str = "pagination-limited-to"; } pub mod db_name { @@ -1087,6 +1089,34 @@ impl Index { self.main.delete::<_, Str>(txn, main_key::EXACT_ATTRIBUTES)?; Ok(()) } + + pub fn max_values_per_facet(&self, txn: &RoTxn) -> heed::Result> { + self.main.get::<_, Str, OwnedType>(txn, main_key::MAX_VALUES_PER_FACET) + } + + pub(crate) fn put_max_values_per_facet(&self, txn: &mut RwTxn, val: usize) -> heed::Result<()> { + self.main.put::<_, Str, OwnedType>(txn, main_key::MAX_VALUES_PER_FACET, &val) + } + + pub(crate) fn delete_max_values_per_facet(&self, txn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(txn, main_key::MAX_VALUES_PER_FACET) + } + + pub fn pagination_limited_to(&self, txn: &RoTxn) -> heed::Result> { + self.main.get::<_, Str, OwnedType>(txn, main_key::PAGINATION_LIMITED_TO) + } + + pub(crate) fn put_pagination_limited_to( + &self, + txn: &mut RwTxn, + val: usize, + ) -> heed::Result<()> { + self.main.put::<_, Str, OwnedType>(txn, main_key::PAGINATION_LIMITED_TO, &val) + } + + pub(crate) fn delete_pagination_limited_to(&self, txn: &mut RwTxn) -> heed::Result { + self.main.delete::<_, Str>(txn, main_key::PAGINATION_LIMITED_TO) + } } #[cfg(test)] diff --git a/milli/src/lib.rs b/milli/src/lib.rs index f28677ed8..81cd057d5 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -38,7 +38,7 @@ pub use self::heed_codec::{ pub use self::index::Index; pub use self::search::{ FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWord, - MatchingWords, Search, SearchResult, + MatchingWords, Search, SearchResult, DEFAULT_VALUES_PER_FACET, }; pub type Result = std::result::Result; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 8069abede..b2718a490 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -15,7 +15,7 @@ use crate::{FieldId, Index, Result}; /// The default number of values by facets that will /// be fetched from the key-value store. -const DEFAULT_VALUES_BY_FACET: usize = 100; +pub const DEFAULT_VALUES_PER_FACET: usize = 100; /// Threshold on the number of candidates that will make /// the system to choose between one algorithm or another. @@ -24,7 +24,7 @@ const CANDIDATES_THRESHOLD: u64 = 3000; pub struct FacetDistribution<'a> { facets: Option>, candidates: Option, - max_values_by_facet: usize, + max_values_per_facet: usize, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, } @@ -34,7 +34,7 @@ impl<'a> FacetDistribution<'a> { FacetDistribution { facets: None, candidates: None, - max_values_by_facet: DEFAULT_VALUES_BY_FACET, + max_values_per_facet: DEFAULT_VALUES_PER_FACET, rtxn, index, } @@ -45,8 +45,8 @@ impl<'a> FacetDistribution<'a> { self } - pub fn max_values_by_facet(&mut self, max: usize) -> &mut Self { - self.max_values_by_facet = max; + pub fn max_values_per_facet(&mut self, max: usize) -> &mut Self { + self.max_values_per_facet = max; self } @@ -82,7 +82,8 @@ impl<'a> FacetDistribution<'a> { let ((_, _, value), ()) = result?; *distribution.entry(value.to_string()).or_insert(0) += 1; - if distribution.len() - distribution_prelength == self.max_values_by_facet { + if distribution.len() - distribution_prelength == self.max_values_per_facet + { break; } } @@ -108,7 +109,7 @@ impl<'a> FacetDistribution<'a> { .or_insert_with(|| (original_value, 0)); *count += 1; - if normalized_distribution.len() == self.max_values_by_facet { + if normalized_distribution.len() == self.max_values_per_facet { break; } } @@ -141,7 +142,7 @@ impl<'a> FacetDistribution<'a> { if !docids.is_empty() { distribution.insert(value.to_string(), docids.len()); } - if distribution.len() == self.max_values_by_facet { + if distribution.len() == self.max_values_per_facet { break; } } @@ -164,7 +165,7 @@ impl<'a> FacetDistribution<'a> { if !docids.is_empty() { distribution.insert(original.to_string(), docids.len()); } - if distribution.len() == self.max_values_by_facet { + if distribution.len() == self.max_values_per_facet { break; } } @@ -186,7 +187,7 @@ impl<'a> FacetDistribution<'a> { for result in range { let ((_, _, value, _), docids) = result?; distribution.insert(value.to_string(), docids.len()); - if distribution.len() == self.max_values_by_facet { + if distribution.len() == self.max_values_per_facet { break; } } @@ -202,7 +203,7 @@ impl<'a> FacetDistribution<'a> { for result in iter { let ((_, normalized_value), (original_value, docids)) = result?; normalized_distribution.insert(normalized_value, (original_value, docids.len())); - if normalized_distribution.len() == self.max_values_by_facet { + if normalized_distribution.len() == self.max_values_per_facet { break; } } @@ -290,12 +291,13 @@ impl<'a> FacetDistribution<'a> { impl fmt::Debug for FacetDistribution<'_> { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - let FacetDistribution { facets, candidates, max_values_by_facet, rtxn: _, index: _ } = self; + let FacetDistribution { facets, candidates, max_values_per_facet, rtxn: _, index: _ } = + self; f.debug_struct("FacetDistribution") .field("facets", facets) .field("candidates", candidates) - .field("max_values_by_facet", max_values_by_facet) + .field("max_values_per_facet", max_values_per_facet) .finish() } } diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index c8f91352b..e3ac95882 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,4 +1,4 @@ -pub use self::facet_distribution::FacetDistribution; +pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::facet_string::FacetStringIter; pub use self::filter::Filter; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 62a7815b0..1c363e142 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -15,7 +15,7 @@ use log::debug; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub use self::facet::{FacetDistribution, FacetNumberIter, Filter}; +pub use self::facet::{FacetDistribution, FacetNumberIter, Filter, DEFAULT_VALUES_PER_FACET}; use self::fst_utils::{Complement, Intersection, StartsWith, Union}; pub use self::matches::{ FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 9363d8eb6..174d7073d 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -104,6 +104,8 @@ pub struct Settings<'a, 't, 'u, 'i> { exact_words: Setting>, /// Attributes on which typo tolerance is disabled. exact_attributes: Setting>, + max_values_per_facet: Setting, + pagination_limited_to: Setting, } impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { @@ -129,6 +131,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { min_word_len_two_typos: Setting::NotSet, min_word_len_one_typo: Setting::NotSet, exact_attributes: Setting::NotSet, + max_values_per_facet: Setting::NotSet, + pagination_limited_to: Setting::NotSet, indexer_config, } } @@ -246,6 +250,22 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.exact_attributes = Setting::Reset; } + pub fn set_max_values_per_facet(&mut self, value: usize) { + self.max_values_per_facet = Setting::Set(value); + } + + pub fn reset_max_values_per_facet(&mut self) { + self.max_values_per_facet = Setting::Reset; + } + + pub fn set_pagination_limited_to(&mut self, value: usize) { + self.pagination_limited_to = Setting::Set(value); + } + + pub fn reset_pagination_limited_to(&mut self) { + self.pagination_limited_to = Setting::Reset; + } + fn reindex(&mut self, cb: &F, old_fields_ids_map: FieldsIdsMap) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync, @@ -612,6 +632,34 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { Ok(()) } + fn update_max_values_per_facet(&mut self) -> Result<()> { + match self.max_values_per_facet { + Setting::Set(max) => { + self.index.put_max_values_per_facet(&mut self.wtxn, max)?; + } + Setting::Reset => { + self.index.delete_max_values_per_facet(&mut self.wtxn)?; + } + Setting::NotSet => (), + } + + Ok(()) + } + + fn update_pagination_limited_to(&mut self) -> Result<()> { + match self.pagination_limited_to { + Setting::Set(max) => { + self.index.put_pagination_limited_to(&mut self.wtxn, max)?; + } + Setting::Reset => { + self.index.delete_pagination_limited_to(&mut self.wtxn)?; + } + Setting::NotSet => (), + } + + Ok(()) + } + pub fn execute(mut self, progress_callback: F) -> Result<()> where F: Fn(UpdateIndexingStep) + Sync, @@ -630,6 +678,8 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { self.update_authorize_typos()?; self.update_min_typo_word_len()?; self.update_exact_words()?; + self.update_max_values_per_facet()?; + self.update_pagination_limited_to()?; // If there is new faceted fields we indicate that we must reindex as we must // index new fields as facets. It means that the distinct attribute, @@ -1525,6 +1575,8 @@ mod tests { min_word_len_one_typo, exact_words, exact_attributes, + max_values_per_facet, + pagination_limited_to, } = builder; assert!(matches!(searchable_fields, Setting::NotSet)); @@ -1541,5 +1593,7 @@ mod tests { assert!(matches!(min_word_len_one_typo, Setting::NotSet)); assert!(matches!(exact_words, Setting::NotSet)); assert!(matches!(exact_attributes, Setting::NotSet)); + assert!(matches!(max_values_per_facet, Setting::NotSet)); + assert!(matches!(pagination_limited_to, Setting::NotSet)); } }