From 522e79f2e0dec684cca7db9958f1851d43387db3 Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Mon, 29 Mar 2021 19:07:22 +0300 Subject: [PATCH 1/3] feat(search, criteria): introduce a percentage threshold to the asc/desc --- milli/src/search/criteria/asc_desc.rs | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index 6b8afad2c..d4b85b2bf 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -19,6 +19,10 @@ use crate::search::WordDerivationsCache; use crate::{FieldsIdsMap, FieldId, Index}; use super::{Criterion, CriterionResult}; +/// If the number of candidates is lower or equal to the specified % of total number of documents, +/// use simple sort. Otherwise, use facet database. +const CANDIDATES_THRESHOLD: f64 = 0.1; + pub struct AscDesc<'t> { index: &'t Index, rtxn: &'t heed::RoTxn<'t>, @@ -237,7 +241,7 @@ fn field_id_facet_type( /// Returns an iterator over groups of the given candidates in ascending or descending order. /// -/// It will either use an iterative or a recusrsive method on the whole facet database depending +/// It will either use an iterative or a recursive method on the whole facet database depending /// on the number of candidates to rank. fn facet_ordered<'t>( index: &'t Index, @@ -248,9 +252,11 @@ fn facet_ordered<'t>( candidates: RoaringBitmap, ) -> anyhow::Result> + 't>> { + let number_of_documents = index.number_of_documents(&rtxn)? 
as f64; + match facet_type { FacetType::Float => { - if candidates.len() <= 1000 { + if candidates.len() / number_of_documents * 100 <= CANDIDATES_THRESHOLD { let iter = iterative_facet_ordered_iter::>( index, rtxn, field_id, ascending, candidates, )?; @@ -266,7 +272,7 @@ fn facet_ordered<'t>( } }, FacetType::Integer => { - if candidates.len() <= 1000 { + if candidates.len() / number_of_documents * 100 <= CANDIDATES_THRESHOLD { let iter = iterative_facet_ordered_iter::( index, rtxn, field_id, ascending, candidates, )?; From a776ec97185b449a718236bd231d2cf8ef7650b3 Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Mon, 29 Mar 2021 19:16:35 +0300 Subject: [PATCH 2/3] fix division --- milli/src/search/criteria/asc_desc.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index d4b85b2bf..df9f164e2 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -256,7 +256,7 @@ fn facet_ordered<'t>( match facet_type { FacetType::Float => { - if candidates.len() / number_of_documents * 100 <= CANDIDATES_THRESHOLD { + if candidates.len() as f64 / number_of_documents * 100.0 <= CANDIDATES_THRESHOLD { let iter = iterative_facet_ordered_iter::>( index, rtxn, field_id, ascending, candidates, )?; @@ -272,7 +272,7 @@ fn facet_ordered<'t>( } }, FacetType::Integer => { - if candidates.len() / number_of_documents * 100 <= CANDIDATES_THRESHOLD { + if candidates.len() as f64 / number_of_documents * 100.0 <= CANDIDATES_THRESHOLD { let iter = iterative_facet_ordered_iter::( index, rtxn, field_id, ascending, candidates, )?; From 1e3f05db8f0cdd82f092c709c2e70522cc5f4902 Mon Sep 17 00:00:00 2001 From: Alexey Shekhirin Date: Tue, 30 Mar 2021 11:57:10 +0300 Subject: [PATCH 3/3] use fixed number of candidates as a threshold --- milli/src/search/criteria/asc_desc.rs | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git 
a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index df9f164e2..78ae540e4 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -19,9 +19,9 @@ use crate::search::WordDerivationsCache; use crate::{FieldsIdsMap, FieldId, Index}; use super::{Criterion, CriterionResult}; -/// If the number of candidates is lower or equal to the specified % of total number of documents, -/// use simple sort. Otherwise, use facet database. -const CANDIDATES_THRESHOLD: f64 = 0.1; +/// Threshold on the number of candidates that makes +/// the system choose between one algorithm or another. +const CANDIDATES_THRESHOLD: u64 = 1000; pub struct AscDesc<'t> { index: &'t Index, @@ -252,11 +252,9 @@ fn facet_ordered<'t>( candidates: RoaringBitmap, ) -> anyhow::Result> + 't>> { - let number_of_documents = index.number_of_documents(&rtxn)? as f64; - match facet_type { FacetType::Float => { - if candidates.len() as f64 / number_of_documents * 100.0 <= CANDIDATES_THRESHOLD { + if candidates.len() <= CANDIDATES_THRESHOLD { let iter = iterative_facet_ordered_iter::>( index, rtxn, field_id, ascending, candidates, )?; @@ -272,7 +270,7 @@ fn facet_ordered<'t>( } }, FacetType::Integer => { - if candidates.len() as f64 / number_of_documents * 100.0 <= CANDIDATES_THRESHOLD { + if candidates.len() <= CANDIDATES_THRESHOLD { let iter = iterative_facet_ordered_iter::( index, rtxn, field_id, ascending, candidates, )?;