mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-05-25 09:03:59 +02:00
Refactor geo_max_bucket_size injection
This commit is contained in:
parent
7c1c4f9c26
commit
fd7fbfa9eb
@ -164,7 +164,7 @@ impl Search<'_> {
|
|||||||
sort_criteria: self.sort_criteria.clone(),
|
sort_criteria: self.sort_criteria.clone(),
|
||||||
distinct: self.distinct.clone(),
|
distinct: self.distinct.clone(),
|
||||||
searchable_attributes: self.searchable_attributes,
|
searchable_attributes: self.searchable_attributes,
|
||||||
geo_strategy: self.geo_strategy,
|
geo_param: self.geo_param,
|
||||||
terms_matching_strategy: self.terms_matching_strategy,
|
terms_matching_strategy: self.terms_matching_strategy,
|
||||||
scoring_strategy: ScoringStrategy::Detailed,
|
scoring_strategy: ScoringStrategy::Detailed,
|
||||||
words_limit: self.words_limit,
|
words_limit: self.words_limit,
|
||||||
|
@ -45,7 +45,7 @@ pub struct Search<'a> {
|
|||||||
sort_criteria: Option<Vec<AscDesc>>,
|
sort_criteria: Option<Vec<AscDesc>>,
|
||||||
distinct: Option<String>,
|
distinct: Option<String>,
|
||||||
searchable_attributes: Option<&'a [String]>,
|
searchable_attributes: Option<&'a [String]>,
|
||||||
geo_strategy: new::GeoSortStrategy,
|
geo_param: new::GeoSortParameter,
|
||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
scoring_strategy: ScoringStrategy,
|
scoring_strategy: ScoringStrategy,
|
||||||
words_limit: usize,
|
words_limit: usize,
|
||||||
@ -68,7 +68,7 @@ impl<'a> Search<'a> {
|
|||||||
sort_criteria: None,
|
sort_criteria: None,
|
||||||
distinct: None,
|
distinct: None,
|
||||||
searchable_attributes: None,
|
searchable_attributes: None,
|
||||||
geo_strategy: new::GeoSortStrategy::default(),
|
geo_param: new::GeoSortParameter::default(),
|
||||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||||
scoring_strategy: Default::default(),
|
scoring_strategy: Default::default(),
|
||||||
exhaustive_number_hits: false,
|
exhaustive_number_hits: false,
|
||||||
@ -145,7 +145,13 @@ impl<'a> Search<'a> {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
|
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
|
||||||
self.geo_strategy = strategy;
|
self.geo_param.strategy = strategy;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
pub fn geo_max_bucket_size(&mut self, max_size: u64) -> &mut Search<'a> {
|
||||||
|
self.geo_param.max_bucket_size = max_size;
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -232,7 +238,7 @@ impl<'a> Search<'a> {
|
|||||||
universe,
|
universe,
|
||||||
&self.sort_criteria,
|
&self.sort_criteria,
|
||||||
&self.distinct,
|
&self.distinct,
|
||||||
self.geo_strategy,
|
self.geo_param,
|
||||||
self.offset,
|
self.offset,
|
||||||
self.limit,
|
self.limit,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
@ -251,7 +257,7 @@ impl<'a> Search<'a> {
|
|||||||
universe,
|
universe,
|
||||||
&self.sort_criteria,
|
&self.sort_criteria,
|
||||||
&self.distinct,
|
&self.distinct,
|
||||||
self.geo_strategy,
|
self.geo_param,
|
||||||
self.offset,
|
self.offset,
|
||||||
self.limit,
|
self.limit,
|
||||||
Some(self.words_limit),
|
Some(self.words_limit),
|
||||||
@ -290,7 +296,7 @@ impl fmt::Debug for Search<'_> {
|
|||||||
sort_criteria,
|
sort_criteria,
|
||||||
distinct,
|
distinct,
|
||||||
searchable_attributes,
|
searchable_attributes,
|
||||||
geo_strategy: _,
|
geo_param: _,
|
||||||
terms_matching_strategy,
|
terms_matching_strategy,
|
||||||
scoring_strategy,
|
scoring_strategy,
|
||||||
words_limit,
|
words_limit,
|
||||||
|
@ -39,6 +39,22 @@ fn facet_number_values<'a>(
|
|||||||
Ok(iter)
|
Ok(iter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Settings consumed by `GeoSort::new`, grouped in one `Copy` value so callers pass a single argument.
#[derive(Debug, Clone, Copy)]
|
||||||
|
|
||||||
|
pub struct Parameter {
|
||||||
|
/// Strategy used by the geo sort.
|
||||||
|
pub strategy: Strategy,
|
||||||
|
/// Upper limit on the number of documents in a single bucket, to avoid unexpectedly large overhead.
|
||||||
|
pub max_bucket_size: u64,
|
||||||
|
/// Accounts for GPS and geographical-calculation errors: distances less than `distance_error_margin` are treated as equal. NOTE(review): the unit is not stated here — confirm against the distance computation.
|
||||||
|
pub distance_error_margin: f64,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Default for Parameter {
|
||||||
|
/// Returns the default geo sort parameters: the default `Strategy`, a bucket limit of 1000 documents, and a distance error margin of 1.0.
fn default() -> Self {
|
||||||
|
Self { strategy: Strategy::default(), max_bucket_size: 1000, distance_error_margin: 1.0 }
|
||||||
|
}
|
||||||
|
}
|
||||||
/// Define the strategy used by the geo sort.
|
/// Define the strategy used by the geo sort.
|
||||||
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
|
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
|
||||||
/// the point where we move from using the iterative strategy to the rtree.
|
/// the point where we move from using the iterative strategy to the rtree.
|
||||||
@ -71,26 +87,6 @@ impl Strategy {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(not(test))]
|
|
||||||
fn default_max_bucket_size() -> u64 {
|
|
||||||
1000
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
static DEFAULT_MAX_BUCKET_SIZE: std::sync::Mutex<u64> = std::sync::Mutex::new(1000);
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
pub fn set_default_max_bucket_size(n: u64) {
|
|
||||||
let mut size = DEFAULT_MAX_BUCKET_SIZE.lock().unwrap();
|
|
||||||
*size = n;
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
fn default_max_bucket_size() -> u64 {
|
|
||||||
let max_size = *(DEFAULT_MAX_BUCKET_SIZE.lock().unwrap());
|
|
||||||
max_size
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct GeoSort<Q: RankingRuleQueryTrait> {
|
pub struct GeoSort<Q: RankingRuleQueryTrait> {
|
||||||
query: Option<Q>,
|
query: Option<Q>,
|
||||||
|
|
||||||
@ -111,22 +107,23 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
|
|||||||
|
|
||||||
impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
||||||
pub fn new(
|
pub fn new(
|
||||||
strategy: Strategy,
|
parameter: &Parameter,
|
||||||
geo_faceted_docids: RoaringBitmap,
|
geo_faceted_docids: RoaringBitmap,
|
||||||
point: [f64; 2],
|
point: [f64; 2],
|
||||||
ascending: bool,
|
ascending: bool,
|
||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
|
let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter;
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
query: None,
|
query: None,
|
||||||
strategy,
|
strategy: *strategy,
|
||||||
ascending,
|
ascending,
|
||||||
point,
|
point,
|
||||||
geo_candidates: geo_faceted_docids,
|
geo_candidates: geo_faceted_docids,
|
||||||
field_ids: None,
|
field_ids: None,
|
||||||
rtree: None,
|
rtree: None,
|
||||||
cached_sorted_docids: VecDeque::new(),
|
cached_sorted_docids: VecDeque::new(),
|
||||||
max_bucket_size: default_max_bucket_size(),
|
max_bucket_size: *max_bucket_size,
|
||||||
distance_error_margin: 1.0,
|
distance_error_margin: *distance_error_margin,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -513,7 +513,7 @@ mod tests {
|
|||||||
universe,
|
universe,
|
||||||
&None,
|
&None,
|
||||||
&None,
|
&None,
|
||||||
crate::search::new::GeoSortStrategy::default(),
|
crate::search::new::GeoSortParameter::default(),
|
||||||
0,
|
0,
|
||||||
100,
|
100,
|
||||||
Some(10),
|
Some(10),
|
||||||
|
@ -45,6 +45,7 @@ use sort::Sort;
|
|||||||
|
|
||||||
use self::distinct::facet_string_values;
|
use self::distinct::facet_string_values;
|
||||||
use self::geo_sort::GeoSort;
|
use self::geo_sort::GeoSort;
|
||||||
|
pub use self::geo_sort::Parameter as GeoSortParameter;
|
||||||
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||||
use self::graph_based_ranking_rule::Words;
|
use self::graph_based_ranking_rule::Words;
|
||||||
use self::interner::Interned;
|
use self::interner::Interned;
|
||||||
@ -274,7 +275,7 @@ fn resolve_negative_phrases(
|
|||||||
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||||
ctx: &SearchContext<'ctx>,
|
ctx: &SearchContext<'ctx>,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
geo_strategy: geo_sort::Strategy,
|
geo_param: geo_sort::Parameter,
|
||||||
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
||||||
let mut sort = false;
|
let mut sort = false;
|
||||||
let mut sorted_fields = HashSet::new();
|
let mut sorted_fields = HashSet::new();
|
||||||
@ -299,7 +300,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
|||||||
&mut ranking_rules,
|
&mut ranking_rules,
|
||||||
&mut sorted_fields,
|
&mut sorted_fields,
|
||||||
&mut geo_sorted,
|
&mut geo_sorted,
|
||||||
geo_strategy,
|
&geo_param,
|
||||||
)?;
|
)?;
|
||||||
sort = true;
|
sort = true;
|
||||||
}
|
}
|
||||||
@ -326,7 +327,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
|||||||
fn get_ranking_rules_for_vector<'ctx>(
|
fn get_ranking_rules_for_vector<'ctx>(
|
||||||
ctx: &SearchContext<'ctx>,
|
ctx: &SearchContext<'ctx>,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
geo_strategy: geo_sort::Strategy,
|
geo_param: geo_sort::Parameter,
|
||||||
limit_plus_offset: usize,
|
limit_plus_offset: usize,
|
||||||
target: &[f32],
|
target: &[f32],
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
@ -375,7 +376,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
|||||||
&mut ranking_rules,
|
&mut ranking_rules,
|
||||||
&mut sorted_fields,
|
&mut sorted_fields,
|
||||||
&mut geo_sorted,
|
&mut geo_sorted,
|
||||||
geo_strategy,
|
&geo_param,
|
||||||
)?;
|
)?;
|
||||||
sort = true;
|
sort = true;
|
||||||
}
|
}
|
||||||
@ -403,7 +404,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
|||||||
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||||
ctx: &SearchContext<'ctx>,
|
ctx: &SearchContext<'ctx>,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
geo_strategy: geo_sort::Strategy,
|
geo_param: geo_sort::Parameter,
|
||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
|
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
|
||||||
// query graph search
|
// query graph search
|
||||||
@ -477,7 +478,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
|||||||
&mut ranking_rules,
|
&mut ranking_rules,
|
||||||
&mut sorted_fields,
|
&mut sorted_fields,
|
||||||
&mut geo_sorted,
|
&mut geo_sorted,
|
||||||
geo_strategy,
|
&geo_param,
|
||||||
)?;
|
)?;
|
||||||
sort = true;
|
sort = true;
|
||||||
}
|
}
|
||||||
@ -514,7 +515,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
|||||||
ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>,
|
ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>,
|
||||||
sorted_fields: &mut HashSet<String>,
|
sorted_fields: &mut HashSet<String>,
|
||||||
geo_sorted: &mut bool,
|
geo_sorted: &mut bool,
|
||||||
geo_strategy: geo_sort::Strategy,
|
geo_param: &geo_sort::Parameter,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let sort_criteria = sort_criteria.clone().unwrap_or_default();
|
let sort_criteria = sort_criteria.clone().unwrap_or_default();
|
||||||
ranking_rules.reserve(sort_criteria.len());
|
ranking_rules.reserve(sort_criteria.len());
|
||||||
@ -540,7 +541,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
|||||||
}
|
}
|
||||||
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
||||||
ranking_rules.push(Box::new(GeoSort::new(
|
ranking_rules.push(Box::new(GeoSort::new(
|
||||||
geo_strategy,
|
geo_param,
|
||||||
geo_faceted_docids,
|
geo_faceted_docids,
|
||||||
point,
|
point,
|
||||||
true,
|
true,
|
||||||
@ -552,7 +553,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
|||||||
}
|
}
|
||||||
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
||||||
ranking_rules.push(Box::new(GeoSort::new(
|
ranking_rules.push(Box::new(GeoSort::new(
|
||||||
geo_strategy,
|
geo_param,
|
||||||
geo_faceted_docids,
|
geo_faceted_docids,
|
||||||
point,
|
point,
|
||||||
false,
|
false,
|
||||||
@ -584,7 +585,7 @@ pub fn execute_vector_search(
|
|||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
distinct: &Option<String>,
|
distinct: &Option<String>,
|
||||||
geo_strategy: geo_sort::Strategy,
|
geo_param: geo_sort::Parameter,
|
||||||
from: usize,
|
from: usize,
|
||||||
length: usize,
|
length: usize,
|
||||||
embedder_name: &str,
|
embedder_name: &str,
|
||||||
@ -600,7 +601,7 @@ pub fn execute_vector_search(
|
|||||||
let ranking_rules = get_ranking_rules_for_vector(
|
let ranking_rules = get_ranking_rules_for_vector(
|
||||||
ctx,
|
ctx,
|
||||||
sort_criteria,
|
sort_criteria,
|
||||||
geo_strategy,
|
geo_param,
|
||||||
from + length,
|
from + length,
|
||||||
vector,
|
vector,
|
||||||
embedder_name,
|
embedder_name,
|
||||||
@ -647,7 +648,7 @@ pub fn execute_search(
|
|||||||
mut universe: RoaringBitmap,
|
mut universe: RoaringBitmap,
|
||||||
sort_criteria: &Option<Vec<AscDesc>>,
|
sort_criteria: &Option<Vec<AscDesc>>,
|
||||||
distinct: &Option<String>,
|
distinct: &Option<String>,
|
||||||
geo_strategy: geo_sort::Strategy,
|
geo_param: geo_sort::Parameter,
|
||||||
from: usize,
|
from: usize,
|
||||||
length: usize,
|
length: usize,
|
||||||
words_limit: Option<usize>,
|
words_limit: Option<usize>,
|
||||||
@ -761,7 +762,7 @@ pub fn execute_search(
|
|||||||
let ranking_rules = get_ranking_rules_for_query_graph_search(
|
let ranking_rules = get_ranking_rules_for_query_graph_search(
|
||||||
ctx,
|
ctx,
|
||||||
sort_criteria,
|
sort_criteria,
|
||||||
geo_strategy,
|
geo_param,
|
||||||
terms_matching_strategy,
|
terms_matching_strategy,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
@ -783,7 +784,7 @@ pub fn execute_search(
|
|||||||
)?
|
)?
|
||||||
} else {
|
} else {
|
||||||
let ranking_rules =
|
let ranking_rules =
|
||||||
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
|
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_param)?;
|
||||||
bucket_sort(
|
bucket_sort(
|
||||||
ctx,
|
ctx,
|
||||||
ranking_rules,
|
ranking_rules,
|
||||||
|
@ -157,10 +157,10 @@ fn test_geo_sort_reached_max_bucket_size() {
|
|||||||
]))
|
]))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
crate::search::new::geo_sort::set_default_max_bucket_size(2);
|
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
|
||||||
let mut s = Search::new(&rtxn, &index);
|
let mut s = Search::new(&rtxn, &index);
|
||||||
|
s.geo_max_bucket_size(2);
|
||||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||||
s.sort_criteria(vec![
|
s.sort_criteria(vec![
|
||||||
AscDesc::Asc(Member::Geo([0., 0.])),
|
AscDesc::Asc(Member::Geo([0., 0.])),
|
||||||
@ -200,9 +200,6 @@ fn test_geo_sort_reached_max_bucket_size() {
|
|||||||
}
|
}
|
||||||
let no_geo_ids = rtree_ids[10..].iter().collect_vec();
|
let no_geo_ids = rtree_ids[10..].iter().collect_vec();
|
||||||
insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);
|
insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);
|
||||||
|
|
||||||
// recover settings
|
|
||||||
crate::search::new::geo_sort::set_default_max_bucket_size(1000);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user