Merge pull request #5487 from HDT3213/bugfix/geosort

fix ranking rules after _geo do not work
This commit is contained in:
Tamo 2025-04-15 13:29:07 +00:00 committed by GitHub
commit 3ec5b9d488
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 981 additions and 56 deletions

View File

@ -1777,7 +1777,7 @@ async fn add_documents_with_geo_field() {
}, },
{ {
"id": "4", "id": "4",
"_geo": { "lat": "1", "lng": "1" }, "_geo": { "lat": "2", "lng": "2" },
}, },
]); ]);
@ -1828,8 +1828,8 @@ async fn add_documents_with_geo_field() {
{ {
"id": "4", "id": "4",
"_geo": { "_geo": {
"lat": "1", "lat": "2",
"lng": "1" "lng": "2"
} }
} }
], ],
@ -1848,14 +1848,6 @@ async fn add_documents_with_geo_field() {
@r###" @r###"
{ {
"hits": [ "hits": [
{
"id": "4",
"_geo": {
"lat": "1",
"lng": "1"
},
"_geoDistance": 5522018
},
{ {
"id": "3", "id": "3",
"_geo": { "_geo": {
@ -1864,6 +1856,14 @@ async fn add_documents_with_geo_field() {
}, },
"_geoDistance": 5522018 "_geoDistance": 5522018
}, },
{
"id": "4",
"_geo": {
"lat": "2",
"lng": "2"
},
"_geoDistance": 5408322
},
{ {
"id": "1" "id": "1"
}, },

View File

@ -164,7 +164,7 @@ impl Search<'_> {
sort_criteria: self.sort_criteria.clone(), sort_criteria: self.sort_criteria.clone(),
distinct: self.distinct.clone(), distinct: self.distinct.clone(),
searchable_attributes: self.searchable_attributes, searchable_attributes: self.searchable_attributes,
geo_strategy: self.geo_strategy, geo_param: self.geo_param,
terms_matching_strategy: self.terms_matching_strategy, terms_matching_strategy: self.terms_matching_strategy,
scoring_strategy: ScoringStrategy::Detailed, scoring_strategy: ScoringStrategy::Detailed,
words_limit: self.words_limit, words_limit: self.words_limit,

View File

@ -45,7 +45,7 @@ pub struct Search<'a> {
sort_criteria: Option<Vec<AscDesc>>, sort_criteria: Option<Vec<AscDesc>>,
distinct: Option<String>, distinct: Option<String>,
searchable_attributes: Option<&'a [String]>, searchable_attributes: Option<&'a [String]>,
geo_strategy: new::GeoSortStrategy, geo_param: new::GeoSortParameter,
terms_matching_strategy: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy, scoring_strategy: ScoringStrategy,
words_limit: usize, words_limit: usize,
@ -68,7 +68,7 @@ impl<'a> Search<'a> {
sort_criteria: None, sort_criteria: None,
distinct: None, distinct: None,
searchable_attributes: None, searchable_attributes: None,
geo_strategy: new::GeoSortStrategy::default(), geo_param: new::GeoSortParameter::default(),
terms_matching_strategy: TermsMatchingStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(),
scoring_strategy: Default::default(), scoring_strategy: Default::default(),
exhaustive_number_hits: false, exhaustive_number_hits: false,
@ -145,7 +145,13 @@ impl<'a> Search<'a> {
#[cfg(test)] #[cfg(test)]
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> { pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
self.geo_strategy = strategy; self.geo_param.strategy = strategy;
self
}
#[cfg(test)]
pub fn geo_max_bucket_size(&mut self, max_size: u64) -> &mut Search<'a> {
self.geo_param.max_bucket_size = max_size;
self self
} }
@ -232,7 +238,7 @@ impl<'a> Search<'a> {
universe, universe,
&self.sort_criteria, &self.sort_criteria,
&self.distinct, &self.distinct,
self.geo_strategy, self.geo_param,
self.offset, self.offset,
self.limit, self.limit,
embedder_name, embedder_name,
@ -251,7 +257,7 @@ impl<'a> Search<'a> {
universe, universe,
&self.sort_criteria, &self.sort_criteria,
&self.distinct, &self.distinct,
self.geo_strategy, self.geo_param,
self.offset, self.offset,
self.limit, self.limit,
Some(self.words_limit), Some(self.words_limit),
@ -290,7 +296,7 @@ impl fmt::Debug for Search<'_> {
sort_criteria, sort_criteria,
distinct, distinct,
searchable_attributes, searchable_attributes,
geo_strategy: _, geo_param: _,
terms_matching_strategy, terms_matching_strategy,
scoring_strategy, scoring_strategy,
words_limit, words_limit,

View File

@ -1,10 +1,8 @@
use std::collections::VecDeque;
use std::iter::FromIterator;
use heed::types::{Bytes, Unit}; use heed::types::{Bytes, Unit};
use heed::{RoPrefix, RoTxn}; use heed::{RoPrefix, RoTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use rstar::RTree; use rstar::RTree;
use std::collections::VecDeque;
use super::facet_string_values; use super::facet_string_values;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
@ -41,6 +39,21 @@ fn facet_number_values<'a>(
Ok(iter) Ok(iter)
} }
/// Tuning parameters handed to the geo sort ranking rule.
///
/// The `Default` implementation uses the default strategy, a bucket cap of
/// 1000 documents and an error margin of 1.0.
#[derive(Debug, Clone, Copy)]
pub struct Parameter {
/// Strategy used by the geo sort.
pub strategy: Strategy,
/// Maximum number of documents placed in a single bucket; a bucket is returned
/// as soon as it reaches this size, to avoid unexpectedly large overhead.
pub max_bucket_size: u64,
/// Tolerance accounting for GPS and geographical calculation errors: documents whose
/// distances to the target point differ by no more than this value are treated as
/// equally distant and share a bucket.
/// NOTE(review): presumably in the unit returned by `distance_between_two_points`
/// (likely meters) — confirm.
pub distance_error_margin: f64,
}
impl Default for Parameter {
    /// Builds the baseline geo sort configuration: the default strategy,
    /// buckets capped at 1000 documents, and an error margin of 1.0.
    fn default() -> Self {
        let strategy = Strategy::default();
        let max_bucket_size = 1000;
        let distance_error_margin = 1.0;
        Self { strategy, max_bucket_size, distance_error_margin }
    }
}
/// Define the strategy used by the geo sort. /// Define the strategy used by the geo sort.
/// The parameter represents the cache size, and, in the case of the Dynamic strategy, /// The parameter represents the cache size, and, in the case of the Dynamic strategy,
/// the point where we move from using the iterative strategy to the rtree. /// the point where we move from using the iterative strategy to the rtree.
@ -84,15 +97,21 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
cached_sorted_docids: VecDeque<(u32, [f64; 2])>, cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
geo_candidates: RoaringBitmap, geo_candidates: RoaringBitmap,
// Limit the number of docs in a single bucket to avoid unexpectedly large overhead
max_bucket_size: u64,
// Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal
distance_error_margin: f64,
} }
impl<Q: RankingRuleQueryTrait> GeoSort<Q> { impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
pub fn new( pub fn new(
strategy: Strategy, parameter: Parameter,
geo_faceted_docids: RoaringBitmap, geo_faceted_docids: RoaringBitmap,
point: [f64; 2], point: [f64; 2],
ascending: bool, ascending: bool,
) -> Result<Self> { ) -> Result<Self> {
let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter;
Ok(Self { Ok(Self {
query: None, query: None,
strategy, strategy,
@ -102,6 +121,8 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
field_ids: None, field_ids: None,
rtree: None, rtree: None,
cached_sorted_docids: VecDeque::new(), cached_sorted_docids: VecDeque::new(),
max_bucket_size,
distance_error_margin,
}) })
} }
@ -240,12 +261,12 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
fn next_bucket( fn next_bucket(
&mut self, &mut self,
ctx: &mut SearchContext<'ctx>, ctx: &mut SearchContext<'ctx>,
logger: &mut dyn SearchLogger<Q>, _logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Q>>> { ) -> Result<Option<RankingRuleOutput<Q>>> {
let query = self.query.as_ref().unwrap().clone(); let query = self.query.as_ref().unwrap().clone();
let geo_candidates = &self.geo_candidates & universe; let mut geo_candidates = &self.geo_candidates & universe;
if geo_candidates.is_empty() { if geo_candidates.is_empty() {
return Ok(Some(RankingRuleOutput { return Ok(Some(RankingRuleOutput {
@ -267,24 +288,102 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
cache.pop_back() cache.pop_back()
} }
}; };
while let Some((id, point)) = next(&mut self.cached_sorted_docids) { let put_back = |cache: &mut VecDeque<_>, x: _| {
if ascending {
cache.push_front(x)
} else {
cache.push_back(x)
}
};
let mut current_bucket = RoaringBitmap::new();
// current_distance stores the first point and distance in current bucket
let mut current_distance: Option<([f64; 2], f64)> = None;
loop {
// The loop will only exit when we have found all points with equal distance or have exhausted the candidates.
if let Some((id, point)) = next(&mut self.cached_sorted_docids) {
if geo_candidates.contains(id) { if geo_candidates.contains(id) {
let distance = distance_between_two_points(&self.point, &point);
if let Some((point0, bucket_distance)) = current_distance.as_ref() {
if (bucket_distance - distance).abs() > self.distance_error_margin {
// different distance, point belongs to next bucket
put_back(&mut self.cached_sorted_docids, (id, point));
return Ok(Some(RankingRuleOutput { return Ok(Some(RankingRuleOutput {
query, query,
candidates: RoaringBitmap::from_iter([id]), candidates: current_bucket,
score: ScoreDetails::GeoSort(score_details::GeoSort { score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point, target_point: self.point,
ascending: self.ascending, ascending: self.ascending,
value: Some(point), value: Some(point0.to_owned()),
}),
}));
} else {
// same distance, point belongs to current bucket
current_bucket.insert(id);
// remove from candidates to prevent it from being added to the cache again
geo_candidates.remove(id);
// current bucket size reaches limit, force return
if current_bucket.len() == self.max_bucket_size {
return Ok(Some(RankingRuleOutput {
query,
candidates: current_bucket,
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point0.to_owned()),
}), }),
})); }));
} }
} }
} else {
// if we got out of this loop it means we've exhausted our cache. // first doc in current bucket
// we need to refill it and run the function again. current_distance = Some((point, distance));
current_bucket.insert(id);
geo_candidates.remove(id);
// current bucket size reaches limit, force return
if current_bucket.len() == self.max_bucket_size {
return Ok(Some(RankingRuleOutput {
query,
candidates: current_bucket,
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point.to_owned()),
}),
}));
}
}
}
} else {
// cache exhausted, we need to refill it
self.fill_buffer(ctx, &geo_candidates)?; self.fill_buffer(ctx, &geo_candidates)?;
self.next_bucket(ctx, logger, universe)
if self.cached_sorted_docids.is_empty() {
// candidates exhausted, exit
if let Some((point0, _)) = current_distance.as_ref() {
return Ok(Some(RankingRuleOutput {
query,
candidates: current_bucket,
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point0.to_owned()),
}),
}));
} else {
return Ok(Some(RankingRuleOutput {
query,
candidates: universe.clone(),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: None,
}),
}));
}
}
}
}
} }
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")] #[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]

View File

@ -513,7 +513,7 @@ mod tests {
universe, universe,
&None, &None,
&None, &None,
crate::search::new::GeoSortStrategy::default(), crate::search::new::GeoSortParameter::default(),
0, 0,
100, 100,
Some(10), Some(10),

View File

@ -45,6 +45,7 @@ use sort::Sort;
use self::distinct::facet_string_values; use self::distinct::facet_string_values;
use self::geo_sort::GeoSort; use self::geo_sort::GeoSort;
pub use self::geo_sort::Parameter as GeoSortParameter;
pub use self::geo_sort::Strategy as GeoSortStrategy; pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words; use self::graph_based_ranking_rule::Words;
use self::interner::Interned; use self::interner::Interned;
@ -274,7 +275,7 @@ fn resolve_negative_phrases(
fn get_ranking_rules_for_placeholder_search<'ctx>( fn get_ranking_rules_for_placeholder_search<'ctx>(
ctx: &SearchContext<'ctx>, ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy, geo_param: geo_sort::Parameter,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> { ) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
let mut sort = false; let mut sort = false;
let mut sorted_fields = HashSet::new(); let mut sorted_fields = HashSet::new();
@ -299,7 +300,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
&mut ranking_rules, &mut ranking_rules,
&mut sorted_fields, &mut sorted_fields,
&mut geo_sorted, &mut geo_sorted,
geo_strategy, geo_param,
)?; )?;
sort = true; sort = true;
} }
@ -326,7 +327,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
fn get_ranking_rules_for_vector<'ctx>( fn get_ranking_rules_for_vector<'ctx>(
ctx: &SearchContext<'ctx>, ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy, geo_param: geo_sort::Parameter,
limit_plus_offset: usize, limit_plus_offset: usize,
target: &[f32], target: &[f32],
embedder_name: &str, embedder_name: &str,
@ -375,7 +376,7 @@ fn get_ranking_rules_for_vector<'ctx>(
&mut ranking_rules, &mut ranking_rules,
&mut sorted_fields, &mut sorted_fields,
&mut geo_sorted, &mut geo_sorted,
geo_strategy, geo_param,
)?; )?;
sort = true; sort = true;
} }
@ -403,7 +404,7 @@ fn get_ranking_rules_for_vector<'ctx>(
fn get_ranking_rules_for_query_graph_search<'ctx>( fn get_ranking_rules_for_query_graph_search<'ctx>(
ctx: &SearchContext<'ctx>, ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy, geo_param: geo_sort::Parameter,
terms_matching_strategy: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> { ) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
// query graph search // query graph search
@ -477,7 +478,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
&mut ranking_rules, &mut ranking_rules,
&mut sorted_fields, &mut sorted_fields,
&mut geo_sorted, &mut geo_sorted,
geo_strategy, geo_param,
)?; )?;
sort = true; sort = true;
} }
@ -514,7 +515,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>, ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>,
sorted_fields: &mut HashSet<String>, sorted_fields: &mut HashSet<String>,
geo_sorted: &mut bool, geo_sorted: &mut bool,
geo_strategy: geo_sort::Strategy, geo_param: geo_sort::Parameter,
) -> Result<()> { ) -> Result<()> {
let sort_criteria = sort_criteria.clone().unwrap_or_default(); let sort_criteria = sort_criteria.clone().unwrap_or_default();
ranking_rules.reserve(sort_criteria.len()); ranking_rules.reserve(sort_criteria.len());
@ -540,7 +541,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
} }
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?; let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
ranking_rules.push(Box::new(GeoSort::new( ranking_rules.push(Box::new(GeoSort::new(
geo_strategy, geo_param,
geo_faceted_docids, geo_faceted_docids,
point, point,
true, true,
@ -552,7 +553,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
} }
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?; let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
ranking_rules.push(Box::new(GeoSort::new( ranking_rules.push(Box::new(GeoSort::new(
geo_strategy, geo_param,
geo_faceted_docids, geo_faceted_docids,
point, point,
false, false,
@ -584,7 +585,7 @@ pub fn execute_vector_search(
universe: RoaringBitmap, universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>, distinct: &Option<String>,
geo_strategy: geo_sort::Strategy, geo_param: geo_sort::Parameter,
from: usize, from: usize,
length: usize, length: usize,
embedder_name: &str, embedder_name: &str,
@ -600,7 +601,7 @@ pub fn execute_vector_search(
let ranking_rules = get_ranking_rules_for_vector( let ranking_rules = get_ranking_rules_for_vector(
ctx, ctx,
sort_criteria, sort_criteria,
geo_strategy, geo_param,
from + length, from + length,
vector, vector,
embedder_name, embedder_name,
@ -647,7 +648,7 @@ pub fn execute_search(
mut universe: RoaringBitmap, mut universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>, distinct: &Option<String>,
geo_strategy: geo_sort::Strategy, geo_param: geo_sort::Parameter,
from: usize, from: usize,
length: usize, length: usize,
words_limit: Option<usize>, words_limit: Option<usize>,
@ -761,7 +762,7 @@ pub fn execute_search(
let ranking_rules = get_ranking_rules_for_query_graph_search( let ranking_rules = get_ranking_rules_for_query_graph_search(
ctx, ctx,
sort_criteria, sort_criteria,
geo_strategy, geo_param,
terms_matching_strategy, terms_matching_strategy,
)?; )?;
@ -783,7 +784,7 @@ pub fn execute_search(
)? )?
} else { } else {
let ranking_rules = let ranking_rules =
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?; get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_param)?;
bucket_sort( bucket_sort(
ctx, ctx,
ranking_rules, ranking_rules,

View File

@ -4,6 +4,7 @@ This module tests the `geo_sort` ranking rule
use big_s::S; use big_s::S;
use heed::RoTxn; use heed::RoTxn;
use itertools::Itertools;
use maplit::hashset; use maplit::hashset;
use crate::constants::RESERVED_GEO_FIELD_NAME; use crate::constants::RESERVED_GEO_FIELD_NAME;
@ -18,7 +19,7 @@ fn create_index() -> TempIndex {
index index
.update_settings(|s| { .update_settings(|s| {
s.set_primary_key("id".to_owned()); s.set_primary_key("id".to_owned());
s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) }); s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("score") });
s.set_criteria(vec![Criterion::Words, Criterion::Sort]); s.set_criteria(vec![Criterion::Words, Criterion::Sort]);
}) })
.unwrap(); .unwrap();
@ -95,6 +96,112 @@ fn test_geo_sort() {
insta::assert_snapshot!(format!("{scores:#?}")); insta::assert_snapshot!(format!("{scores:#?}"));
} }
/// Checks that a sort criterion placed after the geo sort is still applied:
/// documents sharing the same coordinates must come out ordered by descending `score`.
#[test]
fn test_geo_sort_with_following_ranking_rules() {
let index = create_index();
// Fixture: five documents without geo data, six documents at (2, 2) with scores 10 down to 5,
// and four documents at (5, 5) with scores 10 down to 7.
index
.add_documents(documents!([
{ "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 },
{ "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 },
{ "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 },
{ "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 },
{ "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 },
{ "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score":6 },
{ "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 },
{ "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 },
{ "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 },
{ "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 },
{ "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 },
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
// Ascending distance from (0, 0), ties broken by descending score.
s.sort_criteria(vec![
AscDesc::Asc(Member::Geo([0., 0.])),
AscDesc::Desc(Member::Field("score".to_string())),
]);
// NOTE(review): per its name, this helper presumably runs the search with both the iterative
// and the rtree strategies and checks that they agree — confirm against its definition.
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
// Expected: the (2, 2) group first, then the (5, 5) group, each by descending score,
// followed by the documents without geo data.
insta::assert_snapshot!(format!("{ids:?}"), @"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 4, 3, 2, 5]");
insta::assert_snapshot!(format!("{scores:#?}"));
// Descending distance from (0, 0), ties still broken by descending score.
s.sort_criteria(vec![
AscDesc::Desc(Member::Geo([0., 0.])),
AscDesc::Desc(Member::Field("score".to_string())),
]);
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
// Expected: the (5, 5) group now comes first; documents without geo data stay last.
insta::assert_snapshot!(format!("{ids:?}"), @"[12, 13, 14, 15, 6, 7, 8, 9, 10, 11, 1, 4, 3, 2, 5]");
insta::assert_snapshot!(format!("{scores:#?}"));
}
/// Checks the geo sort when the bucket size limit (set to 2 here) is smaller than a group of
/// documents sharing the same coordinates: the groups must still come out in distance order,
/// even though the order inside a group is not asserted.
#[test]
fn test_geo_sort_reached_max_bucket_size() {
let index = create_index();
// Fixture: five documents without geo data, six documents at (2, 2) and four at (5, 5).
index
.add_documents(documents!([
{ "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 },
{ "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 },
{ "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 },
{ "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 },
{ "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 },
{ "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score":6 },
{ "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 },
{ "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 },
{ "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 },
{ "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 },
{ "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 },
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut s = Search::new(&rtxn, &index);
// Cap every geo bucket at two documents, fewer than either group of co-located documents.
s.geo_max_bucket_size(2);
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
s.sort_criteria(vec![
AscDesc::Asc(Member::Geo([0., 0.])),
AscDesc::Desc(Member::Field("score".to_string())),
]);
/* When the bucket size limit is reached, the results are not expected to obey the ranking rules
 * that follow the geo sort, nor are the iterative and rtree strategies expected to return
 * exactly the same order within a bucket. Only group membership is asserted below. */
// First pass: iterative strategy.
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
let iterative_ids = collect_field_values(&index, &rtxn, "id", &documents_ids);
assert_eq!(iterative_ids.len(), 15);
// The six closest results must be the documents located at (2, 2), in any order.
for id_str in &iterative_ids[0..6] {
let id = id_str.parse::<u32>().unwrap();
assert!((6..=11).contains(&id))
}
// The next four results must be the documents located at (5, 5), in any order.
for id_str in &iterative_ids[6..10] {
let id = id_str.parse::<u32>().unwrap();
assert!((12..=15).contains(&id))
}
// Documents without geo data come last, in this exact order.
let no_geo_ids = iterative_ids[10..].iter().collect_vec();
insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);
// Second pass: same expectations with the rtree strategy.
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000));
let SearchResult { documents_ids, .. } = s.execute().unwrap();
let rtree_ids = collect_field_values(&index, &rtxn, "id", &documents_ids);
assert_eq!(rtree_ids.len(), 15);
for id_str in &rtree_ids[0..6] {
let id = id_str.parse::<u32>().unwrap();
assert!((6..=11).contains(&id))
}
for id_str in &rtree_ids[6..10] {
let id = id_str.parse::<u32>().unwrap();
assert!((12..=15).contains(&id))
}
let no_geo_ids = rtree_ids[10..].iter().collect_vec();
insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);
}
#[test] #[test]
fn test_geo_sort_around_the_edge_of_the_flat_earth() { fn test_geo_sort_around_the_edge_of_the_flat_earth() {
let index = create_index(); let index = create_index();

View File

@ -0,0 +1,356 @@
---
source: crates/milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(10.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(9.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(8.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(7.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(6.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(5.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(10.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(9.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(8.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(7.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: true,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
]

View File

@ -0,0 +1,356 @@
---
source: crates/milli/src/search/new/tests/geo_sort.rs
expression: "format!(\"{scores:#?}\")"
---
[
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(10.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(9.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(8.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
5.0,
5.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(7.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(10.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(9.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(8.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(7.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(6.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: Some(
[
2.0,
2.0,
],
),
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Number(5.0),
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
[
GeoSort(
GeoSort {
target_point: [
0.0,
0.0,
],
ascending: false,
value: None,
},
),
Sort(
Sort {
field_name: "score",
ascending: false,
redacted: false,
value: Null,
},
),
],
]