mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-05-14 08:14:05 +02:00
Merge pull request #5487 from HDT3213/bugfix/geosort
fix ranking rules after _geo do not work
This commit is contained in:
commit
3ec5b9d488
@ -1777,7 +1777,7 @@ async fn add_documents_with_geo_field() {
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": { "lat": "1", "lng": "1" },
|
||||
"_geo": { "lat": "2", "lng": "2" },
|
||||
},
|
||||
]);
|
||||
|
||||
@ -1828,8 +1828,8 @@ async fn add_documents_with_geo_field() {
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
"lat": "2",
|
||||
"lng": "2"
|
||||
}
|
||||
}
|
||||
],
|
||||
@ -1848,14 +1848,6 @@ async fn add_documents_with_geo_field() {
|
||||
@r###"
|
||||
{
|
||||
"hits": [
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "1",
|
||||
"lng": "1"
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "3",
|
||||
"_geo": {
|
||||
@ -1864,6 +1856,14 @@ async fn add_documents_with_geo_field() {
|
||||
},
|
||||
"_geoDistance": 5522018
|
||||
},
|
||||
{
|
||||
"id": "4",
|
||||
"_geo": {
|
||||
"lat": "2",
|
||||
"lng": "2"
|
||||
},
|
||||
"_geoDistance": 5408322
|
||||
},
|
||||
{
|
||||
"id": "1"
|
||||
},
|
||||
|
@ -164,7 +164,7 @@ impl Search<'_> {
|
||||
sort_criteria: self.sort_criteria.clone(),
|
||||
distinct: self.distinct.clone(),
|
||||
searchable_attributes: self.searchable_attributes,
|
||||
geo_strategy: self.geo_strategy,
|
||||
geo_param: self.geo_param,
|
||||
terms_matching_strategy: self.terms_matching_strategy,
|
||||
scoring_strategy: ScoringStrategy::Detailed,
|
||||
words_limit: self.words_limit,
|
||||
|
@ -45,7 +45,7 @@ pub struct Search<'a> {
|
||||
sort_criteria: Option<Vec<AscDesc>>,
|
||||
distinct: Option<String>,
|
||||
searchable_attributes: Option<&'a [String]>,
|
||||
geo_strategy: new::GeoSortStrategy,
|
||||
geo_param: new::GeoSortParameter,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
scoring_strategy: ScoringStrategy,
|
||||
words_limit: usize,
|
||||
@ -68,7 +68,7 @@ impl<'a> Search<'a> {
|
||||
sort_criteria: None,
|
||||
distinct: None,
|
||||
searchable_attributes: None,
|
||||
geo_strategy: new::GeoSortStrategy::default(),
|
||||
geo_param: new::GeoSortParameter::default(),
|
||||
terms_matching_strategy: TermsMatchingStrategy::default(),
|
||||
scoring_strategy: Default::default(),
|
||||
exhaustive_number_hits: false,
|
||||
@ -145,7 +145,13 @@ impl<'a> Search<'a> {
|
||||
|
||||
#[cfg(test)]
|
||||
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
|
||||
self.geo_strategy = strategy;
|
||||
self.geo_param.strategy = strategy;
|
||||
self
|
||||
}
|
||||
|
||||
/// Test-only setter that overrides the maximum number of documents the geo sort
/// may place in a single bucket (`geo_param.max_bucket_size`).
///
/// Returns `self` so calls can be chained.
#[cfg(test)]
|
||||
pub fn geo_max_bucket_size(&mut self, max_size: u64) -> &mut Search<'a> {
|
||||
self.geo_param.max_bucket_size = max_size;
|
||||
self
|
||||
}
|
||||
|
||||
@ -232,7 +238,7 @@ impl<'a> Search<'a> {
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_strategy,
|
||||
self.geo_param,
|
||||
self.offset,
|
||||
self.limit,
|
||||
embedder_name,
|
||||
@ -251,7 +257,7 @@ impl<'a> Search<'a> {
|
||||
universe,
|
||||
&self.sort_criteria,
|
||||
&self.distinct,
|
||||
self.geo_strategy,
|
||||
self.geo_param,
|
||||
self.offset,
|
||||
self.limit,
|
||||
Some(self.words_limit),
|
||||
@ -290,7 +296,7 @@ impl fmt::Debug for Search<'_> {
|
||||
sort_criteria,
|
||||
distinct,
|
||||
searchable_attributes,
|
||||
geo_strategy: _,
|
||||
geo_param: _,
|
||||
terms_matching_strategy,
|
||||
scoring_strategy,
|
||||
words_limit,
|
||||
|
@ -1,10 +1,8 @@
|
||||
use std::collections::VecDeque;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use heed::types::{Bytes, Unit};
|
||||
use heed::{RoPrefix, RoTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use super::facet_string_values;
|
||||
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
|
||||
@ -41,6 +39,21 @@ fn facet_number_values<'a>(
|
||||
Ok(iter)
|
||||
}
|
||||
|
||||
/// Tuning parameters of the geo sort ranking rule.
#[derive(Debug, Clone, Copy)]
|
||||
pub struct Parameter {
|
||||
/// Strategy used by the geo sort.
|
||||
pub strategy: Strategy,
|
||||
/// Upper bound on the number of documents in a single bucket, to avoid unexpectedly large overhead.
|
||||
pub max_bucket_size: u64,
|
||||
/// To account for GPS and geographical-calculation errors, distances that differ by less than
/// `distance_error_margin` are treated as equal.
/// NOTE(review): the unit is presumably the one returned by the distance helper — confirm.
|
||||
pub distance_error_margin: f64,
|
||||
}
|
||||
|
||||
/// Default geo sort parameters: the default `Strategy`, at most 1000 documents
/// per bucket, and a distance error margin of 1.0.
impl Default for Parameter {
|
||||
fn default() -> Self {
|
||||
Self { strategy: Strategy::default(), max_bucket_size: 1000, distance_error_margin: 1.0 }
|
||||
}
|
||||
}
|
||||
/// Define the strategy used by the geo sort.
|
||||
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
|
||||
/// the point where we move from using the iterative strategy to the rtree.
|
||||
@ -84,15 +97,21 @@ pub struct GeoSort<Q: RankingRuleQueryTrait> {
|
||||
|
||||
cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
|
||||
geo_candidates: RoaringBitmap,
|
||||
|
||||
// Limit the number of docs in a single bucket to avoid unexpectedly large overhead
|
||||
max_bucket_size: u64,
|
||||
// Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal
|
||||
distance_error_margin: f64,
|
||||
}
|
||||
|
||||
impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
||||
pub fn new(
|
||||
strategy: Strategy,
|
||||
parameter: Parameter,
|
||||
geo_faceted_docids: RoaringBitmap,
|
||||
point: [f64; 2],
|
||||
ascending: bool,
|
||||
) -> Result<Self> {
|
||||
let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter;
|
||||
Ok(Self {
|
||||
query: None,
|
||||
strategy,
|
||||
@ -102,6 +121,8 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
||||
field_ids: None,
|
||||
rtree: None,
|
||||
cached_sorted_docids: VecDeque::new(),
|
||||
max_bucket_size,
|
||||
distance_error_margin,
|
||||
})
|
||||
}
|
||||
|
||||
@ -240,12 +261,12 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
||||
fn next_bucket(
|
||||
&mut self,
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
logger: &mut dyn SearchLogger<Q>,
|
||||
_logger: &mut dyn SearchLogger<Q>,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<Option<RankingRuleOutput<Q>>> {
|
||||
let query = self.query.as_ref().unwrap().clone();
|
||||
|
||||
let geo_candidates = &self.geo_candidates & universe;
|
||||
let mut geo_candidates = &self.geo_candidates & universe;
|
||||
|
||||
if geo_candidates.is_empty() {
|
||||
return Ok(Some(RankingRuleOutput {
|
||||
@ -267,24 +288,102 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
||||
cache.pop_back()
|
||||
}
|
||||
};
|
||||
while let Some((id, point)) = next(&mut self.cached_sorted_docids) {
|
||||
let put_back = |cache: &mut VecDeque<_>, x: _| {
|
||||
if ascending {
|
||||
cache.push_front(x)
|
||||
} else {
|
||||
cache.push_back(x)
|
||||
}
|
||||
};
|
||||
|
||||
let mut current_bucket = RoaringBitmap::new();
|
||||
// current_distance stores the first point and distance in current bucket
|
||||
let mut current_distance: Option<([f64; 2], f64)> = None;
|
||||
loop {
|
||||
// The loop will only exit when we have found all points with equal distance or have exhausted the candidates.
|
||||
if let Some((id, point)) = next(&mut self.cached_sorted_docids) {
|
||||
if geo_candidates.contains(id) {
|
||||
let distance = distance_between_two_points(&self.point, &point);
|
||||
if let Some((point0, bucket_distance)) = current_distance.as_ref() {
|
||||
if (bucket_distance - distance).abs() > self.distance_error_margin {
|
||||
// different distance, point belongs to next bucket
|
||||
put_back(&mut self.cached_sorted_docids, (id, point));
|
||||
return Ok(Some(RankingRuleOutput {
|
||||
query,
|
||||
candidates: RoaringBitmap::from_iter([id]),
|
||||
candidates: current_bucket,
|
||||
score: ScoreDetails::GeoSort(score_details::GeoSort {
|
||||
target_point: self.point,
|
||||
ascending: self.ascending,
|
||||
value: Some(point),
|
||||
value: Some(point0.to_owned()),
|
||||
}),
|
||||
}));
|
||||
} else {
|
||||
// same distance, point belongs to current bucket
|
||||
current_bucket.insert(id);
|
||||
// remove from candidates to prevent it from being added to the cache again
|
||||
geo_candidates.remove(id);
|
||||
// current bucket size reaches limit, force return
|
||||
if current_bucket.len() == self.max_bucket_size {
|
||||
return Ok(Some(RankingRuleOutput {
|
||||
query,
|
||||
candidates: current_bucket,
|
||||
score: ScoreDetails::GeoSort(score_details::GeoSort {
|
||||
target_point: self.point,
|
||||
ascending: self.ascending,
|
||||
value: Some(point0.to_owned()),
|
||||
}),
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
||||
// if we got out of this loop it means we've exhausted our cache.
|
||||
// we need to refill it and run the function again.
|
||||
} else {
|
||||
// first doc in current bucket
|
||||
current_distance = Some((point, distance));
|
||||
current_bucket.insert(id);
|
||||
geo_candidates.remove(id);
|
||||
// current bucket size reaches limit, force return
|
||||
if current_bucket.len() == self.max_bucket_size {
|
||||
return Ok(Some(RankingRuleOutput {
|
||||
query,
|
||||
candidates: current_bucket,
|
||||
score: ScoreDetails::GeoSort(score_details::GeoSort {
|
||||
target_point: self.point,
|
||||
ascending: self.ascending,
|
||||
value: Some(point.to_owned()),
|
||||
}),
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// cache exhausted, we need to refill it
|
||||
self.fill_buffer(ctx, &geo_candidates)?;
|
||||
self.next_bucket(ctx, logger, universe)
|
||||
|
||||
if self.cached_sorted_docids.is_empty() {
|
||||
// candidates exhausted, exit
|
||||
if let Some((point0, _)) = current_distance.as_ref() {
|
||||
return Ok(Some(RankingRuleOutput {
|
||||
query,
|
||||
candidates: current_bucket,
|
||||
score: ScoreDetails::GeoSort(score_details::GeoSort {
|
||||
target_point: self.point,
|
||||
ascending: self.ascending,
|
||||
value: Some(point0.to_owned()),
|
||||
}),
|
||||
}));
|
||||
} else {
|
||||
return Ok(Some(RankingRuleOutput {
|
||||
query,
|
||||
candidates: universe.clone(),
|
||||
score: ScoreDetails::GeoSort(score_details::GeoSort {
|
||||
target_point: self.point,
|
||||
ascending: self.ascending,
|
||||
value: None,
|
||||
}),
|
||||
}));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
|
||||
|
@ -513,7 +513,7 @@ mod tests {
|
||||
universe,
|
||||
&None,
|
||||
&None,
|
||||
crate::search::new::GeoSortStrategy::default(),
|
||||
crate::search::new::GeoSortParameter::default(),
|
||||
0,
|
||||
100,
|
||||
Some(10),
|
||||
|
@ -45,6 +45,7 @@ use sort::Sort;
|
||||
|
||||
use self::distinct::facet_string_values;
|
||||
use self::geo_sort::GeoSort;
|
||||
pub use self::geo_sort::Parameter as GeoSortParameter;
|
||||
pub use self::geo_sort::Strategy as GeoSortStrategy;
|
||||
use self::graph_based_ranking_rule::Words;
|
||||
use self::interner::Interned;
|
||||
@ -274,7 +275,7 @@ fn resolve_negative_phrases(
|
||||
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
ctx: &SearchContext<'ctx>,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
|
||||
let mut sort = false;
|
||||
let mut sorted_fields = HashSet::new();
|
||||
@ -299,7 +300,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
&mut ranking_rules,
|
||||
&mut sorted_fields,
|
||||
&mut geo_sorted,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
)?;
|
||||
sort = true;
|
||||
}
|
||||
@ -326,7 +327,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
fn get_ranking_rules_for_vector<'ctx>(
|
||||
ctx: &SearchContext<'ctx>,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
limit_plus_offset: usize,
|
||||
target: &[f32],
|
||||
embedder_name: &str,
|
||||
@ -375,7 +376,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
||||
&mut ranking_rules,
|
||||
&mut sorted_fields,
|
||||
&mut geo_sorted,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
)?;
|
||||
sort = true;
|
||||
}
|
||||
@ -403,7 +404,7 @@ fn get_ranking_rules_for_vector<'ctx>(
|
||||
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||
ctx: &SearchContext<'ctx>,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
|
||||
// query graph search
|
||||
@ -477,7 +478,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||
&mut ranking_rules,
|
||||
&mut sorted_fields,
|
||||
&mut geo_sorted,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
)?;
|
||||
sort = true;
|
||||
}
|
||||
@ -514,7 +515,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
||||
ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>,
|
||||
sorted_fields: &mut HashSet<String>,
|
||||
geo_sorted: &mut bool,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
) -> Result<()> {
|
||||
let sort_criteria = sort_criteria.clone().unwrap_or_default();
|
||||
ranking_rules.reserve(sort_criteria.len());
|
||||
@ -540,7 +541,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
||||
}
|
||||
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
||||
ranking_rules.push(Box::new(GeoSort::new(
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
geo_faceted_docids,
|
||||
point,
|
||||
true,
|
||||
@ -552,7 +553,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
|
||||
}
|
||||
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
|
||||
ranking_rules.push(Box::new(GeoSort::new(
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
geo_faceted_docids,
|
||||
point,
|
||||
false,
|
||||
@ -584,7 +585,7 @@ pub fn execute_vector_search(
|
||||
universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
from: usize,
|
||||
length: usize,
|
||||
embedder_name: &str,
|
||||
@ -600,7 +601,7 @@ pub fn execute_vector_search(
|
||||
let ranking_rules = get_ranking_rules_for_vector(
|
||||
ctx,
|
||||
sort_criteria,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
from + length,
|
||||
vector,
|
||||
embedder_name,
|
||||
@ -647,7 +648,7 @@ pub fn execute_search(
|
||||
mut universe: RoaringBitmap,
|
||||
sort_criteria: &Option<Vec<AscDesc>>,
|
||||
distinct: &Option<String>,
|
||||
geo_strategy: geo_sort::Strategy,
|
||||
geo_param: geo_sort::Parameter,
|
||||
from: usize,
|
||||
length: usize,
|
||||
words_limit: Option<usize>,
|
||||
@ -761,7 +762,7 @@ pub fn execute_search(
|
||||
let ranking_rules = get_ranking_rules_for_query_graph_search(
|
||||
ctx,
|
||||
sort_criteria,
|
||||
geo_strategy,
|
||||
geo_param,
|
||||
terms_matching_strategy,
|
||||
)?;
|
||||
|
||||
@ -783,7 +784,7 @@ pub fn execute_search(
|
||||
)?
|
||||
} else {
|
||||
let ranking_rules =
|
||||
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
|
||||
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_param)?;
|
||||
bucket_sort(
|
||||
ctx,
|
||||
ranking_rules,
|
||||
|
@ -4,6 +4,7 @@ This module tests the `geo_sort` ranking rule
|
||||
|
||||
use big_s::S;
|
||||
use heed::RoTxn;
|
||||
use itertools::Itertools;
|
||||
use maplit::hashset;
|
||||
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
@ -18,7 +19,7 @@ fn create_index() -> TempIndex {
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) });
|
||||
s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("score") });
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Sort]);
|
||||
})
|
||||
.unwrap();
|
||||
@ -95,6 +96,112 @@ fn test_geo_sort() {
|
||||
insta::assert_snapshot!(format!("{scores:#?}"));
|
||||
}
|
||||
|
||||
// Checks that a sort criterion placed after the geo sort is still applied:
// documents sharing the same geo point must come out ordered by descending `score`.
#[test]
|
||||
fn test_geo_sort_with_following_ranking_rules() {
|
||||
let index = create_index();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
// Five documents without any geo point or score.
{ "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 },
|
||||
// Six documents sharing the point (2, 2), with scores from 10 down to 5.
{ "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 },
|
||||
{ "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 },
|
||||
{ "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 },
|
||||
{ "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 },
|
||||
{ "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score":6 },
|
||||
{ "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 },
|
||||
// Four documents sharing the point (5, 5), with scores from 10 down to 7.
{ "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 },
|
||||
{ "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 },
|
||||
{ "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 },
|
||||
{ "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let mut s = Search::new(&rtxn, &index);
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
// Ascending distance from (0, 0), then descending score inside each geo bucket.
s.sort_criteria(vec![
|
||||
AscDesc::Asc(Member::Geo([0., 0.])),
|
||||
AscDesc::Desc(Member::Field("score".to_string())),
|
||||
]);
|
||||
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
|
||||
insta::assert_snapshot!(format!("{ids:?}"), @"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 4, 3, 2, 5]");
|
||||
insta::assert_snapshot!(format!("{scores:#?}"));
|
||||
|
||||
// Descending distance from (0, 0): the (5, 5) group now comes first.
s.sort_criteria(vec![
|
||||
AscDesc::Desc(Member::Geo([0., 0.])),
|
||||
AscDesc::Desc(Member::Field("score".to_string())),
|
||||
]);
|
||||
let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s);
|
||||
insta::assert_snapshot!(format!("{ids:?}"), @"[12, 13, 14, 15, 6, 7, 8, 9, 10, 11, 1, 4, 3, 2, 5]");
|
||||
insta::assert_snapshot!(format!("{scores:#?}"));
|
||||
}
|
||||
|
||||
// Checks the geo sort when `max_bucket_size` (set to 2 here) is smaller than the number
// of documents sharing a point: documents must still be grouped by distance, but the
// order inside a group is not asserted.
#[test]
|
||||
fn test_geo_sort_reached_max_bucket_size() {
|
||||
let index = create_index();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 },
|
||||
{ "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 },
|
||||
{ "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 },
|
||||
{ "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 },
|
||||
{ "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 },
|
||||
{ "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score":6 },
|
||||
{ "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 },
|
||||
{ "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 },
|
||||
{ "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 },
|
||||
{ "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 },
|
||||
{ "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
|
||||
let mut s = Search::new(&rtxn, &index);
|
||||
s.geo_max_bucket_size(2);
|
||||
s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed);
|
||||
s.sort_criteria(vec![
|
||||
AscDesc::Asc(Member::Geo([0., 0.])),
|
||||
AscDesc::Desc(Member::Field("score".to_string())),
|
||||
]);
|
||||
|
||||
/* When the bucket size limit is reached, we should not expect the results to obey the following ranking rules,
|
||||
 * nor should we expect the iterative and rtree strategies to give exactly the same order within the same bucket. */
|
||||
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000));
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
let iterative_ids = collect_field_values(&index, &rtxn, "id", &documents_ids);
|
||||
|
||||
assert_eq!(iterative_ids.len(), 15);
|
||||
// The first six hits must be the documents located at (2, 2), in any order.
for id_str in &iterative_ids[0..6] {
|
||||
let id = id_str.parse::<u32>().unwrap();
|
||||
assert!((6..=11).contains(&id))
|
||||
}
|
||||
// The next four hits must be the documents located at (5, 5), in any order.
for id_str in &iterative_ids[6..10] {
|
||||
let id = id_str.parse::<u32>().unwrap();
|
||||
assert!((12..=15).contains(&id))
|
||||
}
|
||||
// Documents without a geo point come last.
let no_geo_ids = iterative_ids[10..].iter().collect_vec();
|
||||
insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);
|
||||
|
||||
// Same checks with the rtree strategy.
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000));
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
let rtree_ids = collect_field_values(&index, &rtxn, "id", &documents_ids);
|
||||
|
||||
assert_eq!(rtree_ids.len(), 15);
|
||||
for id_str in &rtree_ids[0..6] {
|
||||
let id = id_str.parse::<u32>().unwrap();
|
||||
assert!((6..=11).contains(&id))
|
||||
}
|
||||
for id_str in &rtree_ids[6..10] {
|
||||
let id = id_str.parse::<u32>().unwrap();
|
||||
assert!((12..=15).contains(&id))
|
||||
}
|
||||
let no_geo_ids = rtree_ids[10..].iter().collect_vec();
|
||||
insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_geo_sort_around_the_edge_of_the_flat_earth() {
|
||||
let index = create_index();
|
||||
|
@ -0,0 +1,356 @@
|
||||
---
|
||||
source: crates/milli/src/search/new/tests/geo_sort.rs
|
||||
expression: "format!(\"{scores:#?}\")"
|
||||
---
|
||||
[
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(10.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(9.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(8.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(7.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(6.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(5.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(10.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(9.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(8.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(7.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: true,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
@ -0,0 +1,356 @@
|
||||
---
|
||||
source: crates/milli/src/search/new/tests/geo_sort.rs
|
||||
expression: "format!(\"{scores:#?}\")"
|
||||
---
|
||||
[
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(10.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(9.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(8.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
5.0,
|
||||
5.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(7.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(10.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(9.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(8.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(7.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(6.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: Some(
|
||||
[
|
||||
2.0,
|
||||
2.0,
|
||||
],
|
||||
),
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Number(5.0),
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
[
|
||||
GeoSort(
|
||||
GeoSort {
|
||||
target_point: [
|
||||
0.0,
|
||||
0.0,
|
||||
],
|
||||
ascending: false,
|
||||
value: None,
|
||||
},
|
||||
),
|
||||
Sort(
|
||||
Sort {
|
||||
field_name: "score",
|
||||
ascending: false,
|
||||
redacted: false,
|
||||
value: Null,
|
||||
},
|
||||
),
|
||||
],
|
||||
]
|
Loading…
x
Reference in New Issue
Block a user