From e4733dcd4273b017ab9bc89965e847ceeed86a05 Mon Sep 17 00:00:00 2001 From: hdt3213 Date: Wed, 2 Apr 2025 20:09:51 +0800 Subject: [PATCH 01/10] fix ranking rules after _geo do not work --- crates/milli/src/search/new/geo_sort.rs | 91 +++++++++++++++++++------ 1 file changed, 72 insertions(+), 19 deletions(-) diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index 789eaaafc..06a6f6ae5 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -1,6 +1,4 @@ use std::collections::VecDeque; -use std::iter::FromIterator; - use heed::types::{Bytes, Unit}; use heed::{RoPrefix, RoTxn}; use roaring::RoaringBitmap; @@ -245,7 +243,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { ) -> Result>> { let query = self.query.as_ref().unwrap().clone(); - let geo_candidates = &self.geo_candidates & universe; + let mut geo_candidates = &self.geo_candidates & universe; if geo_candidates.is_empty() { return Ok(Some(RankingRuleOutput { @@ -267,24 +265,79 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { cache.pop_back() } }; - while let Some((id, point)) = next(&mut self.cached_sorted_docids) { - if geo_candidates.contains(id) { - return Ok(Some(RankingRuleOutput { - query, - candidates: RoaringBitmap::from_iter([id]), - score: ScoreDetails::GeoSort(score_details::GeoSort { - target_point: self.point, - ascending: self.ascending, - value: Some(point), - }), - })); + let put_back = |cache: &mut VecDeque<_>, x: _| { + if ascending { + cache.push_front(x) + } else { + cache.push_back(x) + } + }; + + let mut current_bucket = RoaringBitmap::new(); + // current_distance stores the first point and distance in current bucket + // The farthest distance between two points on earth is about 2e7 meters, u32 is big enough to hold any distance. + let mut current_distance: Option<([f64; 2], u32)> = None; + loop { + // The loop will only exit when we have found all points with equal distance or have exhausted the candidates. + if let Some((id, point)) = next(&mut self.cached_sorted_docids) { + if geo_candidates.contains(id) { + let distance = distance_between_two_points(&self.point, &point).round() as u32; + if let Some((point0, bucket_distance)) = current_distance.as_ref() { + if bucket_distance != &distance { + // different distance, point belongs to next bucket + put_back(&mut self.cached_sorted_docids, (id, point)); + return Ok(Some(RankingRuleOutput { + query, + candidates: current_bucket, + score: ScoreDetails::GeoSort(score_details::GeoSort { + target_point: self.point, + ascending: self.ascending, + value: Some(point0.to_owned()), + }), + })); + } else { + // same distance, point belongs to current bucket + current_bucket.push(id); + // remove from cadidates to prevent it from being added to the cache again + geo_candidates.remove(id); + } + } else { + // first doc in current bucket + current_distance = Some((point, distance)); + current_bucket.push(id); + geo_candidates.remove(id); + } + } + } else { + // cache exhausted, we need to refill it + self.fill_buffer(ctx, &geo_candidates)?; + + if self.cached_sorted_docids.is_empty() { + // candidates exhausted, exit + if let Some((point0, _)) = current_distance.as_ref() { + return Ok(Some(RankingRuleOutput { + query, + candidates: current_bucket, + score: ScoreDetails::GeoSort(score_details::GeoSort { + target_point: self.point, + ascending: self.ascending, + value: Some(point0.to_owned()), + }), + })); + } else { + return Ok(Some(RankingRuleOutput { + query, + candidates: universe.clone(), + score: ScoreDetails::GeoSort(score_details::GeoSort { + target_point: self.point, + ascending: self.ascending, + value: None, + }), + })); + } + } } } - - // if we got out of this loop it means we've exhausted our cache. - // we need to refill it and run the function again. - self.fill_buffer(ctx, &geo_candidates)?; - self.next_bucket(ctx, logger, universe) } #[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")] From 326a728434945ced1502f11f711bc90f80d57e23 Mon Sep 17 00:00:00 2001 From: hdt3213 Date: Wed, 2 Apr 2025 20:33:42 +0800 Subject: [PATCH 02/10] fix code style --- crates/milli/src/search/new/geo_sort.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index 06a6f6ae5..1a2bd2edc 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -1,8 +1,8 @@ -use std::collections::VecDeque; use heed::types::{Bytes, Unit}; use heed::{RoPrefix, RoTxn}; use roaring::RoaringBitmap; use rstar::RTree; +use std::collections::VecDeque; use super::facet_string_values; use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait}; @@ -238,7 +238,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { fn next_bucket( &mut self, ctx: &mut SearchContext<'ctx>, - logger: &mut dyn SearchLogger, + _logger: &mut dyn SearchLogger, universe: &RoaringBitmap, ) -> Result>> { let query = self.query.as_ref().unwrap().clone(); @@ -299,13 +299,13 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { // same distance, point belongs to current bucket current_bucket.push(id); // remove from cadidates to prevent it from being added to the cache again - geo_candidates.remove(id); + geo_candidates.remove(id); } } else { // first doc in current bucket current_distance = Some((point, distance)); current_bucket.push(id); - geo_candidates.remove(id); + geo_candidates.remove(id); } } } else { From 0f07cfed143fd67e797fd4fe2fdaf6cabdc6b6b0 Mon Sep 17 00:00:00 2001 From: hdt3213 Date: Fri, 4 Apr 2025 16:35:34 +0800 Subject: [PATCH 03/10] GeoSort support max_bucket_size and distance_error_margin configuration --- crates/milli/src/search/new/geo_sort.rs | 42 ++- crates/milli/src/search/new/tests/geo_sort.rs | 37 +- ...o_sort_with_following_ranking_rules-2.snap | 356 ++++++++++++++++++ ...o_sort_with_following_ranking_rules-4.snap | 356 ++++++++++++++++++ 4 files changed, 784 insertions(+), 7 deletions(-) create mode 100644 crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_with_following_ranking_rules-2.snap create mode 100644 crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_with_following_ranking_rules-4.snap diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index 1a2bd2edc..ca5a4ab8b 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -82,6 +82,11 @@ pub struct GeoSort { cached_sorted_docids: VecDeque<(u32, [f64; 2])>, geo_candidates: RoaringBitmap, + + // Limit the number of docs in a single bucket to avoid unexpectedly large overhead + max_bucket_size: u64, + // Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal + distance_error_margin: f64, } impl GeoSort { @@ -100,6 +105,8 @@ impl GeoSort { field_ids: None, rtree: None, cached_sorted_docids: VecDeque::new(), + max_bucket_size: 1000, + distance_error_margin: 1.0, }) } @@ -275,15 +282,14 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { let mut current_bucket = RoaringBitmap::new(); // current_distance stores the first point and distance in current bucket - // The farthest distance between two points on earth is about 2e7 meters, u32 is big enough to hold any distance. - let mut current_distance: Option<([f64; 2], u32)> = None; + let mut current_distance: Option<([f64; 2], f64)> = None; loop { // The loop will only exit when we have found all points with equal distance or have exhausted the candidates. if let Some((id, point)) = next(&mut self.cached_sorted_docids) { if geo_candidates.contains(id) { - let distance = distance_between_two_points(&self.point, &point).round() as u32; + let distance = distance_between_two_points(&self.point, &point); if let Some((point0, bucket_distance)) = current_distance.as_ref() { - if bucket_distance != &distance { + if (bucket_distance - &distance).abs() > self.distance_error_margin { // different distance, point belongs to next bucket put_back(&mut self.cached_sorted_docids, (id, point)); return Ok(Some(RankingRuleOutput { @@ -297,15 +303,39 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { })); } else { // same distance, point belongs to current bucket - current_bucket.push(id); + current_bucket.insert(id); // remove from cadidates to prevent it from being added to the cache again geo_candidates.remove(id); + // current bucket size reaches limit, force return + if current_bucket.len() == self.max_bucket_size { + return Ok(Some(RankingRuleOutput { + query, + candidates: current_bucket, + score: ScoreDetails::GeoSort(score_details::GeoSort { + target_point: self.point, + ascending: self.ascending, + value: Some(point0.to_owned()), + }), + })); + } } } else { // first doc in current bucket current_distance = Some((point, distance)); - current_bucket.push(id); + current_bucket.insert(id); geo_candidates.remove(id); + // current bucket size reaches limit, force return + if current_bucket.len() == self.max_bucket_size { + return Ok(Some(RankingRuleOutput { + query, + candidates: current_bucket, + score: ScoreDetails::GeoSort(score_details::GeoSort { + target_point: self.point, + ascending: self.ascending, + value: Some(point.to_owned()), + }), + })); + } } } } else { diff --git a/crates/milli/src/search/new/tests/geo_sort.rs b/crates/milli/src/search/new/tests/geo_sort.rs index 2eda39ba1..3d89f5d2f 100644 --- a/crates/milli/src/search/new/tests/geo_sort.rs +++ b/crates/milli/src/search/new/tests/geo_sort.rs @@ -18,7 +18,7 @@ fn create_index() -> TempIndex { index .update_settings(|s| { s.set_primary_key("id".to_owned()); - s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME) }); + s.set_sortable_fields(hashset! { S(RESERVED_GEO_FIELD_NAME), S("score") }); s.set_criteria(vec![Criterion::Words, Criterion::Sort]); }) .unwrap(); @@ -95,6 +95,41 @@ fn test_geo_sort() { insta::assert_snapshot!(format!("{scores:#?}")); } +#[test] +fn test_geo_sort_with_following_ranking_rules() { + let index = create_index(); + + index + .add_documents(documents!([ + { "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 }, + { "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 }, + { "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 }, + { "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 }, + { "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 }, + { "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score":6 }, + { "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 }, + { "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 }, + { "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 }, + { "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 }, + { "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 }, + ])) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + + let mut s = Search::new(&rtxn, &index); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.])), AscDesc::Desc(Member::Field("score".to_string()))]); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 4, 3, 2, 5]"); + insta::assert_snapshot!(format!("{scores:#?}")); + + s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.])), AscDesc::Desc(Member::Field("score".to_string()))]); + let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); + insta::assert_snapshot!(format!("{ids:?}"), @"[12, 13, 14, 15, 6, 7, 8, 9, 10, 11, 1, 4, 3, 2, 5]"); + insta::assert_snapshot!(format!("{scores:#?}")); +} + #[test] fn test_geo_sort_around_the_edge_of_the_flat_earth() { let index = create_index(); diff --git a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_with_following_ranking_rules-2.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_with_following_ranking_rules-2.snap new file mode 100644 index 000000000..b8b6e33e3 --- /dev/null +++ b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_with_following_ranking_rules-2.snap @@ -0,0 +1,356 @@ +--- +source: crates/milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(10.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(9.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(8.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(7.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(6.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(5.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 5.0, + 5.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(10.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 5.0, + 5.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(9.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 5.0, + 5.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(8.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: Some( + [ + 5.0, + 5.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(7.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: true, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], +] diff --git a/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_with_following_ranking_rules-4.snap b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_with_following_ranking_rules-4.snap new file mode 100644 index 000000000..82124cd90 --- /dev/null +++ b/crates/milli/src/search/new/tests/snapshots/milli__search__new__tests__geo_sort__geo_sort_with_following_ranking_rules-4.snap @@ -0,0 +1,356 @@ +--- +source: crates/milli/src/search/new/tests/geo_sort.rs +expression: "format!(\"{scores:#?}\")" +--- +[ + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 5.0, + 5.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(10.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 5.0, + 5.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(9.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 5.0, + 5.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(8.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 5.0, + 5.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(7.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(10.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(9.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(8.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(7.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(6.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: Some( + [ + 2.0, + 2.0, + ], + ), + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Number(5.0), + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], + [ + GeoSort( + GeoSort { + target_point: [ + 0.0, + 0.0, + ], + ascending: false, + value: None, + }, + ), + Sort( + Sort { + field_name: "score", + ascending: false, + redacted: false, + value: Null, + }, + ), + ], +] From ffe3faeca75d7716993cbfa9e5c269d529159ffc Mon Sep 17 00:00:00 2001 From: hdt3213 Date: Fri, 4 Apr 2025 18:40:28 +0800 Subject: [PATCH 04/10] cargo fmt --- crates/milli/src/search/new/tests/geo_sort.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/crates/milli/src/search/new/tests/geo_sort.rs b/crates/milli/src/search/new/tests/geo_sort.rs index 3d89f5d2f..ff946d226 100644 --- a/crates/milli/src/search/new/tests/geo_sort.rs +++ b/crates/milli/src/search/new/tests/geo_sort.rs @@ -119,12 +119,18 @@ fn test_geo_sort_with_following_ranking_rules() { let mut s = Search::new(&rtxn, &index); s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); - s.sort_criteria(vec![AscDesc::Asc(Member::Geo([0., 0.])), AscDesc::Desc(Member::Field("score".to_string()))]); + s.sort_criteria(vec![ + AscDesc::Asc(Member::Geo([0., 0.])), + AscDesc::Desc(Member::Field("score".to_string())), + ]); let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1, 4, 3, 2, 5]"); insta::assert_snapshot!(format!("{scores:#?}")); - s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.])), AscDesc::Desc(Member::Field("score".to_string()))]); + s.sort_criteria(vec![ + AscDesc::Desc(Member::Geo([0., 0.])), + AscDesc::Desc(Member::Field("score".to_string())), + ]); let (ids, scores) = execute_iterative_and_rtree_returns_the_same(&rtxn, &index, &mut s); insta::assert_snapshot!(format!("{ids:?}"), @"[12, 13, 14, 15, 6, 7, 8, 9, 10, 11, 1, 4, 3, 2, 5]"); insta::assert_snapshot!(format!("{scores:#?}")); From c4a8b84dc03867b2ae78a9ccc584bf6f7801e747 Mon Sep 17 00:00:00 2001 From: hdt3213 Date: Fri, 4 Apr 2025 19:22:33 +0800 Subject: [PATCH 05/10] code style --- crates/milli/src/search/new/geo_sort.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index ca5a4ab8b..47694c6ce 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -289,7 +289,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort { if geo_candidates.contains(id) { let distance = distance_between_two_points(&self.point, &point); if let Some((point0, bucket_distance)) = current_distance.as_ref() { - if (bucket_distance - &distance).abs() > self.distance_error_margin { + if (bucket_distance - distance).abs() > self.distance_error_margin { // different distance, point belongs to next bucket put_back(&mut self.cached_sorted_docids, (id, point)); return Ok(Some(RankingRuleOutput { From 5da92a3d53a56af7719afe5ba097c32bf41ca93e Mon Sep 17 00:00:00 2001 From: hdt3213 Date: Mon, 14 Apr 2025 22:50:32 +0800 Subject: [PATCH 06/10] test geo sort reached max_bucket_size --- crates/milli/src/search/new/geo_sort.rs | 22 ++++++- crates/milli/src/search/new/tests/geo_sort.rs | 66 +++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index 47694c6ce..1c52b0a5b 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -71,6 +71,26 @@ impl Strategy { } } +#[cfg(not(test))] +fn default_max_bucket_size() -> u64 { + 1000 +} + +#[cfg(test)] +static DEFAULT_MAX_BUCKET_SIZE: std::sync::Mutex = std::sync::Mutex::new(1000); + +#[cfg(test)] +pub fn set_default_max_bucket_size(n: u64) { + let mut size = DEFAULT_MAX_BUCKET_SIZE.lock().unwrap(); + *size = n; +} + +#[cfg(test)] +fn default_max_bucket_size() -> u64 { + let max_size = *(DEFAULT_MAX_BUCKET_SIZE.lock().unwrap()); + max_size +} + pub struct GeoSort { query: Option, @@ -105,7 +125,7 @@ impl GeoSort { field_ids: None, rtree: None, cached_sorted_docids: VecDeque::new(), - max_bucket_size: 1000, + max_bucket_size: default_max_bucket_size(), distance_error_margin: 1.0, }) } diff --git a/crates/milli/src/search/new/tests/geo_sort.rs b/crates/milli/src/search/new/tests/geo_sort.rs index ff946d226..e5993925a 100644 --- a/crates/milli/src/search/new/tests/geo_sort.rs +++ b/crates/milli/src/search/new/tests/geo_sort.rs @@ -4,6 +4,7 @@ This module tests the `geo_sort` ranking rule use big_s::S; use heed::RoTxn; +use itertools::Itertools; use maplit::hashset; use crate::constants::RESERVED_GEO_FIELD_NAME; @@ -136,6 +137,71 @@ fn test_geo_sort_with_following_ranking_rules() { insta::assert_snapshot!(format!("{scores:#?}")); } +#[test] +fn test_geo_sort_reached_max_bucket_size() { + let index = create_index(); + + index + .add_documents(documents!([ + { "id": 1 }, { "id": 4 }, { "id": 3 }, { "id": 2 }, { "id": 5 }, + { "id": 6, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 10 }, + { "id": 7, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 9 }, + { "id": 8, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 8 }, + { "id": 9, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 7 }, + { "id": 10, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score":6 }, + { "id": 11, RESERVED_GEO_FIELD_NAME: { "lat": 2, "lng": 2 }, "score": 5 }, + { "id": 12, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 10 }, + { "id": 13, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 9 }, + { "id": 14, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 8 }, + { "id": 15, RESERVED_GEO_FIELD_NAME: { "lat": 5, "lng": 5 }, "score": 7 }, + ])) + .unwrap(); + + crate::search::new::geo_sort::set_default_max_bucket_size(2); + let rtxn = index.read_txn().unwrap(); + + let mut s = Search::new(&rtxn, &index); + s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); + s.sort_criteria(vec![ + AscDesc::Asc(Member::Geo([0., 0.])), + AscDesc::Desc(Member::Field("score".to_string())), + ]); + + /* We should not expect the results to obey the following ranking rules when the bucket size limit is reached, + * nor should we expect Iteration and rtree to give exactly the same order for the same bucket in this case.*/ + s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let iterative_ids = collect_field_values(&index, &rtxn, "id", &documents_ids); + + assert_eq!(iterative_ids.len(), 15); + for id_str in &iterative_ids[0..6] { + let id = id_str.parse::().unwrap(); + assert!(id >= 6 && id <= 11) + } + for id_str in &iterative_ids[6..10] { + let id = id_str.parse::().unwrap(); + assert!(id >= 12 && id <= 15) + } + let no_geo_ids = iterative_ids[10..].iter().collect_vec(); + insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#); + + s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000)); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + let rtree_ids = collect_field_values(&index, &rtxn, "id", &documents_ids); + + assert_eq!(rtree_ids.len(), 15); + for id_str in &rtree_ids[0..6] { + let id = id_str.parse::().unwrap(); + assert!(id >= 6 && id <= 11) + } + for id_str in &rtree_ids[6..10] { + let id = id_str.parse::().unwrap(); + assert!(id >= 12 && id <= 15) + } + let no_geo_ids = rtree_ids[10..].iter().collect_vec(); + insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#); +} + #[test] fn test_geo_sort_around_the_edge_of_the_flat_earth() { let index = create_index(); From 1f5412003d470b1d0cc7485edb6ea6cb3b579d38 Mon Sep 17 00:00:00 2001 From: hdt3213 Date: Tue, 15 Apr 2025 07:17:47 +0800 Subject: [PATCH 07/10] optimize test suite --- .../tests/documents/add_documents.rs | 22 +++++++++---------- crates/milli/src/search/new/tests/geo_sort.rs | 8 +++---- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/crates/meilisearch/tests/documents/add_documents.rs b/crates/meilisearch/tests/documents/add_documents.rs index 8c05cd177..6569bb9a5 100644 --- a/crates/meilisearch/tests/documents/add_documents.rs +++ b/crates/meilisearch/tests/documents/add_documents.rs @@ -1777,7 +1777,7 @@ async fn add_documents_with_geo_field() { }, { "id": "4", - "_geo": { "lat": "1", "lng": "1" }, + "_geo": { "lat": "2", "lng": "2" }, }, ]); @@ -1828,8 +1828,8 @@ async fn add_documents_with_geo_field() { { "id": "4", "_geo": { - "lat": "1", - "lng": "1" + "lat": "2", + "lng": "2" } } ], @@ -1848,14 +1848,6 @@ async fn add_documents_with_geo_field() { @r###" { "hits": [ - { - "id": "4", - "_geo": { - "lat": "1", - "lng": "1" - }, - "_geoDistance": 5522018 - }, { "id": "3", "_geo": { @@ -1864,6 +1856,14 @@ async fn add_documents_with_geo_field() { }, "_geoDistance": 5522018 }, + { + "id": "4", + "_geo": { + "lat": "2", + "lng": "2" + }, + "_geoDistance": 5408322 + }, { "id": "1" }, diff --git a/crates/milli/src/search/new/tests/geo_sort.rs b/crates/milli/src/search/new/tests/geo_sort.rs index e5993925a..73d55de19 100644 --- a/crates/milli/src/search/new/tests/geo_sort.rs +++ b/crates/milli/src/search/new/tests/geo_sort.rs @@ -176,11 +176,11 @@ fn test_geo_sort_reached_max_bucket_size() { assert_eq!(iterative_ids.len(), 15); for id_str in &iterative_ids[0..6] { let id = id_str.parse::().unwrap(); - assert!(id >= 6 && id <= 11) + assert!((6..=11).contains(&id)) } for id_str in &iterative_ids[6..10] { let id = id_str.parse::().unwrap(); - assert!(id >= 12 && id <= 15) + assert!((12..=15).contains(&id)) } let no_geo_ids = iterative_ids[10..].iter().collect_vec(); insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#); @@ -192,11 +192,11 @@ fn test_geo_sort_reached_max_bucket_size() { assert_eq!(rtree_ids.len(), 15); for id_str in &rtree_ids[0..6] { let id = id_str.parse::().unwrap(); - assert!(id >= 6 && id <= 11) + assert!((6..=11).contains(&id)) } for id_str in &rtree_ids[6..10] { let id = id_str.parse::().unwrap(); - assert!(id >= 12 && id <= 15) + assert!((12..=15).contains(&id)) } let no_geo_ids = rtree_ids[10..].iter().collect_vec(); insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#); From 7c1c4f9c26b732af1f6f54c16537758c3f871506 Mon Sep 17 00:00:00 2001 From: hdt3213 Date: Tue, 15 Apr 2025 08:10:03 +0800 Subject: [PATCH 08/10] fix test_geo_sort_reached_max_bucket_size --- crates/milli/src/search/new/tests/geo_sort.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/crates/milli/src/search/new/tests/geo_sort.rs b/crates/milli/src/search/new/tests/geo_sort.rs index 73d55de19..6d3df5262 100644 --- a/crates/milli/src/search/new/tests/geo_sort.rs +++ b/crates/milli/src/search/new/tests/geo_sort.rs @@ -200,6 +200,9 @@ fn test_geo_sort_reached_max_bucket_size() { } let no_geo_ids = rtree_ids[10..].iter().collect_vec(); insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#); + + // recover settings + crate::search::new::geo_sort::set_default_max_bucket_size(1000); } #[test] From fd7fbfa9eb7f9ad58f2e794c0321c97738e55de2 Mon Sep 17 00:00:00 2001 From: hdt3213 Date: Tue, 15 Apr 2025 20:05:44 +0800 Subject: [PATCH 09/10] Refactor geo_max_bucket_size injection --- crates/milli/src/search/hybrid.rs | 2 +- crates/milli/src/search/mod.rs | 18 +++++--- crates/milli/src/search/new/geo_sort.rs | 45 +++++++++---------- crates/milli/src/search/new/matches/mod.rs | 2 +- crates/milli/src/search/new/mod.rs | 29 ++++++------ crates/milli/src/search/new/tests/geo_sort.rs | 5 +-- 6 files changed, 51 insertions(+), 50 deletions(-) diff --git a/crates/milli/src/search/hybrid.rs b/crates/milli/src/search/hybrid.rs index 81f74fdad..e07f886c9 100644 --- a/crates/milli/src/search/hybrid.rs +++ b/crates/milli/src/search/hybrid.rs @@ -164,7 +164,7 @@ impl Search<'_> { sort_criteria: self.sort_criteria.clone(), distinct: self.distinct.clone(), searchable_attributes: self.searchable_attributes, - geo_strategy: self.geo_strategy, + geo_param: self.geo_param, terms_matching_strategy: self.terms_matching_strategy, scoring_strategy: ScoringStrategy::Detailed, words_limit: self.words_limit, diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index 0dd639c59..def00ec92 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -45,7 +45,7 @@ pub struct Search<'a> { sort_criteria: Option>, distinct: Option, searchable_attributes: Option<&'a [String]>, - geo_strategy: new::GeoSortStrategy, + geo_param: new::GeoSortParameter, terms_matching_strategy: TermsMatchingStrategy, scoring_strategy: ScoringStrategy, words_limit: usize, @@ -68,7 +68,7 @@ impl<'a> Search<'a> { sort_criteria: None, distinct: None, searchable_attributes: None, - geo_strategy: new::GeoSortStrategy::default(), + geo_param: new::GeoSortParameter::default(), terms_matching_strategy: TermsMatchingStrategy::default(), scoring_strategy: Default::default(), exhaustive_number_hits: false, @@ -145,7 +145,13 @@ impl<'a> Search<'a> { #[cfg(test)] pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> { - self.geo_strategy = strategy; + self.geo_param.strategy = strategy; + self + } + + #[cfg(test)] + pub fn geo_max_bucket_size(&mut self, max_size: u64) -> &mut Search<'a> { + self.geo_param.max_bucket_size = max_size; self } @@ -232,7 +238,7 @@ impl<'a> Search<'a> { universe, &self.sort_criteria, &self.distinct, - self.geo_strategy, + self.geo_param, self.offset, self.limit, embedder_name, @@ -251,7 +257,7 @@ impl<'a> Search<'a> { universe, &self.sort_criteria, &self.distinct, - self.geo_strategy, + self.geo_param, self.offset, self.limit, Some(self.words_limit), @@ -290,7 +296,7 @@ impl fmt::Debug for Search<'_> { sort_criteria, distinct, searchable_attributes, - geo_strategy: _, + geo_param: _, terms_matching_strategy, scoring_strategy, words_limit, diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index 1c52b0a5b..7f1c0feff 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -39,6 +39,22 @@ fn facet_number_values<'a>( Ok(iter) } +#[derive(Debug, Clone, Copy)] + +pub struct Parameter { + // Define the strategy used by the geo sort + pub strategy: Strategy, + // Limit the number of docs in a single bucket to avoid unexpectedly large overhead + pub max_bucket_size: u64, + // Considering the errors of GPS and geographical calculations, distances less than distance_error_margin will be treated as equal + pub distance_error_margin: f64, +} + +impl Default for Parameter { + fn default() -> Self { + Self { strategy: Strategy::default(), max_bucket_size: 1000, distance_error_margin: 1.0 } + } +} /// Define the strategy used by the geo sort. /// The parameter represents the cache size, and, in the case of the Dynamic strategy, /// the point where we move from using the iterative strategy to the rtree. @@ -71,26 +87,6 @@ impl Strategy { } } -#[cfg(not(test))] -fn default_max_bucket_size() -> u64 { - 1000 -} - -#[cfg(test)] -static DEFAULT_MAX_BUCKET_SIZE: std::sync::Mutex = std::sync::Mutex::new(1000); - -#[cfg(test)] -pub fn set_default_max_bucket_size(n: u64) { - let mut size = DEFAULT_MAX_BUCKET_SIZE.lock().unwrap(); - *size = n; -} - -#[cfg(test)] -fn default_max_bucket_size() -> u64 { - let max_size = *(DEFAULT_MAX_BUCKET_SIZE.lock().unwrap()); - max_size -} - pub struct GeoSort { query: Option, @@ -111,22 +107,23 @@ pub struct GeoSort { impl GeoSort { pub fn new( - strategy: Strategy, + parameter: &Parameter, geo_faceted_docids: RoaringBitmap, point: [f64; 2], ascending: bool, ) -> Result { + let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter; Ok(Self { query: None, - strategy, + strategy: *strategy, ascending, point, geo_candidates: geo_faceted_docids, field_ids: None, rtree: None, cached_sorted_docids: VecDeque::new(), - max_bucket_size: default_max_bucket_size(), - distance_error_margin: 1.0, + max_bucket_size: *max_bucket_size, + distance_error_margin: *distance_error_margin, }) } diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs index e30f11e94..2d6f2cf17 100644 --- a/crates/milli/src/search/new/matches/mod.rs +++ b/crates/milli/src/search/new/matches/mod.rs @@ -513,7 +513,7 @@ mod tests { universe, &None, &None, - crate::search::new::GeoSortStrategy::default(), + crate::search::new::GeoSortParameter::default(), 0, 100, Some(10), diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index b9161b417..5042fb3b7 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -45,6 +45,7 @@ use sort::Sort; use self::distinct::facet_string_values; use self::geo_sort::GeoSort; +pub use self::geo_sort::Parameter as GeoSortParameter; pub use self::geo_sort::Strategy as GeoSortStrategy; use self::graph_based_ranking_rule::Words; use self::interner::Interned; @@ -274,7 +275,7 @@ fn resolve_negative_phrases( fn get_ranking_rules_for_placeholder_search<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, - geo_strategy: geo_sort::Strategy, + geo_param: geo_sort::Parameter, ) -> Result>> { let mut sort = false; let mut sorted_fields = HashSet::new(); @@ -299,7 +300,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( &mut ranking_rules, &mut sorted_fields, &mut geo_sorted, - geo_strategy, + &geo_param, )?; sort = true; } @@ -326,7 +327,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( fn get_ranking_rules_for_vector<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, - geo_strategy: geo_sort::Strategy, + geo_param: geo_sort::Parameter, limit_plus_offset: usize, target: &[f32], embedder_name: &str, @@ -375,7 +376,7 @@ fn get_ranking_rules_for_vector<'ctx>( &mut ranking_rules, &mut sorted_fields, &mut geo_sorted, - geo_strategy, + &geo_param, )?; sort = true; } @@ -403,7 +404,7 @@ fn get_ranking_rules_for_vector<'ctx>( fn get_ranking_rules_for_query_graph_search<'ctx>( ctx: &SearchContext<'ctx>, sort_criteria: &Option>, - geo_strategy: geo_sort::Strategy, + geo_param: geo_sort::Parameter, terms_matching_strategy: TermsMatchingStrategy, ) -> Result>> { // query graph search @@ -477,7 +478,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>( &mut ranking_rules, &mut sorted_fields, &mut geo_sorted, - geo_strategy, + &geo_param, )?; sort = true; } @@ -514,7 +515,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( ranking_rules: &mut Vec>, sorted_fields: &mut HashSet, geo_sorted: &mut bool, - geo_strategy: geo_sort::Strategy, + geo_param: &geo_sort::Parameter, ) -> Result<()> { let sort_criteria = sort_criteria.clone().unwrap_or_default(); ranking_rules.reserve(sort_criteria.len()); @@ -540,7 +541,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( } let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?; ranking_rules.push(Box::new(GeoSort::new( - geo_strategy, + geo_param, geo_faceted_docids, point, true, @@ -552,7 +553,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( } let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?; ranking_rules.push(Box::new(GeoSort::new( - geo_strategy, + geo_param, geo_faceted_docids, point, false, @@ -584,7 +585,7 @@ pub fn execute_vector_search( universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, - geo_strategy: geo_sort::Strategy, + geo_param: geo_sort::Parameter, from: usize, length: usize, embedder_name: &str, @@ -600,7 +601,7 @@ pub fn execute_vector_search( let ranking_rules = get_ranking_rules_for_vector( ctx, sort_criteria, - geo_strategy, + geo_param, from + length, vector, embedder_name, @@ -647,7 +648,7 @@ pub fn execute_search( mut universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, - geo_strategy: geo_sort::Strategy, + geo_param: geo_sort::Parameter, from: usize, length: usize, words_limit: Option, @@ -761,7 +762,7 @@ pub fn execute_search( let ranking_rules = get_ranking_rules_for_query_graph_search( ctx, sort_criteria, - geo_strategy, + geo_param, terms_matching_strategy, )?; @@ -783,7 +784,7 @@ pub fn execute_search( )? } else { let ranking_rules = - get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?; + get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_param)?; bucket_sort( ctx, ranking_rules, diff --git a/crates/milli/src/search/new/tests/geo_sort.rs b/crates/milli/src/search/new/tests/geo_sort.rs index 6d3df5262..f67d3ffdf 100644 --- a/crates/milli/src/search/new/tests/geo_sort.rs +++ b/crates/milli/src/search/new/tests/geo_sort.rs @@ -157,10 +157,10 @@ fn test_geo_sort_reached_max_bucket_size() { ])) .unwrap(); - crate::search::new::geo_sort::set_default_max_bucket_size(2); let rtxn = index.read_txn().unwrap(); let mut s = Search::new(&rtxn, &index); + s.geo_max_bucket_size(2); s.scoring_strategy(crate::score_details::ScoringStrategy::Detailed); s.sort_criteria(vec![ AscDesc::Asc(Member::Geo([0., 0.])), @@ -200,9 +200,6 @@ fn test_geo_sort_reached_max_bucket_size() { } let no_geo_ids = rtree_ids[10..].iter().collect_vec(); insta::assert_snapshot!(format!("{no_geo_ids:?}"), @r#"["1", "4", "3", "2", "5"]"#); - - // recover settings - crate::search::new::geo_sort::set_default_max_bucket_size(1000); } #[test] From 55adbac2dddc3b706ffbff4b9b99596abc61f844 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 15 Apr 2025 14:43:07 +0200 Subject: [PATCH 10/10] Apply suggestions from code review --- crates/milli/src/search/new/geo_sort.rs | 9 ++++----- crates/milli/src/search/new/mod.rs | 8 ++++---- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/crates/milli/src/search/new/geo_sort.rs b/crates/milli/src/search/new/geo_sort.rs index 7f1c0feff..663599553 100644 --- a/crates/milli/src/search/new/geo_sort.rs +++ b/crates/milli/src/search/new/geo_sort.rs @@ -40,7 +40,6 @@ fn facet_number_values<'a>( } #[derive(Debug, Clone, Copy)] - pub struct Parameter { // Define the strategy used by the geo sort pub strategy: Strategy, @@ -107,7 +106,7 @@ pub struct GeoSort { impl GeoSort { pub fn new( - parameter: &Parameter, + parameter: Parameter, geo_faceted_docids: RoaringBitmap, point: [f64; 2], ascending: bool, @@ -115,15 +114,15 @@ impl GeoSort { let Parameter { strategy, max_bucket_size, distance_error_margin } = parameter; Ok(Self { query: None, - strategy: *strategy, + strategy, ascending, point, geo_candidates: geo_faceted_docids, field_ids: None, rtree: None, cached_sorted_docids: VecDeque::new(), - max_bucket_size: *max_bucket_size, - distance_error_margin: *distance_error_margin, + max_bucket_size, + distance_error_margin, }) } diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index 5042fb3b7..8b04d8c6a 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -300,7 +300,7 @@ fn get_ranking_rules_for_placeholder_search<'ctx>( &mut ranking_rules, &mut sorted_fields, &mut geo_sorted, - &geo_param, + geo_param, )?; sort = true; } @@ -376,7 +376,7 @@ fn get_ranking_rules_for_vector<'ctx>( &mut ranking_rules, &mut sorted_fields, &mut geo_sorted, - &geo_param, + geo_param, )?; sort = true; } @@ -478,7 +478,7 @@ fn get_ranking_rules_for_query_graph_search<'ctx>( &mut ranking_rules, &mut sorted_fields, &mut geo_sorted, - &geo_param, + geo_param, )?; sort = true; } @@ -515,7 +515,7 @@ fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>( ranking_rules: &mut Vec>, sorted_fields: &mut HashSet, geo_sorted: &mut bool, - geo_param: &geo_sort::Parameter, + geo_param: geo_sort::Parameter, ) -> Result<()> { let sort_criteria = sort_criteria.clone().unwrap_or_default(); ranking_rules.reserve(sort_criteria.len());