mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
Fix errors, clippy warnings, and add review comments
This commit is contained in:
parent
48f5bb1693
commit
59b12fca87
@ -105,7 +105,7 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
|||||||
|
|
||||||
/// Refill the internal buffer of cached docids based on the strategy.
|
/// Refill the internal buffer of cached docids based on the strategy.
|
||||||
/// Drop the rtree if we don't need it anymore.
|
/// Drop the rtree if we don't need it anymore.
|
||||||
fn fill_buffer<'ctx>(&mut self, ctx: &mut SearchContext<'ctx>) -> Result<()> {
|
fn fill_buffer(&mut self, ctx: &mut SearchContext) -> Result<()> {
|
||||||
debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng");
|
debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng");
|
||||||
debug_assert!(self.cached_sorted_docids.is_empty());
|
debug_assert!(self.cached_sorted_docids.is_empty());
|
||||||
|
|
||||||
@ -133,7 +133,13 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
|||||||
// and only keep the latest candidates.
|
// and only keep the latest candidates.
|
||||||
for point in rtree.nearest_neighbor_iter(&point) {
|
for point in rtree.nearest_neighbor_iter(&point) {
|
||||||
if self.geo_candidates.contains(point.data.0) {
|
if self.geo_candidates.contains(point.data.0) {
|
||||||
self.cached_sorted_docids.pop_front();
|
// REVIEW COMMENT: that doesn't look right, because we only keep the furthest point in the cache.
|
||||||
|
// Then the cache will be exhausted after the first bucket and we'll need to repopulate it again immediately.
|
||||||
|
// I think it's okay if we keep every document id in the cache instead. It's a high memory usage,
|
||||||
|
// but we already have the whole rtree in memory, which is bigger than a vector of all document ids.
|
||||||
|
//
|
||||||
|
// self.cached_sorted_docids.pop_front();
|
||||||
|
//
|
||||||
self.cached_sorted_docids.push_back(point.data.0);
|
self.cached_sorted_docids.push_back(point.data.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -163,7 +169,10 @@ impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
|
|||||||
))
|
))
|
||||||
})
|
})
|
||||||
.collect::<Result<Vec<(u32, [f64; 2])>>>()?;
|
.collect::<Result<Vec<(u32, [f64; 2])>>>()?;
|
||||||
documents.sort_by_key(|(_, p)| distance_between_two_points(&self.point, &p) as usize);
|
// REVIEW COMMENT: the haversine distance function can be quite expensive, I think, so it's probably faster
|
||||||
|
// to use `sort_by_cached_key` instead of `sort_by_key`.
|
||||||
|
documents
|
||||||
|
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
|
||||||
self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id));
|
self.cached_sorted_docids.extend(documents.into_iter().map(|(doc_id, _)| doc_id));
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -193,7 +202,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
|||||||
self.query = Some(query.clone());
|
self.query = Some(query.clone());
|
||||||
self.geo_candidates &= universe;
|
self.geo_candidates &= universe;
|
||||||
|
|
||||||
if self.geo_candidates.len() == 0 {
|
if self.geo_candidates.is_empty() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -203,6 +212,10 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
|||||||
self.field_ids = Some([lat, lng]);
|
self.field_ids = Some([lat, lng]);
|
||||||
|
|
||||||
if self.strategy.use_rtree(self.geo_candidates.len() as usize) {
|
if self.strategy.use_rtree(self.geo_candidates.len() as usize) {
|
||||||
|
// REVIEW COMMENT: I would prefer to always keep the rtree in memory so that we don't have to deserialize it
|
||||||
|
// every time the geosort ranking rule starts iterating.
|
||||||
|
// So we'd initialize it in `::new` and never drop it.
|
||||||
|
//
|
||||||
self.rtree = Some(ctx.index.geo_rtree(ctx.txn)?.expect("geo candidates but no rtree"));
|
self.rtree = Some(ctx.index.geo_rtree(ctx.txn)?.expect("geo candidates but no rtree"));
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -210,6 +223,7 @@ impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(clippy::only_used_in_recursion)]
|
||||||
fn next_bucket(
|
fn next_bucket(
|
||||||
&mut self,
|
&mut self,
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
|
@ -401,8 +401,12 @@ pub fn execute_search(
|
|||||||
let graph = QueryGraph::from_query(ctx, &query_terms)?;
|
let graph = QueryGraph::from_query(ctx, &query_terms)?;
|
||||||
located_query_terms = Some(query_terms);
|
located_query_terms = Some(query_terms);
|
||||||
|
|
||||||
let ranking_rules =
|
let ranking_rules = get_ranking_rules_for_query_graph_search(
|
||||||
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;
|
ctx,
|
||||||
|
sort_criteria,
|
||||||
|
geo_strategy,
|
||||||
|
terms_matching_strategy,
|
||||||
|
)?;
|
||||||
|
|
||||||
universe =
|
universe =
|
||||||
resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;
|
resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;
|
||||||
|
@ -1,14 +1,9 @@
|
|||||||
/*!
|
/*!
|
||||||
This module tests the `geo_sort` ranking rule:
|
This module tests the `geo_sort` ranking rule:
|
||||||
|
|
||||||
1. an error is returned if the sort ranking rule exists but no fields-to-sort were given at search time
|
REVIEW COMMENT:
|
||||||
2. an error is returned if the fields-to-sort are not sortable
|
- nice tests :)
|
||||||
3. it is possible to add multiple fields-to-sort at search time
|
- add anything that seems not obvious about the behaviour of the geosort ranking rule here
|
||||||
4. custom sort ranking rules can be added to the settings, they interact with the generic `sort` ranking rule as expected
|
|
||||||
5. numbers appear before strings
|
|
||||||
6. documents with either: (1) no value, (2) null, or (3) an object for the field-to-sort appear at the end of the bucket
|
|
||||||
7. boolean values are translated to strings
|
|
||||||
8. if a field contains an array, it is sorted by the best value in the array according to the sort rule
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
@ -40,21 +35,21 @@ fn execute_iterative_and_rtree_returns_the_same<'a>(
|
|||||||
) -> Vec<usize> {
|
) -> Vec<usize> {
|
||||||
search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(2));
|
search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(2));
|
||||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
let iterative_ids_bucketed = collect_field_values(&index, rtxn, "id", &documents_ids);
|
let iterative_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids);
|
||||||
|
|
||||||
search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000));
|
search.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(1000));
|
||||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
let iterative_ids = collect_field_values(&index, rtxn, "id", &documents_ids);
|
let iterative_ids = collect_field_values(index, rtxn, "id", &documents_ids);
|
||||||
|
|
||||||
assert_eq!(iterative_ids_bucketed, iterative_ids, "iterative bucket");
|
assert_eq!(iterative_ids_bucketed, iterative_ids, "iterative bucket");
|
||||||
|
|
||||||
search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(2));
|
search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(2));
|
||||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
let rtree_ids_bucketed = collect_field_values(&index, rtxn, "id", &documents_ids);
|
let rtree_ids_bucketed = collect_field_values(index, rtxn, "id", &documents_ids);
|
||||||
|
|
||||||
search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000));
|
search.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(1000));
|
||||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||||
let rtree_ids = collect_field_values(&index, rtxn, "id", &documents_ids);
|
let rtree_ids = collect_field_values(index, rtxn, "id", &documents_ids);
|
||||||
|
|
||||||
assert_eq!(rtree_ids_bucketed, rtree_ids, "rtree bucket");
|
assert_eq!(rtree_ids_bucketed, rtree_ids, "rtree bucket");
|
||||||
|
|
||||||
@ -67,15 +62,24 @@ fn execute_iterative_and_rtree_returns_the_same<'a>(
|
|||||||
fn test_geo_sort() {
|
fn test_geo_sort() {
|
||||||
let index = create_index();
|
let index = create_index();
|
||||||
|
|
||||||
|
// REVIEW COMMENT:
|
||||||
|
// I prefer to make the external ids correspond to the internal ids so that
|
||||||
|
// we can check whether the ranking rules are actually doing work instead of
|
||||||
|
// returning documents in order of their internal ids.
|
||||||
|
//
|
||||||
index
|
index
|
||||||
.add_documents(documents!([
|
.add_documents(documents!([
|
||||||
{ "id": 2, "_geo": { "lat": 2, "lng": -1 } },
|
{ "id": 0, "_geo": { "lat": 2, "lng": -1 } },
|
||||||
{ "id": 3, "_geo": { "lat": -2, "lng": -2 } },
|
{ "id": 1, "_geo": { "lat": -2, "lng": -2 } },
|
||||||
{ "id": 5, "_geo": { "lat": 6, "lng": -5 } },
|
{ "id": 2, "_geo": { "lat": 6, "lng": -5 } },
|
||||||
{ "id": 4, "_geo": { "lat": 3, "lng": 5 } },
|
{ "id": 3, "_geo": { "lat": 3, "lng": 5 } },
|
||||||
{ "id": 0, "_geo": { "lat": 0, "lng": 0 } },
|
{ "id": 4, "_geo": { "lat": 0, "lng": 0 } },
|
||||||
{ "id": 1, "_geo": { "lat": 1, "lng": 1 } },
|
{ "id": 5, "_geo": { "lat": 1, "lng": 1 } },
|
||||||
{ "id": 6 }, { "id": 8 }, { "id": 7 }, { "id": 10 }, { "id": 9 },
|
{ "id": 6 },
|
||||||
|
{ "id": 7 },
|
||||||
|
{ "id": 8 },
|
||||||
|
{ "id": 9 },
|
||||||
|
{ "id": 10 },
|
||||||
]))
|
]))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
@ -88,66 +92,54 @@ fn test_geo_sort() {
|
|||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::Dynamic(100));
|
s.geo_sort_strategy(GeoSortStrategy::Dynamic(100));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::Dynamic(3));
|
s.geo_sort_strategy(GeoSortStrategy::Dynamic(3));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100));
|
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3));
|
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100));
|
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3));
|
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 3, 2, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["0", "1", "2", "3", "4", "5", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
// --- desc
|
// --- desc
|
||||||
s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]);
|
s.sort_criteria(vec![AscDesc::Desc(Member::Geo([0., 0.]))]);
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::Dynamic(100));
|
s.geo_sort_strategy(GeoSortStrategy::Dynamic(100));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::Dynamic(3));
|
s.geo_sort_strategy(GeoSortStrategy::Dynamic(3));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100));
|
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(100));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3));
|
s.geo_sort_strategy(GeoSortStrategy::AlwaysIterative(3));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100));
|
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(100));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###);
|
|
||||||
|
|
||||||
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3));
|
s.geo_sort_strategy(GeoSortStrategy::AlwaysRtree(3));
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
let ids = collect_field_values(&index, &txn, "id", &documents_ids);
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 5, 4, 6, 7, 8, 9, 10]");
|
||||||
insta::assert_snapshot!(format!("{ids:?}"), @r###"["5", "4", "3", "2", "1", "0", "6", "8", "7", "10", "9"]"###);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user