mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 06:44:27 +01:00
Merge #3757
3757: Adjust the cost of edges in the `position` ranking rule by bucketing positions more aggressively r=loiclec a=loiclec This PR significantly improves the performance of the `position` ranking rule when: 1. a query contains many words 2. the `position` ranking rule needs to be called many times 3. the score of the documents according to `position` is high These conditions greatly increase: 1. the number of edge traversals that are needed to find a valid path from the `start` node to the `end` node 2. the number of edges that need to be deleted from the graph, and therefore the number of times that we need to recompute all the possible costs from START to END As a result, a majority of the search time is spent in `visit_condition`, `visit_node`, and `update_all_costs_before_node`. This is frustrating because it often happens when the "universe" given to the rule consists of only a handful of document ids. By limiting the number of possible edges between two nodes from `20` to `10`, we: 1. reduce the number of possible costs from START to END 2. reduce the number of edges that will be deleted 3. make it faster to update the costs after deleting an edge 4. reduce the number of buckets that need to be computed In terms of relevancy, I don't think we lose or gain much. We still prefer terms that are in lower positions, with decreasing precision as we go further. The previous bucketing wasn't chosen in a principled way, and neither is this one. They both "feel" right to me. Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com> Co-authored-by: meili-bors[bot] <89034592+meili-bors[bot]@users.noreply.github.com>
This commit is contained in:
commit
101f5a20d2
@ -111,23 +111,16 @@ impl RankingRuleGraphTrait for PositionGraph {
|
||||
|
||||
/// Maps a summed position to a coarse edge cost in `0..=10`.
///
/// The buckets widen as `sum_positions` grows: `0` and `1` each get their own
/// cost, then the ranges cover progressively more values, and everything above
/// `1024` collapses into the single highest cost. Small values are therefore
/// told apart precisely while large ones are only ordered roughly.
///
/// This is the table introduced by the change shown here; the table it
/// replaces spread the same input over the costs `0..=20`. Keeping the number
/// of distinct costs small limits how many different edge costs can exist
/// between two nodes of the graph.
///
/// The mapping is monotonic: a larger `sum_positions` never yields a smaller
/// cost.
fn cost_from_position(sum_positions: u32) -> u32 {
    match sum_positions {
        0 => 0,
        1 => 1,
        2..=4 => 2,
        5..=7 => 3,
        8..=11 => 4,
        12..=16 => 5,
        17..=24 => 6,
        25..=64 => 7,
        65..=256 => 8,
        257..=1024 => 9,
        // Anything further away is treated as equally bad.
        _ => 10,
    }
}
|
||||
|
@ -138,7 +138,7 @@ fn test_attribute_position_simple() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("quick brown");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]");
|
||||
}
|
||||
#[test]
|
||||
fn test_attribute_position_repeated() {
|
||||
@ -163,7 +163,7 @@ fn test_attribute_position_different_fields() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("quick brown");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -176,5 +176,5 @@ fn test_attribute_position_ngrams() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("quick brown");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 3, 4, 2, 1, 0, 6, 8, 7, 9, 5]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 11, 12, 13, 2, 3, 4, 1, 0, 6, 8, 7, 9, 5]");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user