mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 13:10:06 +01:00
Fix bugs and add tests to exactness ranking rule
commit d3a94e8b25
parent 8f2e971879
@@ -91,6 +91,12 @@ impl State {
         universe: &RoaringBitmap,
         query_graph: &QueryGraph,
     ) -> Result<Self> {
+        // An ordered list of the (remaining) query terms, with data extracted from them:
+        // 0. exact subterm. If it doesn't exist, the term is skipped.
+        // 1. start position of the term
+        // 2. id of the term
+        let mut count_all_positions = 0;
+
         let mut exact_term_position_ids: Vec<(ExactTerm, u16, u8)> =
             Vec::with_capacity(query_graph.nodes.len() as usize);
         for (_, node) in query_graph.nodes.iter() {

@@ -101,6 +107,7 @@ impl State {
                     } else {
                         continue;
                     };
+                    count_all_positions += term.positions.len();
                    exact_term_position_ids.push((
                         exact_term,
                         *term.positions.start(),

@@ -195,13 +202,15 @@ impl State {
             if !intersection.is_empty() {
-                // TODO: although not really worth it in terms of performance,
-                // if would be good to put this in cache for the sake of consistency
-                let candidates_with_exact_word_count = ctx
-                    .index
-                    .field_id_word_count_docids
-                    .get(ctx.txn, &(fid, exact_term_position_ids.len() as u8))?
-                    .unwrap_or_default();
+                // TODO: consider if we must store the candidates as arrays, or if there is a way to perform the union
+                // here.
+                let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize {
+                    ctx.index
+                        .field_id_word_count_docids
+                        .get(ctx.txn, &(fid, count_all_positions as u8))?
+                        .unwrap_or_default()
+                } else {
+                    RoaringBitmap::default()
+                };

                 candidates_per_attribute.push(FieldCandidates {
                     start_with_exact: intersection,
                     exact_word_count: candidates_with_exact_word_count,
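Note on the fix above: the lookup in `field_id_word_count_docids` must be keyed by the number of *positions* the remaining query covers, not by the number of query terms, because a single term can span several positions (an ngram merged from two words covers two). The key is a `u8`, so the cast also has to be guarded. A minimal standalone sketch of the guarded lookup, assuming the `roaring` crate, with `lookup_word_count_docids` as a hypothetical stand-in for the database access:

    use roaring::RoaringBitmap;

    /// Hypothetical stand-in for `index.field_id_word_count_docids.get(...)`:
    /// docids of documents whose field `fid` contains exactly `word_count` words.
    fn lookup_word_count_docids(_fid: u16, _word_count: u8) -> Option<RoaringBitmap> {
        None // database access elided in this sketch
    }

    /// Candidates whose field `fid` contains exactly as many words as the query
    /// covers positions, guarding the `usize -> u8` cast against wrap-around.
    fn candidates_with_exact_word_count(fid: u16, count_all_positions: usize) -> RoaringBitmap {
        if count_all_positions < u8::MAX as usize {
            lookup_word_count_docids(fid, count_all_positions as u8).unwrap_or_default()
        } else {
            // No field indexed under a u8 word count can match such a long query.
            RoaringBitmap::default()
        }
    }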
@@ -310,11 +310,11 @@ impl QueryGraph {
             rank as u16
         };
         let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
-        let mut at_least_one_phrase = false;
+        let mut at_least_one_mandatory_term = false;
         for (node_id, node) in self.nodes.iter() {
             let QueryNodeData::Term(t) = &node.data else { continue };
-            if t.term_subset.original_phrase(ctx).is_some() {
-                at_least_one_phrase = true;
+            if t.term_subset.original_phrase(ctx).is_some() || t.term_subset.is_mandatory() {
+                at_least_one_mandatory_term = true;
                 continue;
             }
             let mut cost = 0;

@@ -327,7 +327,7 @@ impl QueryGraph {
                 .insert(node_id);
         }
         let mut res: Vec<_> = nodes_to_remove.into_values().collect();
-        if !at_least_one_phrase {
+        if !at_least_one_mandatory_term {
             res.pop();
         }
         res
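The rename from `at_least_one_phrase` to `at_least_one_mandatory_term` widens the old guard: previously only phrases were exempt from the terms-matching strategy; now any term flagged mandatory (the exactness rule sets this flag further down) is exempt too, and the final removal bucket is only popped when no exempt term exists. A condensed sketch of the selection logic, with `Term` pared down to the fields that matter here (a simplification, not milli's type):

    use std::collections::BTreeMap;

    /// Pared-down stand-in for a query-graph term node (hypothetical).
    struct Term {
        is_phrase: bool,
        is_mandatory: bool,
        cost: u16, // cheap terms are scheduled for removal first
    }

    /// Buckets of term ids in removal order. The last bucket would strip the
    /// final optional terms, so it is popped unless some term can never be
    /// removed anyway (a phrase or a mandatory term).
    fn removal_buckets(terms: &[Term]) -> Vec<Vec<usize>> {
        let mut at_least_one_mandatory_term = false;
        let mut buckets = BTreeMap::<u16, Vec<usize>>::new();
        for (id, term) in terms.iter().enumerate() {
            if term.is_phrase || term.is_mandatory {
                at_least_one_mandatory_term = true;
                continue; // exempt from the terms-matching strategy
            }
            buckets.entry(term.cost).or_default().push(id);
        }
        let mut res: Vec<_> = buckets.into_values().collect();
        if !at_least_one_mandatory_term {
            res.pop(); // keep at least one term in the query graph
        }
        res
    }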
@@ -4,6 +4,7 @@ mod parse_query;
 mod phrase;

 use std::collections::BTreeSet;
+use std::iter::FromIterator;
 use std::ops::RangeInclusive;

 use compute_derivations::partially_initialized_term_from_word;

@@ -30,6 +31,12 @@ pub struct QueryTermSubset {
     zero_typo_subset: NTypoTermSubset,
     one_typo_subset: NTypoTermSubset,
     two_typo_subset: NTypoTermSubset,
+    /// `true` if the term cannot be deleted through the term matching strategy
+    ///
+    /// Note that there are other reasons for which a term cannot be deleted, such as
+    /// being a phrase. In that case, this field could be set to `false`, but it
+    /// still wouldn't be deletable by the term matching strategy.
+    mandatory: bool,
 }

 #[derive(Clone, PartialEq, Eq, Hash)]

@@ -114,6 +121,12 @@ impl ExactTerm {
 }

 impl QueryTermSubset {
+    pub fn is_mandatory(&self) -> bool {
+        self.mandatory
+    }
+    pub fn make_mandatory(&mut self) {
+        self.mandatory = true;
+    }
     pub fn exact_term(&self, ctx: &SearchContext) -> Option<ExactTerm> {
         let full_query_term = ctx.term_interner.get(self.original);
         if full_query_term.ngram_words.is_some() {

@@ -135,6 +148,7 @@ impl QueryTermSubset {
             zero_typo_subset: NTypoTermSubset::Nothing,
             one_typo_subset: NTypoTermSubset::Nothing,
             two_typo_subset: NTypoTermSubset::Nothing,
+            mandatory: false,
         }
     }
     pub fn full(for_term: Interned<QueryTerm>) -> Self {

@@ -143,6 +157,7 @@ impl QueryTermSubset {
             zero_typo_subset: NTypoTermSubset::All,
             one_typo_subset: NTypoTermSubset::All,
             two_typo_subset: NTypoTermSubset::All,
+            mandatory: false,
         }
     }

@@ -352,6 +367,28 @@ impl QueryTermSubset {
             _ => panic!(),
         }
     }
+    pub fn keep_only_exact_term(&mut self, ctx: &SearchContext) {
+        if let Some(term) = self.exact_term(ctx) {
+            match term {
+                ExactTerm::Phrase(p) => {
+                    self.zero_typo_subset = NTypoTermSubset::Subset {
+                        words: BTreeSet::new(),
+                        phrases: BTreeSet::from_iter([p]),
+                    };
+                    self.clear_one_typo_subset();
+                    self.clear_two_typo_subset();
+                }
+                ExactTerm::Word(w) => {
+                    self.zero_typo_subset = NTypoTermSubset::Subset {
+                        words: BTreeSet::from_iter([w]),
+                        phrases: BTreeSet::new(),
+                    };
+                    self.clear_one_typo_subset();
+                    self.clear_two_typo_subset();
+                }
+            }
+        }
+    }
     pub fn clear_zero_typo_subset(&mut self) {
         self.zero_typo_subset = NTypoTermSubset::Nothing;
     }
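`keep_only_exact_term` narrows a term subset so that only the exact variant survives: the zero-typo subset is reduced to the single exact word or phrase (dropping other zero-typo derivations such as prefixes or synonyms), and the one- and two-typo subsets are emptied. A reduced model of the narrowing, using plain strings instead of interned ids (a sketch, not the real `NTypoTermSubset`):

    use std::collections::BTreeSet;

    /// Reduced model of `NTypoTermSubset` using plain strings (sketch only).
    enum NTypoTermSubset {
        All,
        Subset { words: BTreeSet<String>, phrases: BTreeSet<String> },
        Nothing,
    }

    struct TermSubset {
        zero_typo_subset: NTypoTermSubset,
        one_typo_subset: NTypoTermSubset,
        two_typo_subset: NTypoTermSubset,
    }

    impl TermSubset {
        /// Keep only the exact word: every derivation that needs a typo
        /// (and any other zero-typo derivation) is discarded.
        fn keep_only_exact_word(&mut self, exact: &str) {
            self.zero_typo_subset = NTypoTermSubset::Subset {
                words: BTreeSet::from_iter([exact.to_owned()]),
                phrases: BTreeSet::new(),
            };
            self.one_typo_subset = NTypoTermSubset::Nothing;
            self.two_typo_subset = NTypoTermSubset::Nothing;
        }
    }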
@@ -34,7 +34,7 @@ fn compute_docids(
         }
     }
     };
-    // TODO: synonyms?
+
     candidates &= universe;
     Ok(candidates)
 }

@@ -47,18 +47,21 @@ impl RankingRuleGraphTrait for ExactnessGraph {
         condition: &Self::Condition,
         universe: &RoaringBitmap,
     ) -> Result<ComputedCondition> {
-        let (docids, dest_node) = match condition {
+        let (docids, end_term_subset) = match condition {
             ExactnessCondition::ExactInAttribute(dest_node) => {
-                (compute_docids(ctx, dest_node, universe)?, dest_node)
+                let mut end_term_subset = dest_node.clone();
+                end_term_subset.term_subset.keep_only_exact_term(ctx);
+                end_term_subset.term_subset.make_mandatory();
+                (compute_docids(ctx, dest_node, universe)?, end_term_subset)
             }
-            ExactnessCondition::Skip(dest_node) => (universe.clone(), dest_node),
+            ExactnessCondition::Skip(dest_node) => (universe.clone(), dest_node.clone()),
         };

         Ok(ComputedCondition {
             docids,
             universe_len: universe.len(),
             start_term_subset: None,
-            // TODO/FIXME: modify `end_term_subset` to signal to the next ranking rules that the term cannot be removed
-            end_term_subset: dest_node.clone(),
+            end_term_subset,
         })
     }
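Returning the narrowed, mandatory `end_term_subset` is what resolves the removed TODO/FIXME: the constraint now travels inside `ComputedCondition` to the ranking rules that run next. A tiny sketch of how a consumer can honor the flag (both types here are simplified stand-ins, not milli's):

    /// Simplified stand-ins for the propagated state (hypothetical types).
    struct TermSubset { mandatory: bool }
    struct ComputedCondition { end_term_subset: TermSubset /* docids etc. elided */ }

    /// What the `words` ranking rule effectively asks before scheduling a
    /// term for removal.
    fn may_remove_term(cond: &ComputedCondition) -> bool {
        !cond.end_term_subset.mandatory
    }

    fn main() {
        let cond = ComputedCondition { end_term_subset: TermSubset { mandatory: true } };
        assert!(!may_remove_term(&cond)); // `words` must keep this term
    }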
@@ -165,8 +165,8 @@ pub fn compute_query_graph_docids(
                 positions: _,
                 term_ids: _,
             }) => {
-                let phrase_docids = compute_query_term_subset_docids(ctx, term_subset)?;
-                predecessors_docids & phrase_docids
+                let node_docids = compute_query_term_subset_docids(ctx, term_subset)?;
+                predecessors_docids & node_docids
             }
             QueryNodeData::Deleted => {
                 panic!()
@@ -14,6 +14,11 @@ This module tests the following properties about the exactness ranking rule:
 1. those that have an attribute which is equal to the whole remaining query, if this query does not have any "gap"
 2. those that have an attribute which starts with the whole remaining query, if this query does not have any "gap"
 3. those that contain the most exact words from the remaining query
+
+- if it is followed by other ranking rules, then:
+  1. `words` will not remove the exact terms matched by `exactness`
+  2. graph-based ranking rules (`typo`, `proximity`, `attribute`) will only work with
+     (1) the exact terms selected by `exactness` or (2) the full query term otherwise
 */

@@ -21,7 +26,7 @@ use crate::{
     SearchResult, TermsMatchingStrategy,
 };

-fn create_index_exact_words_simple_ordered() -> TempIndex {
+fn create_index_simple_ordered() -> TempIndex {
     let index = TempIndex::new();

     index
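Property 3 above ("the most exact words") is what the typo fixtures added further down are built to expose: every misspelled document gets an exact twin, and the twin must always rank first. A toy illustration of the ordering criterion (an illustration only; milli's implementation works on roaring bitmaps per rank, not on strings):

    use std::collections::HashSet;

    /// Toy scorer: how many query words appear verbatim in the text.
    fn exact_word_count(query: &str, text: &str) -> usize {
        let text_words: HashSet<&str> = text.split_whitespace().collect();
        query.split_whitespace().filter(|w| text_words.contains(w)).count()
    }

    fn main() {
        let query = "the quick brown fox";
        // The exact twin outranks its typo sibling: 4 exact words vs 2.
        assert_eq!(exact_word_count(query, "the quick brown fox"), 4);
        assert_eq!(exact_word_count(query, "the quack briwn fox"), 2);
    }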
@@ -80,7 +85,7 @@ fn create_index_exact_words_simple_ordered() -> TempIndex {
     index
 }

-fn create_index_exact_words_simple_reversed() -> TempIndex {
+fn create_index_simple_reversed() -> TempIndex {
     let index = TempIndex::new();

     index

@@ -138,7 +143,7 @@ fn create_index_exact_words_simple_reversed() -> TempIndex {
     index
 }

-fn create_index_exact_words_simple_random() -> TempIndex {
+fn create_index_simple_random() -> TempIndex {
     let index = TempIndex::new();

     index
@@ -242,9 +247,192 @@ fn create_index_attribute_starts_with() -> TempIndex {
     index
 }

+fn create_index_simple_ordered_with_typos() -> TempIndex {
+    let index = TempIndex::new();
+
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_criteria(vec![Criterion::Exactness]);
+        })
+        .unwrap();
+
+    index
+        .add_documents(documents!([
+            {
+                "id": 0,
+                "text": "",
+            },
+            {
+                "id": 1,
+                "text": "the",
+            },
+            {
+                "id": 2,
+                "text": "the quack",
+            },
+            {
+                "id": 3,
+                "text": "the quack briwn",
+            },
+            {
+                "id": 4,
+                "text": "the quack briwn fox",
+            },
+            {
+                "id": 5,
+                "text": "the quack briwn fox jlmps",
+            },
+            {
+                "id": 6,
+                "text": "the quack briwn fox jlmps over",
+            },
+            {
+                "id": 7,
+                "text": "the quack briwn fox jlmps over the",
+            },
+            {
+                "id": 8,
+                "text": "the quack briwn fox jlmps over the lazy",
+            },
+            {
+                "id": 9,
+                "text": "the quack briwn fox jlmps over the lazy dog",
+            },
+            {
+                "id": 10,
+                "text": "",
+            },
+            {
+                "id": 11,
+                "text": "the",
+            },
+            {
+                "id": 12,
+                "text": "the quick",
+            },
+            {
+                "id": 13,
+                "text": "the quick brown",
+            },
+            {
+                "id": 14,
+                "text": "the quick brown fox",
+            },
+            {
+                "id": 15,
+                "text": "the quick brown fox jumps",
+            },
+
+            {
+                "id": 16,
+                "text": "the quick brown fox jumps over",
+            },
+            {
+                "id": 17,
+                "text": "the quick brown fox jumps over the",
+            },
+            {
+                "id": 18,
+                "text": "the quick brown fox jumps over the lazy",
+            },
+            {
+                "id": 19,
+                "text": "the quick brown fox jumps over the lazy dog",
+            },
+        ]))
+        .unwrap();
+    index
+}
+
+fn create_index_with_varying_proximities() -> TempIndex {
+    let index = TempIndex::new();
+
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
+        })
+        .unwrap();
+
+    index
+        .add_documents(documents!([
+            {
+                "id": 0,
+                "text": "lazy jumps dog brown quick the over fox the",
+            },
+            {
+                "id": 1,
+                "text": "the quick brown fox jumps over the very lazy dog"
+            },
+            {
+                "id": 2,
+                "text": "the quick brown fox jumps over the lazy dog",
+            },
+            {
+                "id": 3,
+                "text": "dog brown quick the over fox the lazy",
+            },
+            {
+                "id": 4,
+                "text": "the quick brown fox over the very lazy dog"
+            },
+            {
+                "id": 5,
+                "text": "the quick brown fox over the lazy dog",
+            },
+            {
+                "id": 6,
+                "text": "brown quick the over fox",
+            },
+            {
+                "id": 7,
+                "text": "the very quick brown fox over"
+            },
+            {
+                "id": 8,
+                "text": "the quick brown fox over",
+            },
+        ]))
+        .unwrap();
+    index
+}
+
+fn create_index_all_equal_except_proximity_between_ignored_terms() -> TempIndex {
+    let index = TempIndex::new();
+
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
+        })
+        .unwrap();
+
+    index
+        .add_documents(documents!([
+            {
+                "id": 0,
+                "text": "lazy jumps dog brown quick the over fox the"
+            },
+            {
+                "id": 1,
+                "text": "lazy jumps dog brown quick the over fox the. quack briwn jlmps",
+            },
+            {
+                "id": 2,
+                "text": "lazy jumps dog brown quick the over fox the. quack briwn jlmps overt",
+            },
+        ]))
+        .unwrap();
+    index
+}
+
 #[test]
 fn test_exactness_simple_ordered() {
-    let index = create_index_exact_words_simple_ordered();
+    let index = create_index_simple_ordered();

     let txn = index.read_txn().unwrap();

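The ordered fixture above follows a strict pattern: one document per prefix of the sentence, ids 0-9 for the typo sentence and 10-19 for its exact twin. A small generator makes the pattern explicit (a sketch; the test file deliberately inlines the documents instead):

    /// Rebuilds the "one document per sentence prefix" layout used above:
    /// ids 0..=9 for the typo sentence, 10..=19 for the exact twin.
    fn prefix_documents(typo: &str, exact: &str) -> Vec<(usize, String)> {
        let mut docs = Vec::new();
        for (sentence_idx, sentence) in [typo, exact].iter().enumerate() {
            let words: Vec<&str> = sentence.split_whitespace().collect();
            for prefix_len in 0..=words.len() {
                let id = sentence_idx * (words.len() + 1) + prefix_len;
                docs.push((id, words[..prefix_len].join(" ")));
            }
        }
        docs
    }

    fn main() {
        let docs = prefix_documents(
            "the quack briwn fox jlmps over the lazy dog",
            "the quick brown fox jumps over the lazy dog",
        );
        assert_eq!(docs.len(), 20);
        assert_eq!(docs[3].1, "the quack briwn"); // matches document id 3 above
    }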
@@ -271,7 +459,7 @@ fn test_exactness_simple_ordered() {

 #[test]
 fn test_exactness_simple_reversed() {
-    let index = create_index_exact_words_simple_reversed();
+    let index = create_index_simple_reversed();

     let txn = index.read_txn().unwrap();

@@ -318,7 +506,7 @@ fn test_exactness_simple_reversed() {

 #[test]
 fn test_exactness_simple_random() {
-    let index = create_index_exact_words_simple_random();
+    let index = create_index_simple_random();

     let txn = index.read_txn().unwrap();

@@ -377,13 +565,12 @@ fn test_exactness_attribute_starts_with_phrase() {
     s.terms_matching_strategy(TermsMatchingStrategy::Last);
     s.query("\"overlooking the sea\" is a beautiful balcony");
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
-    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[5, 6, 4, 3, 1, 0, 2]");
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 0, 2]");
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
-    // TODO: this is incorrect, the first document returned here should actually be the second one
     insta::assert_debug_snapshot!(texts, @r###"
     [
-        "\"overlooking the sea is a beautiful balcony, I love it\"",
         "\"overlooking the sea is a beautiful balcony\"",
+        "\"overlooking the sea is a beautiful balcony, I love it\"",
         "\"a beautiful balcony is overlooking the sea\"",
         "\"over looking the sea is a beautiful balcony\"",
         "\"this balcony is overlooking the sea\"",

@@ -398,7 +585,6 @@ fn test_exactness_attribute_starts_with_phrase() {
     let SearchResult { documents_ids, .. } = s.execute().unwrap();
     insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6, 5, 4, 3, 1, 0, 2, 7]");
     let texts = collect_field_values(&index, &txn, "text", &documents_ids);
-    // TODO: this is correct, so the exactness ranking rule probably has a bug in the handling of phrases
     insta::assert_debug_snapshot!(texts, @r###"
     [
         "\"overlooking the sea is a beautiful balcony\"",
@@ -440,3 +626,148 @@ fn test_exactness_all_candidates_with_typo() {
     ]
     "###);
 }
+
+#[test]
+fn test_exactness_after_words() {
+    let index = create_index_simple_ordered_with_typos();
+
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Words, Criterion::Exactness]);
+        })
+        .unwrap();
+
+    let txn = index.read_txn().unwrap();
+
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.query("the quick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 9, 18, 8, 17, 16, 6, 7, 15, 5, 14, 4, 13, 3, 12, 2, 1, 11]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quack briwn fox jlmps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quack briwn fox jlmps over the lazy\"",
+        "\"the quick brown fox jumps over the\"",
+        "\"the quick brown fox jumps over\"",
+        "\"the quack briwn fox jlmps over\"",
+        "\"the quack briwn fox jlmps over the\"",
+        "\"the quick brown fox jumps\"",
+        "\"the quack briwn fox jlmps\"",
+        "\"the quick brown fox\"",
+        "\"the quack briwn fox\"",
+        "\"the quick brown\"",
+        "\"the quack briwn\"",
+        "\"the quick\"",
+        "\"the quack\"",
+        "\"the\"",
+        "\"the\"",
+    ]
+    "###);
+}
+
+#[test]
+fn test_words_after_exactness() {
+    let index = create_index_simple_ordered_with_typos();
+
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Exactness, Criterion::Words]);
+        })
+        .unwrap();
+
+    let txn = index.read_txn().unwrap();
+
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.query("the quick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 18, 16, 17, 9, 15, 8, 14, 6, 7, 13, 5, 4, 12, 3, 2, 1, 11]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over\"",
+        "\"the quick brown fox jumps over the\"",
+        "\"the quack briwn fox jlmps over the lazy dog\"",
+        "\"the quick brown fox jumps\"",
+        "\"the quack briwn fox jlmps over the lazy\"",
+        "\"the quick brown fox\"",
+        "\"the quack briwn fox jlmps over\"",
+        "\"the quack briwn fox jlmps over the\"",
+        "\"the quick brown\"",
+        "\"the quack briwn fox jlmps\"",
+        "\"the quack briwn fox\"",
+        "\"the quick\"",
+        "\"the quack briwn\"",
+        "\"the quack\"",
+        "\"the\"",
+        "\"the\"",
+    ]
+    "###);
+}
+
+#[test]
+fn test_proximity_after_exactness() {
+    let index = create_index_with_varying_proximities();
+
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
+        })
+        .unwrap();
+
+    let txn = index.read_txn().unwrap();
+
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.query("the quick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 1, 0, 5, 4, 3, 8, 6, 7]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the very lazy dog\"",
+        "\"lazy jumps dog brown quick the over fox the\"",
+        "\"the quick brown fox over the lazy dog\"",
+        "\"the quick brown fox over the very lazy dog\"",
+        "\"dog brown quick the over fox the lazy\"",
+        "\"the quick brown fox over\"",
+        "\"brown quick the over fox\"",
+        "\"the very quick brown fox over\"",
+    ]
+    "###);
+
+    let index = create_index_all_equal_except_proximity_between_ignored_terms();
+
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Exactness, Criterion::Words, Criterion::Proximity]);
+        })
+        .unwrap();
+
+    let txn = index.read_txn().unwrap();
+
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.query("the quick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2]");
+    let texts = collect_field_values(&index, &txn, "text", &documents_ids);
+
+    insta::assert_debug_snapshot!(texts, @r###"
+    [
+        "\"lazy jumps dog brown quick the over fox the\"",
+        "\"lazy jumps dog brown quick the over fox the. quack briwn jlmps\"",
+        "\"lazy jumps dog brown quick the over fox the. quack briwn jlmps overt\"",
+    ]
+    "###);
+}
@@ -33,7 +33,7 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
         &mut self,
         ctx: &mut SearchContext<'ctx>,
         _logger: &mut dyn SearchLogger<QueryGraph>,
-        _parent_candidates: &RoaringBitmap,
+        _universe: &RoaringBitmap,
         parent_query_graph: &QueryGraph,
     ) -> Result<()> {
         self.exhausted = false;

@@ -77,7 +77,6 @@ impl<'ctx> RankingRule<'ctx, QueryGraph> for Words {
             let nodes_to_remove = self.nodes_to_remove.pop().unwrap();
             query_graph.remove_nodes_keep_edges(&nodes_to_remove.iter().collect::<Vec<_>>());
         }
-
         Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
     }