Merge forward and backward proximity conditions in proximity graph

This commit is contained in:
Loïc Lecrenier 2023-03-15 13:02:55 +01:00
parent c0cdaf9f53
commit 05fe856e6e
4 changed files with 50 additions and 58 deletions

View File

@ -303,7 +303,7 @@ mod tests {
let mut ctx = SearchContext::new(&index, &txn); let mut ctx = SearchContext::new(&index, &txn);
let results = execute_search( let results = execute_search(
&mut ctx, &mut ctx,
"releases from poison by the government", "which a the releases from poison by the government",
// "sun flower s are the best", // "sun flower s are the best",
// "zero config", // "zero config",
TermsMatchingStrategy::Last, TermsMatchingStrategy::Last,
@ -359,7 +359,7 @@ mod tests {
let start = Instant::now(); let start = Instant::now();
let mut s = Search::new(&txn, &index); let mut s = Search::new(&txn, &index);
s.query("releases from poison by the government"); s.query("which a the releases from poison by the government");
s.terms_matching_strategy(TermsMatchingStrategy::Last); s.terms_matching_strategy(TermsMatchingStrategy::Last);
// s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased); // s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
let docs = s.execute().unwrap(); let docs = s.execute().unwrap();

View File

@ -94,7 +94,7 @@ pub fn build_edges<'ctx>(
)]); )]);
} }
let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new(); let mut cost_word_pairs = BTreeMap::<u8, Vec<WordPair>>::new();
if let Some(right_prefix) = right_term.use_prefix_db { if let Some(right_prefix) = right_term.use_prefix_db {
for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) { for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
@ -106,7 +106,7 @@ pub fn build_edges<'ctx>(
right_ngram_length, right_ngram_length,
left_word, left_word,
right_prefix, right_prefix,
&mut cost_proximity_word_pairs, &mut cost_word_pairs,
left_phrase, left_phrase,
)?; )?;
} }
@ -129,28 +129,22 @@ pub fn build_edges<'ctx>(
right_ngram_length, right_ngram_length,
left_word, left_word,
right_word, right_word,
&mut cost_proximity_word_pairs, &mut cost_word_pairs,
&[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(), &[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
)?; )?;
} }
} }
let mut new_edges = cost_proximity_word_pairs let mut new_edges = cost_word_pairs
.into_iter() .into_iter()
.flat_map(|(cost, proximity_word_pairs)| { .map(|(cost, word_pairs)| {
let mut edges = vec![]; (
for (proximity, word_pairs) in proximity_word_pairs { cost,
edges.push(( EdgeCondition::Conditional(
cost, conditions_interner
EdgeCondition::Conditional(conditions_interner.insert( .insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }),
ProximityCondition::Pairs { ),
pairs: word_pairs.into_boxed_slice(), )
proximity,
},
)),
))
}
edges
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
new_edges.push(( new_edges.push((
@ -170,7 +164,7 @@ fn add_prefix_edges<'ctx>(
right_ngram_length: usize, right_ngram_length: usize,
left_word: Interned<String>, left_word: Interned<String>,
right_prefix: Interned<String>, right_prefix: Interned<String>,
cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>, cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
left_phrase: Option<Interned<Phrase>>, left_phrase: Option<Interned<Phrase>>,
) -> Result<()> { ) -> Result<()> {
for proximity in 1..=(8 - right_ngram_length) { for proximity in 1..=(8 - right_ngram_length) {
@ -188,16 +182,12 @@ fn add_prefix_edges<'ctx>(
)? )?
.is_some() .is_some()
{ {
cost_proximity_word_pairs cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefix {
.entry(cost) phrases: left_phrase.into_iter().collect(),
.or_default() left: left_word,
.entry(proximity as u8) right_prefix,
.or_default() proximity: proximity as u8,
.push(WordPair::WordPrefix { });
phrases: left_phrase.into_iter().collect(),
left: left_word,
right_prefix,
});
} }
// No swapping when computing the proximity between a phrase and a word // No swapping when computing the proximity between a phrase and a word
@ -213,12 +203,11 @@ fn add_prefix_edges<'ctx>(
)? )?
.is_some() .is_some()
{ {
cost_proximity_word_pairs cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefixSwapped {
.entry(cost) left_prefix: right_prefix,
.or_default() right: left_word,
.entry(proximity as u8) proximity: proximity as u8 - 1,
.or_default() });
.push(WordPair::WordPrefixSwapped { left_prefix: right_prefix, right: left_word });
} }
} }
Ok(()) Ok(())
@ -232,7 +221,7 @@ fn add_non_prefix_edges<'ctx>(
right_ngram_length: usize, right_ngram_length: usize,
word1: Interned<String>, word1: Interned<String>,
word2: Interned<String>, word2: Interned<String>,
cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>, cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
phrases: &[Interned<Phrase>], phrases: &[Interned<Phrase>],
) -> Result<()> { ) -> Result<()> {
for proximity in 1..=(8 - right_ngram_length) { for proximity in 1..=(8 - right_ngram_length) {
@ -248,12 +237,12 @@ fn add_non_prefix_edges<'ctx>(
)? )?
.is_some() .is_some()
{ {
cost_proximity_word_pairs cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words {
.entry(cost) phrases: phrases.to_vec(),
.or_default() left: word1,
.entry(proximity as u8) right: word2,
.or_default() proximity: proximity as u8,
.push(WordPair::Words { phrases: phrases.to_vec(), left: word1, right: word2 }); });
} }
if proximity > 1 if proximity > 1
// no swapping when either term is a phrase // no swapping when either term is a phrase
@ -269,12 +258,12 @@ fn add_non_prefix_edges<'ctx>(
)? )?
.is_some() .is_some()
{ {
cost_proximity_word_pairs cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words {
.entry(cost) phrases: vec![],
.or_default() left: word2,
.entry(proximity as u8 - 1) right: word1,
.or_default() proximity: proximity as u8 - 1,
.push(WordPair::Words { phrases: vec![], left: word2, right: word1 }); });
} }
} }
Ok(()) Ok(())

View File

@ -18,7 +18,7 @@ pub fn compute_docids<'ctx>(
phrase_interner, phrase_interner,
term_interner, term_interner,
} = ctx; } = ctx;
let (pairs, proximity) = match edge { let pairs = match edge {
ProximityCondition::Term { term } => { ProximityCondition::Term { term } => {
return term_docids return term_docids
.get_query_term_docids( .get_query_term_docids(
@ -32,12 +32,12 @@ pub fn compute_docids<'ctx>(
) )
.cloned() .cloned()
} }
ProximityCondition::Pairs { pairs, proximity } => (pairs, proximity), ProximityCondition::Pairs { pairs } => pairs,
}; };
let mut pair_docids = RoaringBitmap::new(); let mut pair_docids = RoaringBitmap::new();
for pair in pairs.iter() { for pair in pairs.iter() {
let pair = match pair { let pair = match pair {
WordPair::Words { phrases, left, right } => { WordPair::Words { phrases, left, right, proximity } => {
let mut docids = db_cache let mut docids = db_cache
.get_word_pair_proximity_docids( .get_word_pair_proximity_docids(
index, index,
@ -64,7 +64,7 @@ pub fn compute_docids<'ctx>(
} }
docids docids
} }
WordPair::WordPrefix { phrases, left, right_prefix } => { WordPair::WordPrefix { phrases, left, right_prefix, proximity } => {
let mut docids = db_cache let mut docids = db_cache
.get_word_prefix_pair_proximity_docids( .get_word_prefix_pair_proximity_docids(
index, index,
@ -91,7 +91,7 @@ pub fn compute_docids<'ctx>(
} }
docids docids
} }
WordPair::WordPrefixSwapped { left_prefix, right } => db_cache WordPair::WordPrefixSwapped { left_prefix, right, proximity } => db_cache
.get_prefix_word_pair_proximity_docids( .get_prefix_word_pair_proximity_docids(
index, index,
txn, txn,

View File

@ -18,22 +18,25 @@ pub enum WordPair {
phrases: Vec<Interned<Phrase>>, phrases: Vec<Interned<Phrase>>,
left: Interned<String>, left: Interned<String>,
right: Interned<String>, right: Interned<String>,
proximity: u8,
}, },
WordPrefix { WordPrefix {
phrases: Vec<Interned<Phrase>>, phrases: Vec<Interned<Phrase>>,
left: Interned<String>, left: Interned<String>,
right_prefix: Interned<String>, right_prefix: Interned<String>,
proximity: u8,
}, },
WordPrefixSwapped { WordPrefixSwapped {
left_prefix: Interned<String>, left_prefix: Interned<String>,
right: Interned<String>, right: Interned<String>,
proximity: u8,
}, },
} }
#[derive(Clone, PartialEq, Eq, Hash)] #[derive(Clone, PartialEq, Eq, Hash)]
pub enum ProximityCondition { pub enum ProximityCondition {
Term { term: Interned<QueryTerm> }, Term { term: Interned<QueryTerm> },
Pairs { pairs: Box<[WordPair]>, proximity: u8 }, Pairs { pairs: Box<[WordPair]> },
} }
pub enum ProximityGraph {} pub enum ProximityGraph {}
@ -46,8 +49,8 @@ impl RankingRuleGraphTrait for ProximityGraph {
ProximityCondition::Term { term } => { ProximityCondition::Term { term } => {
format!("term {term}") format!("term {term}")
} }
ProximityCondition::Pairs { pairs, proximity } => { ProximityCondition::Pairs { pairs } => {
format!("prox {proximity}, {} pairs", pairs.len()) format!("pairs {}", pairs.len())
} }
} }
} }