Merge forward and backward proximity conditions in proximity graph

This commit is contained in:
Loïc Lecrenier 2023-03-15 13:02:55 +01:00
parent c0cdaf9f53
commit 05fe856e6e
4 changed files with 50 additions and 58 deletions

View File

@ -303,7 +303,7 @@ mod tests {
let mut ctx = SearchContext::new(&index, &txn);
let results = execute_search(
&mut ctx,
"releases from poison by the government",
"which a the releases from poison by the government",
// "sun flower s are the best",
// "zero config",
TermsMatchingStrategy::Last,
@ -359,7 +359,7 @@ mod tests {
let start = Instant::now();
let mut s = Search::new(&txn, &index);
s.query("releases from poison by the government");
s.query("which a the releases from poison by the government");
s.terms_matching_strategy(TermsMatchingStrategy::Last);
// s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
let docs = s.execute().unwrap();

View File

@ -94,7 +94,7 @@ pub fn build_edges<'ctx>(
)]);
}
let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new();
let mut cost_word_pairs = BTreeMap::<u8, Vec<WordPair>>::new();
if let Some(right_prefix) = right_term.use_prefix_db {
for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
@ -106,7 +106,7 @@ pub fn build_edges<'ctx>(
right_ngram_length,
left_word,
right_prefix,
&mut cost_proximity_word_pairs,
&mut cost_word_pairs,
left_phrase,
)?;
}
@ -129,28 +129,22 @@ pub fn build_edges<'ctx>(
right_ngram_length,
left_word,
right_word,
&mut cost_proximity_word_pairs,
&mut cost_word_pairs,
&[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
)?;
}
}
let mut new_edges = cost_proximity_word_pairs
let mut new_edges = cost_word_pairs
.into_iter()
.flat_map(|(cost, proximity_word_pairs)| {
let mut edges = vec![];
for (proximity, word_pairs) in proximity_word_pairs {
edges.push((
cost,
EdgeCondition::Conditional(conditions_interner.insert(
ProximityCondition::Pairs {
pairs: word_pairs.into_boxed_slice(),
proximity,
},
)),
))
}
edges
.map(|(cost, word_pairs)| {
(
cost,
EdgeCondition::Conditional(
conditions_interner
.insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }),
),
)
})
.collect::<Vec<_>>();
new_edges.push((
@ -170,7 +164,7 @@ fn add_prefix_edges<'ctx>(
right_ngram_length: usize,
left_word: Interned<String>,
right_prefix: Interned<String>,
cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>,
cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
left_phrase: Option<Interned<Phrase>>,
) -> Result<()> {
for proximity in 1..=(8 - right_ngram_length) {
@ -188,16 +182,12 @@ fn add_prefix_edges<'ctx>(
)?
.is_some()
{
cost_proximity_word_pairs
.entry(cost)
.or_default()
.entry(proximity as u8)
.or_default()
.push(WordPair::WordPrefix {
phrases: left_phrase.into_iter().collect(),
left: left_word,
right_prefix,
});
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefix {
phrases: left_phrase.into_iter().collect(),
left: left_word,
right_prefix,
proximity: proximity as u8,
});
}
// No swapping when computing the proximity between a phrase and a word
@ -213,12 +203,11 @@ fn add_prefix_edges<'ctx>(
)?
.is_some()
{
cost_proximity_word_pairs
.entry(cost)
.or_default()
.entry(proximity as u8)
.or_default()
.push(WordPair::WordPrefixSwapped { left_prefix: right_prefix, right: left_word });
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefixSwapped {
left_prefix: right_prefix,
right: left_word,
proximity: proximity as u8 - 1,
});
}
}
Ok(())
@ -232,7 +221,7 @@ fn add_non_prefix_edges<'ctx>(
right_ngram_length: usize,
word1: Interned<String>,
word2: Interned<String>,
cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>,
cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
phrases: &[Interned<Phrase>],
) -> Result<()> {
for proximity in 1..=(8 - right_ngram_length) {
@ -248,12 +237,12 @@ fn add_non_prefix_edges<'ctx>(
)?
.is_some()
{
cost_proximity_word_pairs
.entry(cost)
.or_default()
.entry(proximity as u8)
.or_default()
.push(WordPair::Words { phrases: phrases.to_vec(), left: word1, right: word2 });
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words {
phrases: phrases.to_vec(),
left: word1,
right: word2,
proximity: proximity as u8,
});
}
if proximity > 1
// no swapping when either term is a phrase
@ -269,12 +258,12 @@ fn add_non_prefix_edges<'ctx>(
)?
.is_some()
{
cost_proximity_word_pairs
.entry(cost)
.or_default()
.entry(proximity as u8 - 1)
.or_default()
.push(WordPair::Words { phrases: vec![], left: word2, right: word1 });
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words {
phrases: vec![],
left: word2,
right: word1,
proximity: proximity as u8 - 1,
});
}
}
Ok(())

View File

@ -18,7 +18,7 @@ pub fn compute_docids<'ctx>(
phrase_interner,
term_interner,
} = ctx;
let (pairs, proximity) = match edge {
let pairs = match edge {
ProximityCondition::Term { term } => {
return term_docids
.get_query_term_docids(
@ -32,12 +32,12 @@ pub fn compute_docids<'ctx>(
)
.cloned()
}
ProximityCondition::Pairs { pairs, proximity } => (pairs, proximity),
ProximityCondition::Pairs { pairs } => pairs,
};
let mut pair_docids = RoaringBitmap::new();
for pair in pairs.iter() {
let pair = match pair {
WordPair::Words { phrases, left, right } => {
WordPair::Words { phrases, left, right, proximity } => {
let mut docids = db_cache
.get_word_pair_proximity_docids(
index,
@ -64,7 +64,7 @@ pub fn compute_docids<'ctx>(
}
docids
}
WordPair::WordPrefix { phrases, left, right_prefix } => {
WordPair::WordPrefix { phrases, left, right_prefix, proximity } => {
let mut docids = db_cache
.get_word_prefix_pair_proximity_docids(
index,
@ -91,7 +91,7 @@ pub fn compute_docids<'ctx>(
}
docids
}
WordPair::WordPrefixSwapped { left_prefix, right } => db_cache
WordPair::WordPrefixSwapped { left_prefix, right, proximity } => db_cache
.get_prefix_word_pair_proximity_docids(
index,
txn,

View File

@ -18,22 +18,25 @@ pub enum WordPair {
phrases: Vec<Interned<Phrase>>,
left: Interned<String>,
right: Interned<String>,
proximity: u8,
},
WordPrefix {
phrases: Vec<Interned<Phrase>>,
left: Interned<String>,
right_prefix: Interned<String>,
proximity: u8,
},
WordPrefixSwapped {
left_prefix: Interned<String>,
right: Interned<String>,
proximity: u8,
},
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum ProximityCondition {
Term { term: Interned<QueryTerm> },
Pairs { pairs: Box<[WordPair]>, proximity: u8 },
Pairs { pairs: Box<[WordPair]> },
}
pub enum ProximityGraph {}
@ -46,8 +49,8 @@ impl RankingRuleGraphTrait for ProximityGraph {
ProximityCondition::Term { term } => {
format!("term {term}")
}
ProximityCondition::Pairs { pairs, proximity } => {
format!("prox {proximity}, {} pairs", pairs.len())
ProximityCondition::Pairs { pairs } => {
format!("pairs {}", pairs.len())
}
}
}