mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
Merge forward and backward proximity conditions in proximity graph
This commit is contained in:
parent
c0cdaf9f53
commit
05fe856e6e
@ -303,7 +303,7 @@ mod tests {
|
||||
let mut ctx = SearchContext::new(&index, &txn);
|
||||
let results = execute_search(
|
||||
&mut ctx,
|
||||
"releases from poison by the government",
|
||||
"which a the releases from poison by the government",
|
||||
// "sun flower s are the best",
|
||||
// "zero config",
|
||||
TermsMatchingStrategy::Last,
|
||||
@ -359,7 +359,7 @@ mod tests {
|
||||
let start = Instant::now();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("releases from poison by the government");
|
||||
s.query("which a the releases from poison by the government");
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||
// s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
||||
let docs = s.execute().unwrap();
|
||||
|
@ -94,7 +94,7 @@ pub fn build_edges<'ctx>(
|
||||
)]);
|
||||
}
|
||||
|
||||
let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new();
|
||||
let mut cost_word_pairs = BTreeMap::<u8, Vec<WordPair>>::new();
|
||||
|
||||
if let Some(right_prefix) = right_term.use_prefix_db {
|
||||
for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
|
||||
@ -106,7 +106,7 @@ pub fn build_edges<'ctx>(
|
||||
right_ngram_length,
|
||||
left_word,
|
||||
right_prefix,
|
||||
&mut cost_proximity_word_pairs,
|
||||
&mut cost_word_pairs,
|
||||
left_phrase,
|
||||
)?;
|
||||
}
|
||||
@ -129,28 +129,22 @@ pub fn build_edges<'ctx>(
|
||||
right_ngram_length,
|
||||
left_word,
|
||||
right_word,
|
||||
&mut cost_proximity_word_pairs,
|
||||
&mut cost_word_pairs,
|
||||
&[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
let mut new_edges = cost_proximity_word_pairs
|
||||
let mut new_edges = cost_word_pairs
|
||||
.into_iter()
|
||||
.flat_map(|(cost, proximity_word_pairs)| {
|
||||
let mut edges = vec![];
|
||||
for (proximity, word_pairs) in proximity_word_pairs {
|
||||
edges.push((
|
||||
cost,
|
||||
EdgeCondition::Conditional(conditions_interner.insert(
|
||||
ProximityCondition::Pairs {
|
||||
pairs: word_pairs.into_boxed_slice(),
|
||||
proximity,
|
||||
},
|
||||
)),
|
||||
))
|
||||
}
|
||||
edges
|
||||
.map(|(cost, word_pairs)| {
|
||||
(
|
||||
cost,
|
||||
EdgeCondition::Conditional(
|
||||
conditions_interner
|
||||
.insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }),
|
||||
),
|
||||
)
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
new_edges.push((
|
||||
@ -170,7 +164,7 @@ fn add_prefix_edges<'ctx>(
|
||||
right_ngram_length: usize,
|
||||
left_word: Interned<String>,
|
||||
right_prefix: Interned<String>,
|
||||
cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>,
|
||||
cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
|
||||
left_phrase: Option<Interned<Phrase>>,
|
||||
) -> Result<()> {
|
||||
for proximity in 1..=(8 - right_ngram_length) {
|
||||
@ -188,16 +182,12 @@ fn add_prefix_edges<'ctx>(
|
||||
)?
|
||||
.is_some()
|
||||
{
|
||||
cost_proximity_word_pairs
|
||||
.entry(cost)
|
||||
.or_default()
|
||||
.entry(proximity as u8)
|
||||
.or_default()
|
||||
.push(WordPair::WordPrefix {
|
||||
phrases: left_phrase.into_iter().collect(),
|
||||
left: left_word,
|
||||
right_prefix,
|
||||
});
|
||||
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefix {
|
||||
phrases: left_phrase.into_iter().collect(),
|
||||
left: left_word,
|
||||
right_prefix,
|
||||
proximity: proximity as u8,
|
||||
});
|
||||
}
|
||||
|
||||
// No swapping when computing the proximity between a phrase and a word
|
||||
@ -213,12 +203,11 @@ fn add_prefix_edges<'ctx>(
|
||||
)?
|
||||
.is_some()
|
||||
{
|
||||
cost_proximity_word_pairs
|
||||
.entry(cost)
|
||||
.or_default()
|
||||
.entry(proximity as u8)
|
||||
.or_default()
|
||||
.push(WordPair::WordPrefixSwapped { left_prefix: right_prefix, right: left_word });
|
||||
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefixSwapped {
|
||||
left_prefix: right_prefix,
|
||||
right: left_word,
|
||||
proximity: proximity as u8 - 1,
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@ -232,7 +221,7 @@ fn add_non_prefix_edges<'ctx>(
|
||||
right_ngram_length: usize,
|
||||
word1: Interned<String>,
|
||||
word2: Interned<String>,
|
||||
cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>,
|
||||
cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
|
||||
phrases: &[Interned<Phrase>],
|
||||
) -> Result<()> {
|
||||
for proximity in 1..=(8 - right_ngram_length) {
|
||||
@ -248,12 +237,12 @@ fn add_non_prefix_edges<'ctx>(
|
||||
)?
|
||||
.is_some()
|
||||
{
|
||||
cost_proximity_word_pairs
|
||||
.entry(cost)
|
||||
.or_default()
|
||||
.entry(proximity as u8)
|
||||
.or_default()
|
||||
.push(WordPair::Words { phrases: phrases.to_vec(), left: word1, right: word2 });
|
||||
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words {
|
||||
phrases: phrases.to_vec(),
|
||||
left: word1,
|
||||
right: word2,
|
||||
proximity: proximity as u8,
|
||||
});
|
||||
}
|
||||
if proximity > 1
|
||||
// no swapping when either term is a phrase
|
||||
@ -269,12 +258,12 @@ fn add_non_prefix_edges<'ctx>(
|
||||
)?
|
||||
.is_some()
|
||||
{
|
||||
cost_proximity_word_pairs
|
||||
.entry(cost)
|
||||
.or_default()
|
||||
.entry(proximity as u8 - 1)
|
||||
.or_default()
|
||||
.push(WordPair::Words { phrases: vec![], left: word2, right: word1 });
|
||||
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words {
|
||||
phrases: vec![],
|
||||
left: word2,
|
||||
right: word1,
|
||||
proximity: proximity as u8 - 1,
|
||||
});
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
|
@ -18,7 +18,7 @@ pub fn compute_docids<'ctx>(
|
||||
phrase_interner,
|
||||
term_interner,
|
||||
} = ctx;
|
||||
let (pairs, proximity) = match edge {
|
||||
let pairs = match edge {
|
||||
ProximityCondition::Term { term } => {
|
||||
return term_docids
|
||||
.get_query_term_docids(
|
||||
@ -32,12 +32,12 @@ pub fn compute_docids<'ctx>(
|
||||
)
|
||||
.cloned()
|
||||
}
|
||||
ProximityCondition::Pairs { pairs, proximity } => (pairs, proximity),
|
||||
ProximityCondition::Pairs { pairs } => pairs,
|
||||
};
|
||||
let mut pair_docids = RoaringBitmap::new();
|
||||
for pair in pairs.iter() {
|
||||
let pair = match pair {
|
||||
WordPair::Words { phrases, left, right } => {
|
||||
WordPair::Words { phrases, left, right, proximity } => {
|
||||
let mut docids = db_cache
|
||||
.get_word_pair_proximity_docids(
|
||||
index,
|
||||
@ -64,7 +64,7 @@ pub fn compute_docids<'ctx>(
|
||||
}
|
||||
docids
|
||||
}
|
||||
WordPair::WordPrefix { phrases, left, right_prefix } => {
|
||||
WordPair::WordPrefix { phrases, left, right_prefix, proximity } => {
|
||||
let mut docids = db_cache
|
||||
.get_word_prefix_pair_proximity_docids(
|
||||
index,
|
||||
@ -91,7 +91,7 @@ pub fn compute_docids<'ctx>(
|
||||
}
|
||||
docids
|
||||
}
|
||||
WordPair::WordPrefixSwapped { left_prefix, right } => db_cache
|
||||
WordPair::WordPrefixSwapped { left_prefix, right, proximity } => db_cache
|
||||
.get_prefix_word_pair_proximity_docids(
|
||||
index,
|
||||
txn,
|
||||
|
@ -18,22 +18,25 @@ pub enum WordPair {
|
||||
phrases: Vec<Interned<Phrase>>,
|
||||
left: Interned<String>,
|
||||
right: Interned<String>,
|
||||
proximity: u8,
|
||||
},
|
||||
WordPrefix {
|
||||
phrases: Vec<Interned<Phrase>>,
|
||||
left: Interned<String>,
|
||||
right_prefix: Interned<String>,
|
||||
proximity: u8,
|
||||
},
|
||||
WordPrefixSwapped {
|
||||
left_prefix: Interned<String>,
|
||||
right: Interned<String>,
|
||||
proximity: u8,
|
||||
},
|
||||
}
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ProximityCondition {
|
||||
Term { term: Interned<QueryTerm> },
|
||||
Pairs { pairs: Box<[WordPair]>, proximity: u8 },
|
||||
Pairs { pairs: Box<[WordPair]> },
|
||||
}
|
||||
|
||||
pub enum ProximityGraph {}
|
||||
@ -46,8 +49,8 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
ProximityCondition::Term { term } => {
|
||||
format!("term {term}")
|
||||
}
|
||||
ProximityCondition::Pairs { pairs, proximity } => {
|
||||
format!("prox {proximity}, {} pairs", pairs.len())
|
||||
ProximityCondition::Pairs { pairs } => {
|
||||
format!("pairs {}", pairs.len())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user