mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 20:57:35 +01:00
Merge forward and backward proximity conditions in proximity graph
This commit is contained in:
parent
c0cdaf9f53
commit
05fe856e6e
@ -303,7 +303,7 @@ mod tests {
|
|||||||
let mut ctx = SearchContext::new(&index, &txn);
|
let mut ctx = SearchContext::new(&index, &txn);
|
||||||
let results = execute_search(
|
let results = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
"releases from poison by the government",
|
"which a the releases from poison by the government",
|
||||||
// "sun flower s are the best",
|
// "sun flower s are the best",
|
||||||
// "zero config",
|
// "zero config",
|
||||||
TermsMatchingStrategy::Last,
|
TermsMatchingStrategy::Last,
|
||||||
@ -359,7 +359,7 @@ mod tests {
|
|||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let mut s = Search::new(&txn, &index);
|
let mut s = Search::new(&txn, &index);
|
||||||
s.query("releases from poison by the government");
|
s.query("which a the releases from poison by the government");
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
// s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
// s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
||||||
let docs = s.execute().unwrap();
|
let docs = s.execute().unwrap();
|
||||||
|
@ -94,7 +94,7 @@ pub fn build_edges<'ctx>(
|
|||||||
)]);
|
)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new();
|
let mut cost_word_pairs = BTreeMap::<u8, Vec<WordPair>>::new();
|
||||||
|
|
||||||
if let Some(right_prefix) = right_term.use_prefix_db {
|
if let Some(right_prefix) = right_term.use_prefix_db {
|
||||||
for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
|
for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
|
||||||
@ -106,7 +106,7 @@ pub fn build_edges<'ctx>(
|
|||||||
right_ngram_length,
|
right_ngram_length,
|
||||||
left_word,
|
left_word,
|
||||||
right_prefix,
|
right_prefix,
|
||||||
&mut cost_proximity_word_pairs,
|
&mut cost_word_pairs,
|
||||||
left_phrase,
|
left_phrase,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
@ -129,28 +129,22 @@ pub fn build_edges<'ctx>(
|
|||||||
right_ngram_length,
|
right_ngram_length,
|
||||||
left_word,
|
left_word,
|
||||||
right_word,
|
right_word,
|
||||||
&mut cost_proximity_word_pairs,
|
&mut cost_word_pairs,
|
||||||
&[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
|
&[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut new_edges = cost_proximity_word_pairs
|
let mut new_edges = cost_word_pairs
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.flat_map(|(cost, proximity_word_pairs)| {
|
.map(|(cost, word_pairs)| {
|
||||||
let mut edges = vec![];
|
(
|
||||||
for (proximity, word_pairs) in proximity_word_pairs {
|
cost,
|
||||||
edges.push((
|
EdgeCondition::Conditional(
|
||||||
cost,
|
conditions_interner
|
||||||
EdgeCondition::Conditional(conditions_interner.insert(
|
.insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }),
|
||||||
ProximityCondition::Pairs {
|
),
|
||||||
pairs: word_pairs.into_boxed_slice(),
|
)
|
||||||
proximity,
|
|
||||||
},
|
|
||||||
)),
|
|
||||||
))
|
|
||||||
}
|
|
||||||
edges
|
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
new_edges.push((
|
new_edges.push((
|
||||||
@ -170,7 +164,7 @@ fn add_prefix_edges<'ctx>(
|
|||||||
right_ngram_length: usize,
|
right_ngram_length: usize,
|
||||||
left_word: Interned<String>,
|
left_word: Interned<String>,
|
||||||
right_prefix: Interned<String>,
|
right_prefix: Interned<String>,
|
||||||
cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>,
|
cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
|
||||||
left_phrase: Option<Interned<Phrase>>,
|
left_phrase: Option<Interned<Phrase>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for proximity in 1..=(8 - right_ngram_length) {
|
for proximity in 1..=(8 - right_ngram_length) {
|
||||||
@ -188,16 +182,12 @@ fn add_prefix_edges<'ctx>(
|
|||||||
)?
|
)?
|
||||||
.is_some()
|
.is_some()
|
||||||
{
|
{
|
||||||
cost_proximity_word_pairs
|
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefix {
|
||||||
.entry(cost)
|
phrases: left_phrase.into_iter().collect(),
|
||||||
.or_default()
|
left: left_word,
|
||||||
.entry(proximity as u8)
|
right_prefix,
|
||||||
.or_default()
|
proximity: proximity as u8,
|
||||||
.push(WordPair::WordPrefix {
|
});
|
||||||
phrases: left_phrase.into_iter().collect(),
|
|
||||||
left: left_word,
|
|
||||||
right_prefix,
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// No swapping when computing the proximity between a phrase and a word
|
// No swapping when computing the proximity between a phrase and a word
|
||||||
@ -213,12 +203,11 @@ fn add_prefix_edges<'ctx>(
|
|||||||
)?
|
)?
|
||||||
.is_some()
|
.is_some()
|
||||||
{
|
{
|
||||||
cost_proximity_word_pairs
|
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefixSwapped {
|
||||||
.entry(cost)
|
left_prefix: right_prefix,
|
||||||
.or_default()
|
right: left_word,
|
||||||
.entry(proximity as u8)
|
proximity: proximity as u8 - 1,
|
||||||
.or_default()
|
});
|
||||||
.push(WordPair::WordPrefixSwapped { left_prefix: right_prefix, right: left_word });
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -232,7 +221,7 @@ fn add_non_prefix_edges<'ctx>(
|
|||||||
right_ngram_length: usize,
|
right_ngram_length: usize,
|
||||||
word1: Interned<String>,
|
word1: Interned<String>,
|
||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
cost_proximity_word_pairs: &mut BTreeMap<u8, BTreeMap<u8, Vec<WordPair>>>,
|
cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
|
||||||
phrases: &[Interned<Phrase>],
|
phrases: &[Interned<Phrase>],
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for proximity in 1..=(8 - right_ngram_length) {
|
for proximity in 1..=(8 - right_ngram_length) {
|
||||||
@ -248,12 +237,12 @@ fn add_non_prefix_edges<'ctx>(
|
|||||||
)?
|
)?
|
||||||
.is_some()
|
.is_some()
|
||||||
{
|
{
|
||||||
cost_proximity_word_pairs
|
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words {
|
||||||
.entry(cost)
|
phrases: phrases.to_vec(),
|
||||||
.or_default()
|
left: word1,
|
||||||
.entry(proximity as u8)
|
right: word2,
|
||||||
.or_default()
|
proximity: proximity as u8,
|
||||||
.push(WordPair::Words { phrases: phrases.to_vec(), left: word1, right: word2 });
|
});
|
||||||
}
|
}
|
||||||
if proximity > 1
|
if proximity > 1
|
||||||
// no swapping when either term is a phrase
|
// no swapping when either term is a phrase
|
||||||
@ -269,12 +258,12 @@ fn add_non_prefix_edges<'ctx>(
|
|||||||
)?
|
)?
|
||||||
.is_some()
|
.is_some()
|
||||||
{
|
{
|
||||||
cost_proximity_word_pairs
|
cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::Words {
|
||||||
.entry(cost)
|
phrases: vec![],
|
||||||
.or_default()
|
left: word2,
|
||||||
.entry(proximity as u8 - 1)
|
right: word1,
|
||||||
.or_default()
|
proximity: proximity as u8 - 1,
|
||||||
.push(WordPair::Words { phrases: vec![], left: word2, right: word1 });
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -18,7 +18,7 @@ pub fn compute_docids<'ctx>(
|
|||||||
phrase_interner,
|
phrase_interner,
|
||||||
term_interner,
|
term_interner,
|
||||||
} = ctx;
|
} = ctx;
|
||||||
let (pairs, proximity) = match edge {
|
let pairs = match edge {
|
||||||
ProximityCondition::Term { term } => {
|
ProximityCondition::Term { term } => {
|
||||||
return term_docids
|
return term_docids
|
||||||
.get_query_term_docids(
|
.get_query_term_docids(
|
||||||
@ -32,12 +32,12 @@ pub fn compute_docids<'ctx>(
|
|||||||
)
|
)
|
||||||
.cloned()
|
.cloned()
|
||||||
}
|
}
|
||||||
ProximityCondition::Pairs { pairs, proximity } => (pairs, proximity),
|
ProximityCondition::Pairs { pairs } => pairs,
|
||||||
};
|
};
|
||||||
let mut pair_docids = RoaringBitmap::new();
|
let mut pair_docids = RoaringBitmap::new();
|
||||||
for pair in pairs.iter() {
|
for pair in pairs.iter() {
|
||||||
let pair = match pair {
|
let pair = match pair {
|
||||||
WordPair::Words { phrases, left, right } => {
|
WordPair::Words { phrases, left, right, proximity } => {
|
||||||
let mut docids = db_cache
|
let mut docids = db_cache
|
||||||
.get_word_pair_proximity_docids(
|
.get_word_pair_proximity_docids(
|
||||||
index,
|
index,
|
||||||
@ -64,7 +64,7 @@ pub fn compute_docids<'ctx>(
|
|||||||
}
|
}
|
||||||
docids
|
docids
|
||||||
}
|
}
|
||||||
WordPair::WordPrefix { phrases, left, right_prefix } => {
|
WordPair::WordPrefix { phrases, left, right_prefix, proximity } => {
|
||||||
let mut docids = db_cache
|
let mut docids = db_cache
|
||||||
.get_word_prefix_pair_proximity_docids(
|
.get_word_prefix_pair_proximity_docids(
|
||||||
index,
|
index,
|
||||||
@ -91,7 +91,7 @@ pub fn compute_docids<'ctx>(
|
|||||||
}
|
}
|
||||||
docids
|
docids
|
||||||
}
|
}
|
||||||
WordPair::WordPrefixSwapped { left_prefix, right } => db_cache
|
WordPair::WordPrefixSwapped { left_prefix, right, proximity } => db_cache
|
||||||
.get_prefix_word_pair_proximity_docids(
|
.get_prefix_word_pair_proximity_docids(
|
||||||
index,
|
index,
|
||||||
txn,
|
txn,
|
||||||
|
@ -18,22 +18,25 @@ pub enum WordPair {
|
|||||||
phrases: Vec<Interned<Phrase>>,
|
phrases: Vec<Interned<Phrase>>,
|
||||||
left: Interned<String>,
|
left: Interned<String>,
|
||||||
right: Interned<String>,
|
right: Interned<String>,
|
||||||
|
proximity: u8,
|
||||||
},
|
},
|
||||||
WordPrefix {
|
WordPrefix {
|
||||||
phrases: Vec<Interned<Phrase>>,
|
phrases: Vec<Interned<Phrase>>,
|
||||||
left: Interned<String>,
|
left: Interned<String>,
|
||||||
right_prefix: Interned<String>,
|
right_prefix: Interned<String>,
|
||||||
|
proximity: u8,
|
||||||
},
|
},
|
||||||
WordPrefixSwapped {
|
WordPrefixSwapped {
|
||||||
left_prefix: Interned<String>,
|
left_prefix: Interned<String>,
|
||||||
right: Interned<String>,
|
right: Interned<String>,
|
||||||
|
proximity: u8,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||||
pub enum ProximityCondition {
|
pub enum ProximityCondition {
|
||||||
Term { term: Interned<QueryTerm> },
|
Term { term: Interned<QueryTerm> },
|
||||||
Pairs { pairs: Box<[WordPair]>, proximity: u8 },
|
Pairs { pairs: Box<[WordPair]> },
|
||||||
}
|
}
|
||||||
|
|
||||||
pub enum ProximityGraph {}
|
pub enum ProximityGraph {}
|
||||||
@ -46,8 +49,8 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
ProximityCondition::Term { term } => {
|
ProximityCondition::Term { term } => {
|
||||||
format!("term {term}")
|
format!("term {term}")
|
||||||
}
|
}
|
||||||
ProximityCondition::Pairs { pairs, proximity } => {
|
ProximityCondition::Pairs { pairs } => {
|
||||||
format!("prox {proximity}, {} pairs", pairs.len())
|
format!("pairs {}", pairs.len())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user