mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Move MatchPosition impl to Match, adjust counting score for phrases
This commit is contained in:
parent
cab63abc84
commit
a2a16bf846
@ -117,30 +117,30 @@ pub struct Match {
|
|||||||
position: MatchPosition,
|
position: MatchPosition,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl MatchPosition {
|
impl Match {
|
||||||
fn get_first_word(m: &Match) -> usize {
|
fn get_first_word_pos(&self) -> usize {
|
||||||
match m.position {
|
match self.position {
|
||||||
MatchPosition::Word { word_position, .. } => word_position,
|
MatchPosition::Word { word_position, .. } => word_position,
|
||||||
MatchPosition::Phrase { word_positions: (fwp, _), .. } => fwp,
|
MatchPosition::Phrase { word_positions: (fwp, _), .. } => fwp,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_last_word(m: &Match) -> usize {
|
fn get_last_word_pos(&self) -> usize {
|
||||||
match m.position {
|
match self.position {
|
||||||
MatchPosition::Word { word_position, .. } => word_position,
|
MatchPosition::Word { word_position, .. } => word_position,
|
||||||
MatchPosition::Phrase { word_positions: (_, lwp), .. } => lwp,
|
MatchPosition::Phrase { word_positions: (_, lwp), .. } => lwp,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_first_token(m: &Match) -> usize {
|
fn get_first_token_pos(&self) -> usize {
|
||||||
match m.position {
|
match self.position {
|
||||||
MatchPosition::Word { token_position, .. } => token_position,
|
MatchPosition::Word { token_position, .. } => token_position,
|
||||||
MatchPosition::Phrase { token_positions: (ftp, _), .. } => ftp,
|
MatchPosition::Phrase { token_positions: (ftp, _), .. } => ftp,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn get_last_token(m: &Match) -> usize {
|
fn get_last_token_pos(&self) -> usize {
|
||||||
match m.position {
|
match self.position {
|
||||||
MatchPosition::Word { token_position, .. } => token_position,
|
MatchPosition::Word { token_position, .. } => token_position,
|
||||||
MatchPosition::Phrase { token_positions: (_, ltp), .. } => ltp,
|
MatchPosition::Phrase { token_positions: (_, ltp), .. } => ltp,
|
||||||
}
|
}
|
||||||
@ -272,7 +272,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
Some((tokens, matches)) => matches
|
Some((tokens, matches)) => matches
|
||||||
.iter()
|
.iter()
|
||||||
.map(|m| MatchBounds {
|
.map(|m| MatchBounds {
|
||||||
start: tokens[MatchPosition::get_first_token(m)].byte_start,
|
start: tokens[m.get_first_token_pos()].byte_start,
|
||||||
length: m.match_len,
|
length: m.match_len,
|
||||||
})
|
})
|
||||||
.collect(),
|
.collect(),
|
||||||
@ -288,13 +288,11 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
) -> (usize, usize) {
|
) -> (usize, usize) {
|
||||||
// if there is no match, we start from the beginning of the string by default.
|
// if there is no match, we start from the beginning of the string by default.
|
||||||
let first_match_word_position =
|
let first_match_word_position =
|
||||||
matches.first().map(|m| MatchPosition::get_first_word(m)).unwrap_or(0);
|
matches.first().map(|m| m.get_first_word_pos()).unwrap_or(0);
|
||||||
let first_match_token_position =
|
let first_match_token_position =
|
||||||
matches.first().map(|m| MatchPosition::get_first_token(m)).unwrap_or(0);
|
matches.first().map(|m| m.get_first_token_pos()).unwrap_or(0);
|
||||||
let last_match_word_position =
|
let last_match_word_position = matches.last().map(|m| m.get_last_word_pos()).unwrap_or(0);
|
||||||
matches.last().map(|m| MatchPosition::get_last_word(m)).unwrap_or(0);
|
let last_match_token_position = matches.last().map(|m| m.get_last_token_pos()).unwrap_or(0);
|
||||||
let last_match_token_position =
|
|
||||||
matches.last().map(|m| MatchPosition::get_last_token(m)).unwrap_or(0);
|
|
||||||
|
|
||||||
// matches needs to be counted in the crop len.
|
// matches needs to be counted in the crop len.
|
||||||
let mut remaining_words = crop_size + first_match_word_position - last_match_word_position;
|
let mut remaining_words = crop_size + first_match_word_position - last_match_word_position;
|
||||||
@ -389,6 +387,16 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
let mut order_score = 0;
|
let mut order_score = 0;
|
||||||
let mut distance_score = 0;
|
let mut distance_score = 0;
|
||||||
|
|
||||||
|
// Count score for phrases
|
||||||
|
let tally_phrase_scores =
|
||||||
|
|fwp: &usize, lwp: &usize, order_score: &mut i16, distance_score: &mut i16| {
|
||||||
|
let words_in_phrase_minus_one = (lwp - fwp) as i16;
|
||||||
|
// will always be ordered, so +1 for each space between words
|
||||||
|
*order_score += words_in_phrase_minus_one;
|
||||||
|
// distance will always be 1, so -1 for each space between words
|
||||||
|
*distance_score -= words_in_phrase_minus_one;
|
||||||
|
};
|
||||||
|
|
||||||
let mut iter = matches.iter().peekable();
|
let mut iter = matches.iter().peekable();
|
||||||
while let Some(m) = iter.next() {
|
while let Some(m) = iter.next() {
|
||||||
if let Some(next_match) = iter.peek() {
|
if let Some(next_match) = iter.peek() {
|
||||||
@ -397,12 +405,24 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
order_score += 1;
|
order_score += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let next_match_first_word_pos = MatchPosition::get_first_word(next_match);
|
let m_last_word_pos = match m.position {
|
||||||
let current_match_first_word_pos = MatchPosition::get_first_word(m);
|
MatchPosition::Word { word_position, .. } => word_position,
|
||||||
|
MatchPosition::Phrase { word_positions: (fwp, lwp), .. } => {
|
||||||
|
tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score);
|
||||||
|
lwp
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let next_match_first_word_pos = match next_match.position {
|
||||||
|
MatchPosition::Word { word_position, .. } => word_position,
|
||||||
|
MatchPosition::Phrase { word_positions: (fwp, _), .. } => fwp,
|
||||||
|
};
|
||||||
|
|
||||||
// compute distance between matches
|
// compute distance between matches
|
||||||
distance_score -=
|
distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16;
|
||||||
(next_match_first_word_pos - current_match_first_word_pos).min(7) as i16;
|
} else if let MatchPosition::Phrase { word_positions: (fwp, lwp), .. } = m.position {
|
||||||
|
// in case last match is a phrase, count score for its words
|
||||||
|
tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score);
|
||||||
}
|
}
|
||||||
|
|
||||||
ids.extend(m.ids.iter());
|
ids.extend(m.ids.iter());
|
||||||
@ -430,9 +450,9 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
// if next match would make interval gross more than crop_size,
|
// if next match would make interval gross more than crop_size,
|
||||||
// we compare the current interval with the best one,
|
// we compare the current interval with the best one,
|
||||||
// then we increase `interval_first` until next match can be added.
|
// then we increase `interval_first` until next match can be added.
|
||||||
let next_match_word_pos = MatchPosition::get_first_word(next_match);
|
let next_match_word_pos = next_match.get_last_word_pos();
|
||||||
let mut interval_first_match_word_pos =
|
let mut interval_first_match_word_pos =
|
||||||
MatchPosition::get_last_word(&matches[interval_first]);
|
matches[interval_first].get_first_word_pos();
|
||||||
|
|
||||||
if next_match_word_pos - interval_first_match_word_pos >= crop_size {
|
if next_match_word_pos - interval_first_match_word_pos >= crop_size {
|
||||||
let interval_score =
|
let interval_score =
|
||||||
@ -448,7 +468,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
loop {
|
loop {
|
||||||
interval_first += 1;
|
interval_first += 1;
|
||||||
interval_first_match_word_pos =
|
interval_first_match_word_pos =
|
||||||
MatchPosition::get_last_word(&matches[interval_first]);
|
matches[interval_first].get_first_word_pos();
|
||||||
|
|
||||||
if next_match_word_pos - interval_first_match_word_pos < crop_size {
|
if next_match_word_pos - interval_first_match_word_pos < crop_size {
|
||||||
break;
|
break;
|
||||||
|
Loading…
Reference in New Issue
Block a user