This commit is contained in:
F. Levi 2024-10-01 17:50:59 +03:00
parent 6d16230f17
commit d9e4db9983

View File

@ -166,19 +166,12 @@ enum SimpleTokenKind {
} }
impl SimpleTokenKind { impl SimpleTokenKind {
fn get(token: &&Token<'_>) -> Self { fn new(token: &&Token<'_>) -> Self {
match token.kind { match token.kind {
TokenKind::Separator(separaor_kind) => Self::Separator(separaor_kind), TokenKind::Separator(separaor_kind) => Self::Separator(separaor_kind),
_ => Self::NotSeparator, _ => Self::NotSeparator,
} }
} }
fn is_not_separator(&self) -> bool {
match self {
SimpleTokenKind::NotSeparator => true,
SimpleTokenKind::Separator(_) => false,
}
}
} }
#[derive(PartialEq, PartialOrd)] #[derive(PartialEq, PartialOrd)]
@ -259,9 +252,12 @@ impl MatchIntervalWithScore {
// positions of the first and the last match of the best matches interval in `matches`. // positions of the first and the last match of the best matches interval in `matches`.
let mut best_interval: Option<Self> = None; let mut best_interval: Option<Self> = None;
let mut save_best_interval = |interval_first, interval_last, interval_score| {
let mut save_best_interval = |interval_first, interval_last| {
let interval_score = MatchIntervalScore::new(&matches[interval_first..=interval_last]);
let is_interval_score_better = let is_interval_score_better =
&best_interval.as_ref().map_or(true, |Self { score, .. }| interval_score > *score); &best_interval.as_ref().map_or(true, |Self { score, .. }| interval_score > *score);
if *is_interval_score_better { if *is_interval_score_better {
best_interval = best_interval =
Some(Self { interval: (interval_first, interval_last), score: interval_score }); Some(Self { interval: (interval_first, interval_last), score: interval_score });
@ -286,11 +282,8 @@ impl MatchIntervalWithScore {
// if index is 0 there is no last viable match // if index is 0 there is no last viable match
if index != 0 { if index != 0 {
let interval_last = index - 1; let interval_last = index - 1;
let interval_score =
MatchIntervalScore::new(&matches[interval_first..=interval_last]);
// keep interval if it's the best // keep interval if it's the best
save_best_interval(interval_first, interval_last, interval_score); save_best_interval(interval_first, interval_last);
} }
// advance start of the interval while interval is longer than crop_size. // advance start of the interval while interval is longer than crop_size.
@ -314,8 +307,7 @@ impl MatchIntervalWithScore {
// if it's the last match with itself, we need to make sure it's // if it's the last match with itself, we need to make sure it's
// not a phrase longer than the crop window // not a phrase longer than the crop window
if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size { if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size {
let interval_score = MatchIntervalScore::new(&matches[interval_first..=interval_last]); save_best_interval(interval_first, interval_last);
save_best_interval(interval_first, interval_last, interval_score);
} }
// if none of the matches fit the criteria above, default to the first one // if none of the matches fit the criteria above, default to the first one
@ -359,6 +351,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
Some(MatchType::Full { ids, .. }) => { Some(MatchType::Full { ids, .. }) => {
// save the token that closes the partial match as a match. // save the token that closes the partial match as a match.
matches.push(Match { matches.push(Match {
// @TODO: Shouldn't this be +1?
match_len: word.char_end - *first_word_char_start, match_len: word.char_end - *first_word_char_start,
ids: ids.clone().collect(), ids: ids.clone().collect(),
position: MatchPosition::Phrase { position: MatchPosition::Phrase {
@ -484,8 +477,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
// grows the crop window peeking in both directions // grows the crop window peeking in both directions
// until the window contains the good number of words: // until the window contains the good number of words:
while remaining_words > 0 { while remaining_words > 0 {
let before_token_kind = before_tokens.peek().map(SimpleTokenKind::get); let before_token_kind = before_tokens.peek().map(SimpleTokenKind::new);
let after_token_kind = after_tokens.peek().map(SimpleTokenKind::get); let after_token_kind = after_tokens.peek().map(SimpleTokenKind::new);
match (before_token_kind, after_token_kind) { match (before_token_kind, after_token_kind) {
// we can expand both sides. // we can expand both sides.
@ -504,7 +497,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
if remaining_words > 1 { if remaining_words > 1 {
after_tokens.next(); after_tokens.next();
} }
} else if let SeparatorKind::Hard = before_token_separator_kind { } else if matches!(before_token_separator_kind, SeparatorKind::Hard)
{
after_tokens.next(); after_tokens.next();
} else { } else {
before_tokens.next(); before_tokens.next();
@ -536,14 +530,14 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
// the end of the text is reached, advance left. // the end of the text is reached, advance left.
(Some(before_token_kind), None) => { (Some(before_token_kind), None) => {
before_tokens.next(); before_tokens.next();
if let SimpleTokenKind::NotSeparator = before_token_kind { if matches!(before_token_kind, SimpleTokenKind::NotSeparator) {
remaining_words -= 1; remaining_words -= 1;
} }
} }
// the start of the text is reached, advance right. // the start of the text is reached, advance right.
(None, Some(after_token_kind)) => { (None, Some(after_token_kind)) => {
after_tokens.next(); after_tokens.next();
if let SimpleTokenKind::NotSeparator = after_token_kind { if matches!(after_token_kind, SimpleTokenKind::NotSeparator) {
remaining_words -= 1; remaining_words -= 1;
} }
} }
@ -566,9 +560,9 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
while remaining_extra_words > 0 { while remaining_extra_words > 0 {
let token_from_end_kind = tokens_from_end let token_from_end_kind = tokens_from_end
.peek() .peek()
.map(SimpleTokenKind::get) .map(SimpleTokenKind::new)
.expect("Expected iterator to not reach end"); .expect("Expected iterator to not reach end");
if token_from_end_kind.is_not_separator() { if matches!(token_from_end_kind, SimpleTokenKind::NotSeparator) {
remaining_extra_words -= 1; remaining_extra_words -= 1;
} }