mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Refactor
This commit is contained in:
parent
6d16230f17
commit
d9e4db9983
@ -166,19 +166,12 @@ enum SimpleTokenKind {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl SimpleTokenKind {
|
impl SimpleTokenKind {
|
||||||
fn get(token: &&Token<'_>) -> Self {
|
fn new(token: &&Token<'_>) -> Self {
|
||||||
match token.kind {
|
match token.kind {
|
||||||
TokenKind::Separator(separaor_kind) => Self::Separator(separaor_kind),
|
TokenKind::Separator(separaor_kind) => Self::Separator(separaor_kind),
|
||||||
_ => Self::NotSeparator,
|
_ => Self::NotSeparator,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn is_not_separator(&self) -> bool {
|
|
||||||
match self {
|
|
||||||
SimpleTokenKind::NotSeparator => true,
|
|
||||||
SimpleTokenKind::Separator(_) => false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(PartialEq, PartialOrd)]
|
#[derive(PartialEq, PartialOrd)]
|
||||||
@ -259,9 +252,12 @@ impl MatchIntervalWithScore {
|
|||||||
|
|
||||||
// positions of the first and the last match of the best matches interval in `matches`.
|
// positions of the first and the last match of the best matches interval in `matches`.
|
||||||
let mut best_interval: Option<Self> = None;
|
let mut best_interval: Option<Self> = None;
|
||||||
let mut save_best_interval = |interval_first, interval_last, interval_score| {
|
|
||||||
|
let mut save_best_interval = |interval_first, interval_last| {
|
||||||
|
let interval_score = MatchIntervalScore::new(&matches[interval_first..=interval_last]);
|
||||||
let is_interval_score_better =
|
let is_interval_score_better =
|
||||||
&best_interval.as_ref().map_or(true, |Self { score, .. }| interval_score > *score);
|
&best_interval.as_ref().map_or(true, |Self { score, .. }| interval_score > *score);
|
||||||
|
|
||||||
if *is_interval_score_better {
|
if *is_interval_score_better {
|
||||||
best_interval =
|
best_interval =
|
||||||
Some(Self { interval: (interval_first, interval_last), score: interval_score });
|
Some(Self { interval: (interval_first, interval_last), score: interval_score });
|
||||||
@ -286,11 +282,8 @@ impl MatchIntervalWithScore {
|
|||||||
// if index is 0 there is no last viable match
|
// if index is 0 there is no last viable match
|
||||||
if index != 0 {
|
if index != 0 {
|
||||||
let interval_last = index - 1;
|
let interval_last = index - 1;
|
||||||
let interval_score =
|
|
||||||
MatchIntervalScore::new(&matches[interval_first..=interval_last]);
|
|
||||||
|
|
||||||
// keep interval if it's the best
|
// keep interval if it's the best
|
||||||
save_best_interval(interval_first, interval_last, interval_score);
|
save_best_interval(interval_first, interval_last);
|
||||||
}
|
}
|
||||||
|
|
||||||
// advance start of the interval while interval is longer than crop_size.
|
// advance start of the interval while interval is longer than crop_size.
|
||||||
@ -314,8 +307,7 @@ impl MatchIntervalWithScore {
|
|||||||
// if it's the last match with itself, we need to make sure it's
|
// if it's the last match with itself, we need to make sure it's
|
||||||
// not a phrase longer than the crop window
|
// not a phrase longer than the crop window
|
||||||
if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size {
|
if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size {
|
||||||
let interval_score = MatchIntervalScore::new(&matches[interval_first..=interval_last]);
|
save_best_interval(interval_first, interval_last);
|
||||||
save_best_interval(interval_first, interval_last, interval_score);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// if none of the matches fit the criteria above, default to the first one
|
// if none of the matches fit the criteria above, default to the first one
|
||||||
@ -359,6 +351,7 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
Some(MatchType::Full { ids, .. }) => {
|
Some(MatchType::Full { ids, .. }) => {
|
||||||
// save the token that closes the partial match as a match.
|
// save the token that closes the partial match as a match.
|
||||||
matches.push(Match {
|
matches.push(Match {
|
||||||
|
// @TODO: Shouldn't this be +1?
|
||||||
match_len: word.char_end - *first_word_char_start,
|
match_len: word.char_end - *first_word_char_start,
|
||||||
ids: ids.clone().collect(),
|
ids: ids.clone().collect(),
|
||||||
position: MatchPosition::Phrase {
|
position: MatchPosition::Phrase {
|
||||||
@ -484,8 +477,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
// grows the crop window peeking in both directions
|
// grows the crop window peeking in both directions
|
||||||
// until the window contains the right number of words:
|
// until the window contains the right number of words:
|
||||||
while remaining_words > 0 {
|
while remaining_words > 0 {
|
||||||
let before_token_kind = before_tokens.peek().map(SimpleTokenKind::get);
|
let before_token_kind = before_tokens.peek().map(SimpleTokenKind::new);
|
||||||
let after_token_kind = after_tokens.peek().map(SimpleTokenKind::get);
|
let after_token_kind = after_tokens.peek().map(SimpleTokenKind::new);
|
||||||
|
|
||||||
match (before_token_kind, after_token_kind) {
|
match (before_token_kind, after_token_kind) {
|
||||||
// we can expand both sides.
|
// we can expand both sides.
|
||||||
@ -504,7 +497,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
if remaining_words > 1 {
|
if remaining_words > 1 {
|
||||||
after_tokens.next();
|
after_tokens.next();
|
||||||
}
|
}
|
||||||
} else if let SeparatorKind::Hard = before_token_separator_kind {
|
} else if matches!(before_token_separator_kind, SeparatorKind::Hard)
|
||||||
|
{
|
||||||
after_tokens.next();
|
after_tokens.next();
|
||||||
} else {
|
} else {
|
||||||
before_tokens.next();
|
before_tokens.next();
|
||||||
@ -536,14 +530,14 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
// the end of the text is reached, advance left.
|
// the end of the text is reached, advance left.
|
||||||
(Some(before_token_kind), None) => {
|
(Some(before_token_kind), None) => {
|
||||||
before_tokens.next();
|
before_tokens.next();
|
||||||
if let SimpleTokenKind::NotSeparator = before_token_kind {
|
if matches!(before_token_kind, SimpleTokenKind::NotSeparator) {
|
||||||
remaining_words -= 1;
|
remaining_words -= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// the start of the text is reached, advance right.
|
// the start of the text is reached, advance right.
|
||||||
(None, Some(after_token_kind)) => {
|
(None, Some(after_token_kind)) => {
|
||||||
after_tokens.next();
|
after_tokens.next();
|
||||||
if let SimpleTokenKind::NotSeparator = after_token_kind {
|
if matches!(after_token_kind, SimpleTokenKind::NotSeparator) {
|
||||||
remaining_words -= 1;
|
remaining_words -= 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -566,9 +560,9 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
|
|||||||
while remaining_extra_words > 0 {
|
while remaining_extra_words > 0 {
|
||||||
let token_from_end_kind = tokens_from_end
|
let token_from_end_kind = tokens_from_end
|
||||||
.peek()
|
.peek()
|
||||||
.map(SimpleTokenKind::get)
|
.map(SimpleTokenKind::new)
|
||||||
.expect("Expected iterator to not reach end");
|
.expect("Expected iterator to not reach end");
|
||||||
if token_from_end_kind.is_not_separator() {
|
if matches!(token_from_end_kind, SimpleTokenKind::NotSeparator) {
|
||||||
remaining_extra_words -= 1;
|
remaining_extra_words -= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user