Mirror of https://github.com/meilisearch/MeiliSearch (synced 2024-11-26 14:54:27 +01:00)

Commit a769e09dfa (parent c8ed1675a7): Make token_crop_bounds more rust idiomatic
@@ -230,7 +230,7 @@ impl<'t> Matcher<'t, '_> {
         }
     }
 
-    /// Returns token position of the window to crop around.
+    /// Returns the bounds in byte index of the crop window.
     fn token_crop_bounds(&self, matches: &[Match]) -> (usize, usize) {
         // if there is no match, we start from the beginning of the string by default.
         let first_match_word_position = matches.first().map(|m| m.word_position).unwrap_or(0);
@@ -241,70 +241,64 @@ impl<'t> Matcher<'t, '_> {
         // matches needs to be counted in the crop len.
         let mut remaining_words =
             self.crop_size + first_match_word_position - last_match_word_position;
-        // if the first token is a word, then remove 1 from remaining_words.
-        if let Some(None) = self.tokens.get(first_match_token_position).map(|t| t.is_separator()) {
-            remaining_words -= 1;
-        }
 
-        // we start from the matches positions, then we expand the window on both sides.
-        let mut first_token_position = first_match_token_position;
-        let mut last_token_position = last_match_token_position;
+        let mut before_tokens = self.tokens[..first_match_token_position].iter().rev().peekable();
+        let mut after_tokens = self.tokens[last_match_token_position..].iter().peekable();
+
         while remaining_words > 0 {
-            match (
-                // try to expand left
-                first_token_position.checked_sub(1).and_then(|i| self.tokens.get(i)),
-                // try to expand right
-                last_token_position.checked_add(1).and_then(|i| self.tokens.get(i)),
-            ) {
+            let before_token = before_tokens.peek().map(|t| t.is_separator());
+            let after_token = after_tokens.peek().map(|t| t.is_separator());
+
+            match (before_token, after_token) {
                 // we can expand both sides.
-                (Some(ft), Some(lt)) => {
-                    match (ft.is_separator(), lt.is_separator()) {
+                (Some(before_token), Some(after_token)) => {
+                    match (before_token, after_token) {
                         // if they are both separators and are the same kind then advance both,
                         // or expand on the soft separator side.
-                        (Some(f_kind), Some(s_kind)) => {
-                            if f_kind == s_kind {
-                                first_token_position -= 1;
-                                last_token_position += 1;
-                            } else if f_kind == SeparatorKind::Hard {
-                                last_token_position += 1;
+                        (Some(before_token_kind), Some(after_token_kind)) => {
+                            if before_token_kind == after_token_kind {
+                                before_tokens.next();
+                                after_tokens.next();
+                            } else if before_token_kind == SeparatorKind::Hard {
+                                after_tokens.next();
                             } else {
-                                first_token_position -= 1;
+                                before_tokens.next();
                             }
                         }
                         // if one of the tokens is a word, we expand on the side of the word.
                         // left is a word, advance left.
                         (None, Some(_)) => {
-                            first_token_position -= 1;
+                            before_tokens.next();
                             remaining_words -= 1;
                         }
                         // right is a word, advance right.
                         (Some(_), None) => {
-                            last_token_position += 1;
+                            after_tokens.next();
                             remaining_words -= 1;
                         }
                         // both are words, advance left then right if remaining_words > 0.
                         (None, None) => {
-                            first_token_position -= 1;
+                            before_tokens.next();
                             remaining_words -= 1;
 
                             if remaining_words > 0 {
-                                last_token_position += 1;
+                                after_tokens.next();
                                 remaining_words -= 1;
                             }
                         }
                     }
                 }
                 // the end of the text is reached, advance left.
-                (Some(ft), None) => {
-                    first_token_position -= 1;
-                    if ft.is_separator().is_none() {
+                (Some(before_token), None) => {
+                    before_tokens.next();
+                    if before_token.is_none() {
                         remaining_words -= 1;
                     }
                 }
                 // the start of the text is reached, advance right.
-                (None, Some(lt)) => {
-                    last_token_position += 1;
-                    if lt.is_separator().is_none() {
+                (None, Some(after_token)) => {
+                    after_tokens.next();
+                    if after_token.is_none() {
                         remaining_words -= 1;
                     }
                 }
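The heart of the rewrite is the double-ended Peekable idiom: instead of index arithmetic with checked_sub/checked_add and repeated self.tokens.get lookups, the window is grown by peeking at a reverse iterator over the tokens before the first match and a forward iterator over the tokens from the last match onward, consuming from whichever side should expand. A minimal standalone sketch of the pattern, assuming plain words instead of milli's tokens and dropping the separator-kind logic (window_bounds and everything in it are illustrative, not from the codebase):

/// Grow a crop window of `size` words around the match `words[first..last]`,
/// alternating between the left and right side while both can still expand.
fn window_bounds(words: &[&str], first: usize, last: usize, size: usize) -> (usize, usize) {
    // reverse iterator walking from the first matched word towards the start,
    // forward iterator walking from just after the last matched word to the end.
    let mut before = words[..first].iter().rev().peekable();
    let mut after = words[last..].iter().peekable();
    let mut remaining = size.saturating_sub(last - first);

    while remaining > 0 {
        match (before.peek(), after.peek()) {
            // the left side can expand: take one word there,
            // then one on the right if the budget and the text allow it.
            (Some(_), _) => {
                before.next();
                remaining -= 1;
                if remaining > 0 && after.next().is_some() {
                    remaining -= 1;
                }
            }
            // only the right side can expand.
            (None, Some(_)) => {
                after.next();
                remaining -= 1;
            }
            // both ends of the text are reached.
            (None, None) => break,
        }
    }

    // whatever was *not* consumed lies outside the window.
    (before.count(), words.len() - after.count())
}

fn main() {
    let words: Vec<&str> = "the quick brown fox jumps over the lazy dog".split(' ').collect();
    // a 5-word window around the single matched word "fox" (indices 3..4)
    assert_eq!(window_bounds(&words, 3, 4, 5), (1, 6));
}

Consuming from a Peekable can never go out of bounds, which is what lets the commit delete the checked_sub/checked_add bookkeeping outright.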
@@ -313,7 +307,10 @@ impl<'t> Matcher<'t, '_> {
             }
         }
 
-        (first_token_position, last_token_position)
+        let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end);
+        let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start);
+
+        (crop_byte_start, crop_byte_end)
     }
 
     /// Compute the score of a match interval:
@@ -401,11 +398,7 @@ impl<'t> Matcher<'t, '_> {
     fn crop_bounds(&self, matches: &[Match]) -> (usize, usize) {
         let match_interval = self.find_best_match_interval(matches);
 
-        let (first_token_position, last_token_position) = self.token_crop_bounds(match_interval);
-
-        let byte_start = self.tokens.get(first_token_position).map_or(0, |t| t.byte_start);
-        let byte_end = self.tokens.get(last_token_position).map_or(byte_start, |t| t.byte_end);
-        (byte_start, byte_end)
+        self.token_crop_bounds(match_interval)
     }
 
     // Returns the formatted version of the original text.
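The payoff shows in the last hunk: since token_crop_bounds now yields byte offsets itself, crop_bounds just forwards its result instead of mapping token positions back through self.tokens. The boundary rule relies on what the two iterators have left after the expansion loop: the nearest excluded token on the left ends where the crop starts, the nearest excluded token on the right starts where it ends, and map_or falls back to 0 and self.text.len() when an end of the text was reached. A toy illustration under a stripped-down stand-in for milli's Token (only the byte span is modeled; crop_bytes and the field values are illustrative):

// stripped-down stand-in for a tokenizer token: just its byte span in the text.
struct Token {
    byte_start: usize,
    byte_end: usize,
}

/// Byte bounds of a crop keeping `tokens[first_kept..last_kept]`:
/// the crop starts where the nearest excluded token on the left ends,
/// and ends where the nearest excluded token on the right starts.
fn crop_bytes(text: &str, tokens: &[Token], first_kept: usize, last_kept: usize) -> (usize, usize) {
    let mut before = tokens[..first_kept].iter().rev();
    let mut after = tokens[last_kept..].iter();

    let crop_byte_start = before.next().map_or(0, |t| t.byte_end);
    let crop_byte_end = after.next().map_or(text.len(), |t| t.byte_start);
    (crop_byte_start, crop_byte_end)
}

fn main() {
    let text = "one two three";
    let tokens = [Token { byte_start: 0, byte_end: 3 },
                  Token { byte_start: 4, byte_end: 7 },
                  Token { byte_start: 8, byte_end: 13 }];
    // keep only "two": crop runs from the end of "one" to the start of "three".
    assert_eq!(crop_bytes(text, &tokens, 1, 2), (3, 8));
    // keep everything: the defaults 0 and text.len() kick in.
    assert_eq!(crop_bytes(text, &tokens, 0, 3), (0, 13));
}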