3786: Consistently use wrapping add to avoid overflow in debug when query s… r=dureuill a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/3785

## What does this PR do?
- Some code paths erroneously used the default addition operator, whose semantics are "overflow is an error, checked at runtime in debug builds", instead of the intended "overflow is expected" semantics that this code relies on (it uses `u16::MAX` as a sentinel). This PR switches those paths to the wrapping add operator everywhere; see the sketch below.
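
To illustrate the difference, here is a minimal standalone sketch (not Meilisearch code): with a `u16::MAX` sentinel, the plain `+` operator panics with `attempt to add with overflow` in debug builds, while `wrapping_add` wraps around as intended.

```rust
fn main() {
    // Sentinel meaning "no position assigned yet", as described in the PR.
    let position: u16 = u16::MAX;

    // `position + 1` would panic with `attempt to add with overflow` in a
    // debug build (and silently wrap in release). `wrapping_add` makes the
    // wrap-around explicit and intentional:
    let next = position.wrapping_add(1);
    assert_eq!(next, 0);
    println!("wrapped position: {next}");
}
```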

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
meili-bors[bot], 2023-05-29 12:39:54 +00:00, commit 0a7817a002

```diff
@@ -77,13 +77,9 @@ pub fn located_query_terms_from_tokens(
                 }
             }
             TokenKind::Separator(separator_kind) => {
-                match separator_kind {
-                    SeparatorKind::Hard => {
-                        position += 1;
-                    }
-                    SeparatorKind::Soft => {
-                        position += 0;
-                    }
+                // add penalty for hard separators
+                if let SeparatorKind::Hard = separator_kind {
+                    position = position.wrapping_add(1);
                 }
 
                 phrase = 'phrase: {
@@ -288,3 +284,36 @@ impl PhraseBuilder {
         })
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use charabia::TokenizerBuilder;
+
+    use super::*;
+    use crate::index::tests::TempIndex;
+
+    fn temp_index_with_documents() -> TempIndex {
+        let temp_index = TempIndex::new();
+        temp_index
+            .add_documents(documents!([
+                { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
+                { "id": 2, "name": "Westfália" },
+                { "id": 3, "name": "Ŵôřlḑôle" },
+            ]))
+            .unwrap();
+        temp_index
+    }
+
+    #[test]
+    fn start_with_hard_separator() -> Result<()> {
+        let tokenizer = TokenizerBuilder::new().build();
+        let tokens = tokenizer.tokenize(".");
+        let index = temp_index_with_documents();
+        let rtxn = index.read_txn()?;
+        let mut ctx = SearchContext::new(&index, &rtxn);
+        // panics with `attempt to add with overflow` before <https://github.com/meilisearch/meilisearch/issues/3785>
+        let located_query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?;
+        assert!(located_query_terms.is_empty());
+        Ok(())
+    }
+}
```
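
The regression test above tokenizes a query consisting of a single hard separator (`.`), so the separator branch runs while `position` still holds the `u16::MAX` sentinel; before this change, the `position += 1` there panicked with `attempt to add with overflow` in debug builds. Assuming the file lives in the `milli` crate of the Meilisearch workspace, the test can be run with something like `cargo test -p milli start_with_hard_separator`.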