From 73198179f1d0cca0dca93aa4ee6e8a56cf6da1c7 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Mon, 29 May 2023 10:08:27 +0200
Subject: [PATCH 1/2] Consistently use wrapping add to avoid overflow in debug when query starts with a separator

---
 milli/src/search/new/query_term/parse_query.rs | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs
index bf90748e4..0120772be 100644
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@@ -77,13 +77,9 @@ pub fn located_query_terms_from_tokens(
                 }
             }
             TokenKind::Separator(separator_kind) => {
-                match separator_kind {
-                    SeparatorKind::Hard => {
-                        position += 1;
-                    }
-                    SeparatorKind::Soft => {
-                        position += 0;
-                    }
+                // add penalty for hard separators
+                if let SeparatorKind::Hard = separator_kind {
+                    position = position.wrapping_add(1);
                 }
 
                 phrase = 'phrase: {

From 1dfc4038abccf082f4961ac91c5faf77f546f671 Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Mon, 29 May 2023 11:58:26 +0200
Subject: [PATCH 2/2] Add test that fails before PR and passes now

---
 .../src/search/new/query_term/parse_query.rs | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs
index 0120772be..69c2cd9c9 100644
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@@ -284,3 +284,36 @@ impl PhraseBuilder {
         })
     }
 }
+
+#[cfg(test)]
+mod tests {
+    use charabia::TokenizerBuilder;
+
+    use super::*;
+    use crate::index::tests::TempIndex;
+
+    fn temp_index_with_documents() -> TempIndex {
+        let temp_index = TempIndex::new();
+        temp_index
+            .add_documents(documents!([
+                { "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
+                { "id": 2, "name": "Westfália" },
+                { "id": 3, "name": "Ŵôřlḑôle" },
+            ]))
+            .unwrap();
+        temp_index
+    }
+
+    #[test]
+    fn start_with_hard_separator() -> Result<()> {
+        let tokenizer = TokenizerBuilder::new().build();
+        let tokens = tokenizer.tokenize(".");
+        let index = temp_index_with_documents();
+        let rtxn = index.read_txn()?;
+        let mut ctx = SearchContext::new(&index, &rtxn);
+        // panics with `attempt to add with overflow` before
+        let located_query_terms = located_query_terms_from_tokens(&mut ctx, tokens, None)?;
+        assert!(located_query_terms.is_empty());
+        Ok(())
+    }
+}