From 959ca66125c9f70c20e6f54b44c49ff51a3014ff Mon Sep 17 00:00:00 2001 From: Irevoire Date: Mon, 8 Nov 2021 15:30:26 +0100 Subject: [PATCH] improve the error diagnostic when parsing values --- filter_parser/src/error.rs | 4 +++ filter_parser/src/lib.rs | 1 + filter_parser/src/value.rs | 59 ++++++++++++++++++++++++++++++++------ 3 files changed, 56 insertions(+), 8 deletions(-) diff --git a/filter_parser/src/error.rs b/filter_parser/src/error.rs index a1bbac47a..d52b17200 100644 --- a/filter_parser/src/error.rs +++ b/filter_parser/src/error.rs @@ -67,6 +67,10 @@ impl<'a> Error<'a> { &self.kind } + pub fn context(&self) -> &Span<'a> { + &self.context + } + pub fn new_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self { Self { context, kind } } diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index a1d66819f..7db80888b 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -551,6 +551,7 @@ pub mod tests { ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."), ("channel = ", "Was expecting a value but instead got nothing."), ("channel = 🐻", "Was expecting a value but instead got `🐻`."), + ("channel = 🐻 AND followers < 100", "Was expecting a value but instead got `🐻`."), ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."), ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."), ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."), diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index 79fc00acd..4c769fe5f 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -9,7 +9,10 @@ use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* pub fn parse_value(input: Span) -> IResult { - // before anything we want to check if the user is misusing a geo expression + // to get better diagnostic message we are going to strip the left whitespaces from the input right now + let (input, _) = take_while(char::is_whitespace)(input)?; + + // then, we want to check if the user is misusing a geo expression let err = parse_geo_point(input).unwrap_err(); if err.is_failure() { return Err(err); @@ -29,23 +32,30 @@ pub fn parse_value(input: Span) -> IResult { // doubleQuoted = "\"" (word | spaces)* "\"" let double_quoted = |input| take_till(|c: char| c == '"')(input); // word = (alphanumeric | _ | - | .)+ - let word = |input| take_while1(is_key_component)(input); + let word = take_while1(is_key_component); + // this parser is only used when an error is encountered and it parse the + // largest string possible that do not contain any β€œlanguage” syntax. + // If we try to parse `name = πŸ¦€ AND language = rust` we want to return an + // error saying we could not parse `πŸ¦€`. Not that no value were found or that + // we could note parse `πŸ¦€ AND language = rust`. // we want to remove the space before entering the alt because if we don't, // when we create the errors from the output of the alt we have spaces everywhere - let (input, _) = take_while(char::is_whitespace)(input)?; + let error_word = take_till::<_, _, Error>(is_syntax_component); terminated( alt(( - delimited(char('\''), simple_quoted, cut(char('\''))), - delimited(char('"'), double_quoted, cut(char('"'))), + delimited(char('\''), cut(simple_quoted), cut(char('\''))), + delimited(char('"'), cut(double_quoted), cut(char('"'))), word, )), multispace0, )(input) .map(|(s, t)| (s, t.into())) - // if we found nothing in the alt it means the user did not input any value - .map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::ExpectedValue))) + // if we found nothing in the alt it means the user specified something that was not recognized as a value + .map_err(|e: nom::Err| { + e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue)) + }) // if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote .map_err(|e| { e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))) @@ -56,8 +66,14 @@ fn is_key_component(c: char) -> bool { c.is_alphanumeric() || ['_', '-', '.'].contains(&c) } +fn is_syntax_component(c: char) -> bool { + c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c) +} + #[cfg(test)] -pub mod tests { +pub mod test { + use nom::Finish; + use super::*; use crate::tests::rtok; @@ -82,6 +98,7 @@ pub mod tests { ("\" some spaces \"", rtok("\"", " some spaces ")), ("\"cha'nnel\"", rtok("'", "cha'nnel")), ("\"cha'nnel\"", rtok("'", "cha'nnel")), + ("I'm tamo", rtok("'m tamo", "I")), ]; for (input, expected) in test_case { @@ -98,4 +115,30 @@ pub mod tests { assert_eq!(value, expected, "Filter `{}` failed.", input); } } + + #[test] + fn diagnostic() { + let test_case = [ + ("πŸ¦€", "πŸ¦€"), + (" πŸ¦€", "πŸ¦€"), + ("πŸ¦€ AND crab = truc", "πŸ¦€"), + ("πŸ¦€_in_name", "πŸ¦€_in_name"), + (" (name = ...", ""), + ]; + + for (input, expected) in test_case { + let input = Span::new_extra(input, input); + let result = parse_value(input); + + assert!( + result.is_err(), + "Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`", + expected, + result.unwrap() + ); + // get the inner string referenced in the error + let value = *result.finish().unwrap_err().context().fragment(); + assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value); + } + } }