Improve syntax errors for IN filter

This commit is contained in:
Loïc Lecrenier 2022-06-15 10:13:34 +02:00
parent 2fd20fadfc
commit 4ecfb95d0c
3 changed files with 83 additions and 10 deletions

View File

@ -57,6 +57,10 @@ pub enum ErrorKind<'a> {
ExpectedEof,
ExpectedValue,
MalformedValue,
InOpeningBracket,
InClosingBracket,
InExpectedValue,
ReservedKeyword(String),
MissingClosingDelimiter(char),
Char(char),
InternalError(error::ErrorKind),
@ -109,12 +113,11 @@ impl<'a> ParseError<Span<'a>> for Error<'a> {
impl<'a> Display for Error<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let input = self.context.fragment();
// When printing our error message we want to escape all `\n` to be sure we keep our format with the
// first line being the diagnostic and the second line being the incriminated filter.
let escaped_input = input.escape_debug();
match self.kind {
match &self.kind {
ErrorKind::ExpectedValue if input.trim().is_empty() => {
writeln!(f, "Was expecting a value but instead got nothing.")?
}
@ -145,6 +148,18 @@ impl<'a> Display for Error<'a> {
ErrorKind::MisusedGeo => {
writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?
}
ErrorKind::ReservedKeyword(word) => {
writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")?
}
ErrorKind::InOpeningBracket => {
writeln!(f, "Expected `[` after `IN` keyword.")?
}
ErrorKind::InClosingBracket => {
writeln!(f, "Expected matching `]` after the list of field names given to `IN[`")?
}
ErrorKind::InExpectedValue => {
writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`")?
}
ErrorKind::Char(c) => {
panic!("Tried to display a char error with `{}`", c)
}

View File

@ -167,6 +167,12 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>)
/// value_list = (value ("," value)* ","?)?
fn parse_value_list<'a>(input: Span<'a>) -> IResult<Vec<Token<'a>>> {
// TODO: here, I should return a failure with a clear explanation whenever possible
// for example:
// * expected the name of a field, but got `AND`
// * expected closing square bracket, but got `AND`
let (input, first_value) = opt(parse_value)(input)?;
if let Some(first_value) = first_value {
let value_list_el_parser = preceded(ws(tag(",")), parse_value);
@ -186,9 +192,22 @@ fn parse_in(input: Span) -> IResult<FilterCondition> {
let (input, value) = parse_value(input)?;
let (input, _) = ws(tag("IN"))(input)?;
let mut els_parser = delimited(tag("["), parse_value_list, tag("]"));
// everything after `IN` can be a failure
let (input, _) = cut_with_err(tag("["), |_| {
Error::new_from_kind(input, ErrorKind::InOpeningBracket)
})(input)?;
let (input, content) = cut(parse_value_list)(input)?;
// everything after `IN` can be a failure
let (input, _) = cut_with_err(tag("]"), |_| {
if eof::<_, ()>(input).is_ok() {
Error::new_from_kind(input, ErrorKind::InClosingBracket)
} else {
Error::new_from_kind(input, ErrorKind::InExpectedValue)
}
})(input)?;
let (input, content) = els_parser(input)?;
let filter = FilterCondition::In { fid: value, els: content };
Ok((input, filter))
}
@ -199,9 +218,19 @@ fn parse_not_in(input: Span) -> IResult<FilterCondition> {
let (input, _) = multispace1(input)?;
let (input, _) = ws(tag("IN"))(input)?;
let mut els_parser = delimited(tag("["), parse_value_list, tag("]"));
let (input, content) = els_parser(input)?;
// everything after `IN` can be a failure
let (input, _) = cut_with_err(tag("["), |_| {
Error::new_from_kind(input, ErrorKind::InOpeningBracket)
})(input)?;
let (input, content) = cut(parse_value_list)(input)?;
// everything after `IN` can be a failure
let (input, _) = cut_with_err(tag("]"), |_| {
Error::new_from_kind(input, ErrorKind::InClosingBracket)
})(input)?;
let filter = FilterCondition::Not(Box::new(FilterCondition::In { fid: value, els: content }));
Ok((input, filter))
}
@ -313,6 +342,9 @@ fn parse_primary(input: Span) -> IResult<FilterCondition> {
))(input)
// if the inner parsers did not match enough information to return an accurate error
.map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary)))
// TODO: if the filter starts with a reserved keyword that is not NOT, then we should return the reserved keyword error
// TODO: if the filter is x = reserved, idem
}
/// expression = or
@ -344,6 +376,13 @@ pub mod tests {
let test_case = [
// simple test
(
"x = AND",
Fc::Not(Box::new(Fc::Not(Box::new(Fc::In {
fid: rtok("NOT NOT", "colour"),
els: vec![]
}))))
),
(
"colour IN[]",
Fc::In {
@ -734,6 +773,11 @@ pub mod tests {
("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."),
("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."),
("channel = ponce AND'dog' != 'bernese mountain'", "Found unexpected characters at the end of the filter: `AND\\'dog\\' != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."),
("colour IN blue, green]", "Expected `[` after `IN` keyword."),
("colour IN [blue, green, 'blue' > 2]", "Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`"),
("colour IN [blue, green, AND]", "Expected only comma-separated field names inside `IN[..]` but instead found `AND]`"),
("colour IN [blue, green", "Expected matching `]` after the list of field names given to `IN[`"),
("colour IN ['blue, green", "Expression `\\'blue, green` is missing the following closing delimiter: `'`."),
];
for (input, expected) in test_case {

View File

@ -71,9 +71,17 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
_ => (),
}
// word = (alphanumeric | _ | - | .)+
// word = (alphanumeric | _ | - | .)+ except for reserved keywords
let word = |input: Span<'a>| -> IResult<Token<'a>> {
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))
let (input, word): (_, Token<'a>) =
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
if is_keyword(word.value()) {
return Err(nom::Err::Error(Error::new_from_kind(
input,
ErrorKind::ReservedKeyword(word.value().to_owned()),
)));
}
Ok((input, word))
};
// this parser is only used when an error is encountered and it parse the
@ -85,7 +93,7 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
// when we create the errors from the output of the alt we have spaces everywhere
let error_word = take_till::<_, _, Error>(is_syntax_component);
terminated(
let (input, value) = terminated(
alt((
delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
@ -107,7 +115,9 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
failure
}
})
})
})?;
Ok((input, value))
}
fn is_value_component(c: char) -> bool {
@ -118,6 +128,10 @@ fn is_syntax_component(c: char) -> bool {
c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c)
}
fn is_keyword(s: &str) -> bool {
matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius")
}
#[cfg(test)]
pub mod test {
use nom::Finish;