Fix filter parser handling of keywords and surrounding spaces

Now the following fragments are allowed:

AND(field =

AND'field' =

AND"field" =
This commit is contained in:
Loïc Lecrenier 2022-08-17 16:53:40 +02:00
parent b09a8f1b91
commit 238a7be58d
2 changed files with 45 additions and 30 deletions

View file

@ -3,7 +3,7 @@ use nom::bytes::complete::{take_till, take_while, take_while1};
use nom::character::complete::{char, multispace0};
use nom::combinator::cut;
use nom::sequence::{delimited, terminated};
use nom::{InputIter, InputLength, InputTake, Slice};
use nom::{error, InputIter, InputLength, InputTake, Slice};
use crate::error::{ExpectedValueKind, NomErrorExt};
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
@ -48,6 +48,35 @@ fn quoted_by(quote: char, input: Span) -> IResult<Token> {
))
}
// word = (alphanumeric | _ | - | .)+ except for reserved keywords
pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult<Token<'a>> {
let (input, word): (_, Token<'a>) =
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
if is_keyword(word.value()) {
return Err(nom::Err::Error(Error::new_from_kind(
input,
ErrorKind::ReservedKeyword(word.value().to_owned()),
)));
}
Ok((input, word))
}
// word = {tag}
pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, Token<'a>> {
move |input| {
let (input, word): (_, Token<'a>) =
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
if word.value() == tag {
Ok((input, word))
} else {
Err(nom::Err::Error(Error::new_from_kind(
input,
ErrorKind::InternalError(nom::error::ErrorKind::Tag),
)))
}
}
}
/// value = WS* ( word | singleQuoted | doubleQuoted) WS+
pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
@ -71,19 +100,6 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
_ => (),
}
// word = (alphanumeric | _ | - | .)+ except for reserved keywords
let word = |input: Span<'a>| -> IResult<Token<'a>> {
let (input, word): (_, Token<'a>) =
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
if is_keyword(word.value()) {
return Err(nom::Err::Error(Error::new_from_kind(
input,
ErrorKind::ReservedKeyword(word.value().to_owned()),
)));
}
Ok((input, word))
};
// this parser is only used when an error is encountered and it parse the
// largest string possible that do not contain any “language” syntax.
// If we try to parse `name = 🦀 AND language = rust` we want to return an
@ -97,7 +113,7 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
alt((
delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
word,
word_not_keyword,
)),
multispace0,
)(input)