2021-10-22 01:59:38 +02:00
|
|
|
|
use nom::branch::alt;
|
2021-11-04 14:22:35 +01:00
|
|
|
|
use nom::bytes::complete::{take_till, take_while, take_while1};
|
|
|
|
|
use nom::character::complete::{char, multispace0};
|
|
|
|
|
use nom::combinator::cut;
|
|
|
|
|
use nom::sequence::{delimited, terminated};
|
2022-08-17 17:25:31 +02:00
|
|
|
|
use nom::{InputIter, InputLength, InputTake, Slice};
|
2021-10-22 01:59:38 +02:00
|
|
|
|
|
2022-08-17 16:06:29 +02:00
|
|
|
|
use crate::error::{ExpectedValueKind, NomErrorExt};
|
2021-11-04 14:22:35 +01:00
|
|
|
|
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
|
2021-10-22 01:59:38 +02:00
|
|
|
|
|
2022-01-10 15:14:32 +01:00
|
|
|
|
/// This function goes through all characters in the [Span] if it finds any escaped character (`\`).
|
|
|
|
|
/// It generates a new string with all `\` removed from the [Span].
|
2021-12-20 16:18:15 +01:00
|
|
|
|
fn unescape(buf: Span, char_to_escape: char) -> String {
|
|
|
|
|
let to_escape = format!("\\{}", char_to_escape);
|
|
|
|
|
buf.replace(&to_escape, &char_to_escape.to_string())
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/// Parse a value in quote. If it encounter an escaped quote it'll unescape it.
|
|
|
|
|
fn quoted_by(quote: char, input: Span) -> IResult<Token> {
|
|
|
|
|
// empty fields / values are valid in json
|
|
|
|
|
if input.is_empty() {
|
|
|
|
|
return Ok((input.slice(input.input_len()..), input.into()));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let mut escaped = false;
|
|
|
|
|
let mut i = input.iter_indices();
|
|
|
|
|
|
|
|
|
|
while let Some((idx, c)) = i.next() {
|
2022-01-10 15:53:44 +01:00
|
|
|
|
if c == quote {
|
|
|
|
|
let (rem, output) = input.take_split(idx);
|
|
|
|
|
return Ok((rem, Token::new(output, escaped.then(|| unescape(output, quote)))));
|
|
|
|
|
} else if c == '\\' {
|
|
|
|
|
if let Some((_, c)) = i.next() {
|
|
|
|
|
escaped |= c == quote;
|
|
|
|
|
} else {
|
|
|
|
|
return Err(nom::Err::Error(Error::new_from_kind(
|
|
|
|
|
input,
|
|
|
|
|
ErrorKind::MalformedValue,
|
|
|
|
|
)));
|
2021-12-20 16:18:15 +01:00
|
|
|
|
}
|
|
|
|
|
}
|
2022-01-10 15:53:44 +01:00
|
|
|
|
// if it was preceeded by a `\` or if it was anything else we can continue to advance
|
2021-12-20 16:18:15 +01:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
Ok((
|
|
|
|
|
input.slice(input.input_len()..),
|
|
|
|
|
Token::new(input, escaped.then(|| unescape(input, quote))),
|
|
|
|
|
))
|
|
|
|
|
}
|
|
|
|
|
|
2022-08-17 16:53:40 +02:00
|
|
|
|
// word = (alphanumeric | _ | - | .)+ except for reserved keywords
|
|
|
|
|
pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
|
|
|
|
let (input, word): (_, Token<'a>) =
|
|
|
|
|
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
|
|
|
|
|
if is_keyword(word.value()) {
|
|
|
|
|
return Err(nom::Err::Error(Error::new_from_kind(
|
|
|
|
|
input,
|
|
|
|
|
ErrorKind::ReservedKeyword(word.value().to_owned()),
|
|
|
|
|
)));
|
|
|
|
|
}
|
|
|
|
|
Ok((input, word))
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// word = {tag}
|
|
|
|
|
pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, Token<'a>> {
|
|
|
|
|
move |input| {
|
|
|
|
|
let (input, word): (_, Token<'a>) =
|
|
|
|
|
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
|
|
|
|
|
if word.value() == tag {
|
|
|
|
|
Ok((input, word))
|
|
|
|
|
} else {
|
|
|
|
|
Err(nom::Err::Error(Error::new_from_kind(
|
|
|
|
|
input,
|
|
|
|
|
ErrorKind::InternalError(nom::error::ErrorKind::Tag),
|
|
|
|
|
)))
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2022-06-16 09:12:37 +02:00
|
|
|
|
/// value = WS* ( word | singleQuoted | doubleQuoted) WS+
|
2021-12-20 16:18:15 +01:00
|
|
|
|
pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
2021-11-08 15:30:26 +01:00
|
|
|
|
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
|
|
|
|
let (input, _) = take_while(char::is_whitespace)(input)?;
|
|
|
|
|
|
|
|
|
|
// then, we want to check if the user is misusing a geo expression
|
2021-11-09 00:57:46 +01:00
|
|
|
|
// This expression can’t finish without error.
|
|
|
|
|
// We want to return an error in case of failure.
|
|
|
|
|
if let Err(err) = parse_geo_point(input) {
|
|
|
|
|
if err.is_failure() {
|
|
|
|
|
return Err(err);
|
|
|
|
|
}
|
2021-11-04 14:22:35 +01:00
|
|
|
|
}
|
|
|
|
|
match parse_geo_radius(input) {
|
2021-11-04 16:20:53 +01:00
|
|
|
|
Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))),
|
2021-11-04 14:22:35 +01:00
|
|
|
|
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
|
|
|
|
|
// But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value.
|
|
|
|
|
Err(e) if e.is_failure() => {
|
2021-11-04 16:20:53 +01:00
|
|
|
|
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo)))
|
2021-11-04 14:22:35 +01:00
|
|
|
|
}
|
|
|
|
|
_ => (),
|
|
|
|
|
}
|
|
|
|
|
|
2021-11-08 15:30:26 +01:00
|
|
|
|
// this parser is only used when an error is encountered and it parse the
|
|
|
|
|
// largest string possible that do not contain any “language” syntax.
|
|
|
|
|
// If we try to parse `name = 🦀 AND language = rust` we want to return an
|
|
|
|
|
// error saying we could not parse `🦀`. Not that no value were found or that
|
|
|
|
|
// we could note parse `🦀 AND language = rust`.
|
2021-11-04 14:22:35 +01:00
|
|
|
|
// we want to remove the space before entering the alt because if we don't,
|
|
|
|
|
// when we create the errors from the output of the alt we have spaces everywhere
|
2021-11-08 15:30:26 +01:00
|
|
|
|
let error_word = take_till::<_, _, Error>(is_syntax_component);
|
2021-11-04 14:22:35 +01:00
|
|
|
|
|
2022-06-15 10:13:34 +02:00
|
|
|
|
let (input, value) = terminated(
|
2021-11-04 14:22:35 +01:00
|
|
|
|
alt((
|
2021-12-20 16:18:15 +01:00
|
|
|
|
delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
|
|
|
|
|
delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
|
2022-08-17 16:53:40 +02:00
|
|
|
|
word_not_keyword,
|
2021-11-04 14:22:35 +01:00
|
|
|
|
)),
|
|
|
|
|
multispace0,
|
|
|
|
|
)(input)
|
2021-11-08 15:30:26 +01:00
|
|
|
|
// if we found nothing in the alt it means the user specified something that was not recognized as a value
|
|
|
|
|
.map_err(|e: nom::Err<Error>| {
|
2022-08-17 16:06:29 +02:00
|
|
|
|
e.map_err(|error| {
|
|
|
|
|
let expected_value_kind = if matches!(error.kind(), ErrorKind::ReservedKeyword(_)) {
|
|
|
|
|
ExpectedValueKind::ReservedKeyword
|
|
|
|
|
} else {
|
|
|
|
|
ExpectedValueKind::Other
|
|
|
|
|
};
|
|
|
|
|
Error::new_from_kind(
|
|
|
|
|
error_word(input).unwrap().1,
|
|
|
|
|
ErrorKind::ExpectedValue(expected_value_kind),
|
|
|
|
|
)
|
|
|
|
|
})
|
2021-11-08 15:30:26 +01:00
|
|
|
|
})
|
2021-11-04 16:20:53 +01:00
|
|
|
|
.map_err(|e| {
|
2021-12-20 16:18:15 +01:00
|
|
|
|
e.map_fail(|failure| {
|
|
|
|
|
// if we found encountered a char failure it means the user had an unmatched quote
|
|
|
|
|
if matches!(failure.kind(), ErrorKind::Char(_)) {
|
|
|
|
|
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(failure.char()))
|
|
|
|
|
} else {
|
|
|
|
|
// else we let the failure untouched
|
|
|
|
|
failure
|
|
|
|
|
}
|
|
|
|
|
})
|
2022-06-15 10:13:34 +02:00
|
|
|
|
})?;
|
|
|
|
|
|
|
|
|
|
Ok((input, value))
|
2021-10-22 01:59:38 +02:00
|
|
|
|
}
|
|
|
|
|
|
2021-11-09 00:58:23 +01:00
|
|
|
|
/// Tells whether `c` may appear in an unquoted value: any alphanumeric character,
/// or one of `_`, `-`, `.`.
fn is_value_component(c: char) -> bool {
    matches!(c, '_' | '-' | '.') || c.is_alphanumeric()
}
|
|
|
|
|
|
2021-11-08 15:30:26 +01:00
|
|
|
|
/// Tells whether `c` belongs to the filter syntax rather than to a value: any
/// whitespace, or one of `(`, `)`, `=`, `<`, `>`, `!`.
fn is_syntax_component(c: char) -> bool {
    matches!(c, '(' | ')' | '=' | '<' | '>' | '!') || c.is_whitespace()
}
|
|
|
|
|
|
2022-06-15 10:13:34 +02:00
|
|
|
|
/// Tells whether `s` is exactly one of the reserved words of the filter language.
/// The comparison is case-sensitive.
fn is_keyword(s: &str) -> bool {
    const RESERVED: [&str; 7] = ["AND", "OR", "IN", "NOT", "TO", "EXISTS", "_geoRadius"];
    RESERVED.contains(&s)
}
|
|
|
|
|
|
2021-10-22 01:59:38 +02:00
|
|
|
|
#[cfg(test)]
pub mod test {
    use nom::Finish;

    use super::*;
    use crate::tests::rtok;

    /// Every input must parse, and the produced token must equal the expected one.
    #[test]
    fn test_span() {
        // (input, expected token)
        let cases = [
            ("channel", rtok("", "channel")),
            (".private", rtok("", ".private")),
            ("I-love-kebab", rtok("", "I-love-kebab")),
            ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
            ("parens(", rtok("", "parens")),
            ("parens)", rtok("", "parens")),
            ("not!", rtok("", "not")),
            (" channel", rtok(" ", "channel")),
            ("channel ", rtok("", "channel")),
            (" channel ", rtok(" ", "channel")),
            ("'channel'", rtok("'", "channel")),
            ("\"channel\"", rtok("\"", "channel")),
            ("'cha)nnel'", rtok("'", "cha)nnel")),
            ("'cha\"nnel'", rtok("'", "cha\"nnel")),
            ("\"cha'nnel\"", rtok("\"", "cha'nnel")),
            ("\" some spaces \"", rtok("\"", " some spaces ")),
            ("\"cha'nnel\"", rtok("'", "cha'nnel")),
            ("\"cha'nnel\"", rtok("'", "cha'nnel")),
            ("I'm tamo", rtok("'m tamo", "I")),
            ("\"I'm \\\"super\\\" tamo\"", rtok("\"", "I'm \\\"super\\\" tamo")),
        ];

        for (raw, expected) in cases {
            let span = Span::new_extra(raw, raw);
            let result = parse_value(span);

            assert!(
                result.is_ok(),
                "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
                expected,
                result.unwrap_err()
            );
            let (_, token) = result.unwrap();
            assert_eq!(token, expected, "Filter `{}` failed.", span);
        }
    }

    /// Exercises `quoted_by` directly with a double quote as delimiter.
    #[test]
    fn test_escape_inside_double_quote() {
        // (input, remaining, expected output token, output value)
        let cases = [
            ("aaaa", "", rtok("", "aaaa"), "aaaa"),
            (r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
            (r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
            (r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
            (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
            (r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
            (r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
        ];

        for (raw, remaining, expected_tok, expected_val) in cases {
            let result = quoted_by('"', Span::new_extra(raw, ""));
            assert!(result.is_ok());

            let (rest, token) = result.unwrap();
            assert_eq!(rest.to_string(), remaining);
            assert_eq!(token, expected_tok);
            assert_eq!(token.value(), expected_val.to_string());
        }
    }

    /// Only the backslash directly preceding the quote character is removed.
    #[test]
    fn test_unescape() {
        // (input, quote character, expected output)
        let cases = [
            // double quote
            (r#"Hello \"World\""#, '"', r#"Hello "World""#),
            (r#"Hello \\\"World\\\""#, '"', r#"Hello \\"World\\""#),
            // simple quote
            (r#"Hello \'World\'"#, '\'', r#"Hello 'World'"#),
            (r#"Hello \\\'World\\\'"#, '\'', r#"Hello \\'World\\'"#),
        ];

        for (raw, quote, expected) in cases {
            assert_eq!(unescape(Span::new_extra(raw, ""), quote), expected.to_string());
        }
    }

    /// Checks the final value of each token and whether an unescaped copy was allocated.
    #[test]
    fn test_value() {
        let cases = [
            // (input, expected value, if a string was generated to hold the new value)
            ("channel", "channel", false),
            // All the base test, no escaped string should be generated
            (".private", ".private", false),
            ("I-love-kebab", "I-love-kebab", false),
            ("but_snakes_is_also_good", "but_snakes_is_also_good", false),
            ("parens(", "parens", false),
            ("parens)", "parens", false),
            ("not!", "not", false),
            (" channel", "channel", false),
            ("channel ", "channel", false),
            (" channel ", "channel", false),
            ("'channel'", "channel", false),
            ("\"channel\"", "channel", false),
            ("'cha)nnel'", "cha)nnel", false),
            ("'cha\"nnel'", "cha\"nnel", false),
            ("\"cha'nnel\"", "cha'nnel", false),
            ("\" some spaces \"", " some spaces ", false),
            ("\"cha'nnel\"", "cha'nnel", false),
            ("\"cha'nnel\"", "cha'nnel", false),
            ("I'm tamo", "I", false),
            // escaped thing but not quote
            (r#""\\""#, r#"\\"#, false),
            (r#""\\\\\\""#, r#"\\\\\\"#, false),
            (r#""aa\\aa""#, r#"aa\\aa"#, false),
            // with double quote
            (r#""Hello \"world\"""#, r#"Hello "world""#, true),
            (r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
            (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
            (r#""\"\"""#, r#""""#, true),
            // with simple quote
            (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
            (r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
            (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
            (r#"'\'\''"#, r#"''"#, true),
        ];

        for (raw, expected, escaped) in cases {
            let span = Span::new_extra(raw, raw);
            let result = parse_value(span);

            assert!(
                result.is_ok(),
                "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
                expected,
                result.unwrap_err()
            );
            let (_, token) = result.unwrap();
            assert_eq!(
                token.value.is_some(),
                escaped,
                "Filter `{}` was not supposed to be escaped",
                span
            );
            assert_eq!(token.value(), expected, "Filter `{}` failed.", span);
        }
    }

    /// Every input must be rejected, and the error must point at the expected fragment.
    #[test]
    fn diagnostic() {
        // (input, fragment the error is expected to reference)
        let cases = [
            ("🦀", "🦀"),
            (" 🦀", "🦀"),
            ("🦀 AND crab = truc", "🦀"),
            ("🦀_in_name", "🦀_in_name"),
            (" (name = ...", ""),
        ];

        for (raw, expected) in cases {
            let span = Span::new_extra(raw, raw);
            let result = parse_value(span);

            assert!(
                result.is_err(),
                "Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`",
                expected,
                result.unwrap()
            );
            // get the inner string referenced in the error
            let error = result.finish().unwrap_err();
            let value = *error.context().fragment();
            assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", span, expected, value);
        }
    }
}
|