From 02a21fd30915f1f2c3361501924fde1c2c5f208b Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 20 Dec 2021 16:18:15 +0100 Subject: [PATCH 1/4] Handle the escapes of quote in the filters --- filter-parser/src/error.rs | 6 +- filter-parser/src/lib.rs | 26 +++-- filter-parser/src/value.rs | 188 ++++++++++++++++++++++++++++++++++--- 3 files changed, 197 insertions(+), 23 deletions(-) diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index 401b8d7f3..dc13861a1 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -56,6 +56,7 @@ pub enum ErrorKind<'a> { InvalidPrimary, ExpectedEof, ExpectedValue, + MalformedValue, MissingClosingDelimiter(char), Char(char), InternalError(error::ErrorKind), @@ -82,7 +83,7 @@ impl<'a> Error<'a> { pub fn char(self) -> char { match self.kind { ErrorKind::Char(c) => c, - _ => panic!("Internal filter parser error"), + error => panic!("Internal filter parser error: {:?}", error), } } } @@ -117,6 +118,9 @@ impl<'a> Display for Error<'a> { ErrorKind::ExpectedValue if input.trim().is_empty() => { writeln!(f, "Was expecting a value but instead got nothing.")? } + ErrorKind::MalformedValue => { + writeln!(f, "Malformed value: `{}`.", escaped_input)? + } ErrorKind::MissingClosingDelimiter(c) => { writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? } diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 4c5e03c82..07ee57a99 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -62,29 +62,39 @@ pub type Span<'a> = LocatedSpan<&'a str, &'a str>; type IResult<'a, Ret> = nom::IResult, Ret, Error<'a>>; #[derive(Debug, Clone, Eq)] -pub struct Token<'a>(Span<'a>); +pub struct Token<'a> { + /// The token in the original input, it should be used when possible. + span: Span<'a>, + /// If you need to modify the original input you can use the `value` field + /// to store your modified input. + value: Option, +} impl<'a> Deref for Token<'a> { type Target = &'a str; fn deref(&self) -> &Self::Target { - &self.0 + &self.span } } impl<'a> PartialEq for Token<'a> { fn eq(&self, other: &Self) -> bool { - self.0.fragment() == other.0.fragment() + self.span.fragment() == other.span.fragment() } } impl<'a> Token<'a> { - pub fn new(position: Span<'a>) -> Self { - Self(position) + pub fn new(span: Span<'a>, value: Option) -> Self { + Self { span, value } + } + + pub fn value(&self) -> &str { + self.value.as_ref().map_or(&self.span, |value| value) } pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> { - Error::new_from_external(self.0, error) + Error::new_from_external(self.span, error) } pub fn parse(&self) -> Result @@ -92,13 +102,13 @@ impl<'a> Token<'a> { T: FromStr, T::Err: std::error::Error, { - self.0.parse().map_err(|e| self.as_external_error(e)) + self.span.parse().map_err(|e| self.as_external_error(e)) } } impl<'a> From> for Token<'a> { fn from(span: Span<'a>) -> Self { - Self(span) + Self { span, value: None } } } diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index b9d929ab0..d2ca1c932 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -7,8 +7,54 @@ use nom::sequence::{delimited, terminated}; use crate::error::NomErrorExt; use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; +/// This function goes through all chacaters in the [Span], if it finds any escaped character (`\`). +/// It generate a new string with all `\` removed from the [Span]. +fn unescape(buf: Span, char_to_escape: char) -> String { + let to_escape = format!("\\{}", char_to_escape); + buf.replace(&to_escape, &char_to_escape.to_string()) +} + +use nom::{InputIter, InputLength, InputTake, Slice}; + +/// Parse a value in quote. If it encounter an escaped quote it'll unescape it. +fn quoted_by(quote: char, input: Span) -> IResult { + // empty fields / values are valid in json + if input.is_empty() { + return Ok((input.slice(input.input_len()..), input.into())); + } + + let mut escaped = false; + let mut i = input.iter_indices(); + + while let Some((idx, c)) = i.next() { + match c { + c if c == quote => { + let (rem, output) = input.take_split(idx); + return Ok((rem, Token::new(output, escaped.then(|| unescape(output, quote))))); + } + '\\' => { + if let Some((_, c)) = i.next() { + escaped |= c == quote; + } else { + return Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::MalformedValue, + ))); + } + } + // if it was preceeded by a `\` or if it was anything else we can continue to advance + _ => (), + } + } + + Ok(( + input.slice(input.input_len()..), + Token::new(input, escaped.then(|| unescape(input, quote))), + )) +} + /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -pub fn parse_value(input: Span) -> IResult { +pub fn parse_value<'a>(input: Span<'a>) -> IResult> { // to get better diagnostic message we are going to strip the left whitespaces from the input right now let (input, _) = take_while(char::is_whitespace)(input)?; @@ -30,12 +76,10 @@ pub fn parse_value(input: Span) -> IResult { _ => (), } - // singleQuoted = "'" .* all but quotes "'" - let simple_quoted = take_till(|c: char| c == '\''); - // doubleQuoted = "\"" (word | spaces)* "\"" - let double_quoted = take_till(|c: char| c == '"'); // word = (alphanumeric | _ | - | .)+ - let word = take_while1(is_value_component); + let word = |input: Span<'a>| -> IResult> { + take_while1(is_value_component)(input).map(|(s, t)| (s, t.into())) + }; // this parser is only used when an error is encountered and it parse the // largest string possible that do not contain any “language” syntax. @@ -48,20 +92,27 @@ pub fn parse_value(input: Span) -> IResult { terminated( alt(( - delimited(char('\''), cut(simple_quoted), cut(char('\''))), - delimited(char('"'), cut(double_quoted), cut(char('"'))), + delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))), + delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))), word, )), multispace0, )(input) - .map(|(s, t)| (s, t.into())) + // .map(|(s, t)| (s, t.into())) // if we found nothing in the alt it means the user specified something that was not recognized as a value .map_err(|e: nom::Err| { e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue)) }) - // if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote .map_err(|e| { - e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))) + e.map_fail(|failure| { + // if we found encountered a char failure it means the user had an unmatched quote + if matches!(failure.kind(), ErrorKind::Char(_)) { + Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(failure.char())) + } else { + // else we let the failure untouched + failure + } + }) }) } @@ -81,7 +132,7 @@ pub mod test { use crate::tests::rtok; #[test] - fn name() { + fn test_span() { let test_case = [ ("channel", rtok("", "channel")), (".private", rtok("", ".private")), @@ -102,6 +153,7 @@ pub mod test { ("\"cha'nnel\"", rtok("'", "cha'nnel")), ("\"cha'nnel\"", rtok("'", "cha'nnel")), ("I'm tamo", rtok("'m tamo", "I")), + ("\"I'm \\\"super\\\" tamo\"", rtok("\"", "I'm \\\"super\\\" tamo")), ]; for (input, expected) in test_case { @@ -114,8 +166,116 @@ pub mod test { expected, result.unwrap_err() ); - let value = result.unwrap().1; - assert_eq!(value, expected, "Filter `{}` failed.", input); + let token = result.unwrap().1; + assert_eq!(token, expected, "Filter `{}` failed.", input); + } + } + + #[test] + fn test_escape_inside_double_quote() { + // (input, remaining, expected output token, output value) + let test_case = [ + ("aaaa", "", rtok("", "aaaa"), "aaaa"), + (r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"), + (r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#), + (r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#), + (r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#), + (r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#), + (r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#), + ]; + + for (input, remaining, expected_tok, expected_val) in test_case { + let span = Span::new_extra(input, ""); + let result = quoted_by('"', span); + assert!(result.is_ok()); + + let (rem, output) = result.unwrap(); + assert_eq!(rem.to_string(), remaining); + assert_eq!(output, expected_tok); + assert_eq!(output.value(), expected_val.to_string()); + } + } + + #[test] + fn test_unescape() { + // double quote + assert_eq!( + unescape(Span::new_extra(r#"Hello \"World\""#, ""), '"'), + r#"Hello "World""#.to_string() + ); + assert_eq!( + unescape(Span::new_extra(r#"Hello \\\"World\\\""#, ""), '"'), + r#"Hello \\"World\\""#.to_string() + ); + // simple quote + assert_eq!( + unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''), + r#"Hello 'World'"#.to_string() + ); + assert_eq!( + unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''), + r#"Hello \\'World\\'"#.to_string() + ); + } + + #[test] + fn test_value() { + let test_case = [ + // (input, expected value, if a string was generated to hold the new value) + ("channel", "channel", false), + // All the base test, no escaped string should be generated + (".private", ".private", false), + ("I-love-kebab", "I-love-kebab", false), + ("but_snakes_is_also_good", "but_snakes_is_also_good", false), + ("parens(", "parens", false), + ("parens)", "parens", false), + ("not!", "not", false), + (" channel", "channel", false), + ("channel ", "channel", false), + (" channel ", "channel", false), + ("'channel'", "channel", false), + ("\"channel\"", "channel", false), + ("'cha)nnel'", "cha)nnel", false), + ("'cha\"nnel'", "cha\"nnel", false), + ("\"cha'nnel\"", "cha'nnel", false), + ("\" some spaces \"", " some spaces ", false), + ("\"cha'nnel\"", "cha'nnel", false), + ("\"cha'nnel\"", "cha'nnel", false), + ("I'm tamo", "I", false), + // escaped thing but not quote + (r#""\\""#, r#"\\"#, false), + (r#""\\\\\\""#, r#"\\\\\\"#, false), + (r#""aa\\aa""#, r#"aa\\aa"#, false), + // with double quote + (r#""Hello \"world\"""#, r#"Hello "world""#, true), + (r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true), + (r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true), + (r#""\"\"""#, r#""""#, true), + // with simple quote + (r#"'Hello \'world\''"#, r#"Hello 'world'"#, true), + (r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true), + (r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true), + (r#"'\'\''"#, r#"''"#, true), + ]; + + for (input, expected, escaped) in test_case { + let input = Span::new_extra(input, input); + let result = parse_value(input); + + assert!( + result.is_ok(), + "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", + expected, + result.unwrap_err() + ); + let token = result.unwrap().1; + assert_eq!( + token.value.is_some(), + escaped, + "Filter `{}` was not supposed to be escaped", + input + ); + assert_eq!(token.value(), expected, "Filter `{}` failed.", input); } } From 3c7ea1d298ca8dac991da7f006b9a4bf2a25f9d1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 10 Jan 2022 15:14:32 +0100 Subject: [PATCH 2/4] Apply code suggestions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- filter-parser/src/value.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index d2ca1c932..ec7c93656 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -3,19 +3,18 @@ use nom::bytes::complete::{take_till, take_while, take_while1}; use nom::character::complete::{char, multispace0}; use nom::combinator::cut; use nom::sequence::{delimited, terminated}; +use nom::{InputIter, InputLength, InputTake, Slice}; use crate::error::NomErrorExt; use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; -/// This function goes through all chacaters in the [Span], if it finds any escaped character (`\`). -/// It generate a new string with all `\` removed from the [Span]. +/// This function goes through all characters in the [Span] if it finds any escaped character (`\`). +/// It generates a new string with all `\` removed from the [Span]. fn unescape(buf: Span, char_to_escape: char) -> String { let to_escape = format!("\\{}", char_to_escape); buf.replace(&to_escape, &char_to_escape.to_string()) } -use nom::{InputIter, InputLength, InputTake, Slice}; - /// Parse a value in quote. If it encounter an escaped quote it'll unescape it. fn quoted_by(quote: char, input: Span) -> IResult { // empty fields / values are valid in json @@ -98,7 +97,6 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult> { )), multispace0, )(input) - // .map(|(s, t)| (s, t.into())) // if we found nothing in the alt it means the user specified something that was not recognized as a value .map_err(|e: nom::Err| { e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue)) From 0fcde35a20a933963f867b0abb2e471a729dd822 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 10 Jan 2022 15:53:44 +0100 Subject: [PATCH 3/4] Update filter-parser/src/value.rs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clément Renault --- filter-parser/src/value.rs | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index ec7c93656..84dd21902 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -26,24 +26,20 @@ fn quoted_by(quote: char, input: Span) -> IResult { let mut i = input.iter_indices(); while let Some((idx, c)) = i.next() { - match c { - c if c == quote => { - let (rem, output) = input.take_split(idx); - return Ok((rem, Token::new(output, escaped.then(|| unescape(output, quote))))); + if c == quote { + let (rem, output) = input.take_split(idx); + return Ok((rem, Token::new(output, escaped.then(|| unescape(output, quote))))); + } else if c == '\\' { + if let Some((_, c)) = i.next() { + escaped |= c == quote; + } else { + return Err(nom::Err::Error(Error::new_from_kind( + input, + ErrorKind::MalformedValue, + ))); } - '\\' => { - if let Some((_, c)) = i.next() { - escaped |= c == quote; - } else { - return Err(nom::Err::Error(Error::new_from_kind( - input, - ErrorKind::MalformedValue, - ))); - } - } - // if it was preceeded by a `\` or if it was anything else we can continue to advance - _ => (), } + // if it was preceeded by a `\` or if it was anything else we can continue to advance } Ok(( From 92804f6f459bb1b5defa46de8f90923fa3b78d94 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 10 Jan 2022 15:59:04 +0100 Subject: [PATCH 4/4] apply clippy suggestions --- filter-parser/src/condition.rs | 8 +------- filter-parser/src/error.rs | 4 ++-- filter-parser/src/lib.rs | 2 +- filter-parser/src/main.rs | 2 +- 4 files changed, 5 insertions(+), 11 deletions(-) diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs index abd549534..264787055 100644 --- a/filter-parser/src/condition.rs +++ b/filter-parser/src/condition.rs @@ -63,11 +63,5 @@ pub fn parse_to(input: Span) -> IResult { let (input, (key, from, _, to)) = tuple((parse_value, parse_value, tag("TO"), cut(parse_value)))(input)?; - Ok(( - input, - FilterCondition::Condition { - fid: key.into(), - op: Between { from: from.into(), to: to.into() }, - }, - )) + Ok((input, FilterCondition::Condition { fid: key, op: Between { from, to } })) } diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index dc13861a1..ddf7bea47 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -19,14 +19,14 @@ impl NomErrorExt for nom::Err { fn map_err E>(self, op: O) -> nom::Err { match self { e @ Self::Failure(_) => e, - e => e.map(|e| op(e)), + e => e.map(op), } } fn map_fail E>(self, op: O) -> nom::Err { match self { e @ Self::Error(_) => e, - e => e.map(|e| op(e)), + e => e.map(op), } } } diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 07ee57a99..bad7dbc64 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -233,7 +233,7 @@ fn parse_geo_point(input: Span) -> IResult { multispace0, tag("_geoPoint"), // if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next. - cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))), + cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), ))(input) .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?; // if we succeeded we still return a `Failure` because geoPoints are not allowed diff --git a/filter-parser/src/main.rs b/filter-parser/src/main.rs index a3e4cab28..15ab86188 100644 --- a/filter-parser/src/main.rs +++ b/filter-parser/src/main.rs @@ -10,7 +10,7 @@ fn main() { } Err(e) => { println!("❎ Invalid filter"); - println!("{}", e.to_string()); + println!("{}", e); } } }