mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Merge #427
427: Handle escaped characters in filters r=Kerollmops a=irevoire Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
commit
660eac50b2
@ -63,11 +63,5 @@ pub fn parse_to(input: Span) -> IResult<FilterCondition> {
|
|||||||
let (input, (key, from, _, to)) =
|
let (input, (key, from, _, to)) =
|
||||||
tuple((parse_value, parse_value, tag("TO"), cut(parse_value)))(input)?;
|
tuple((parse_value, parse_value, tag("TO"), cut(parse_value)))(input)?;
|
||||||
|
|
||||||
Ok((
|
Ok((input, FilterCondition::Condition { fid: key, op: Between { from, to } }))
|
||||||
input,
|
|
||||||
FilterCondition::Condition {
|
|
||||||
fid: key.into(),
|
|
||||||
op: Between { from: from.into(), to: to.into() },
|
|
||||||
},
|
|
||||||
))
|
|
||||||
}
|
}
|
||||||
|
@ -19,14 +19,14 @@ impl<E> NomErrorExt<E> for nom::Err<E> {
|
|||||||
fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
|
fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
|
||||||
match self {
|
match self {
|
||||||
e @ Self::Failure(_) => e,
|
e @ Self::Failure(_) => e,
|
||||||
e => e.map(|e| op(e)),
|
e => e.map(op),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
|
fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
|
||||||
match self {
|
match self {
|
||||||
e @ Self::Error(_) => e,
|
e @ Self::Error(_) => e,
|
||||||
e => e.map(|e| op(e)),
|
e => e.map(op),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -56,6 +56,7 @@ pub enum ErrorKind<'a> {
|
|||||||
InvalidPrimary,
|
InvalidPrimary,
|
||||||
ExpectedEof,
|
ExpectedEof,
|
||||||
ExpectedValue,
|
ExpectedValue,
|
||||||
|
MalformedValue,
|
||||||
MissingClosingDelimiter(char),
|
MissingClosingDelimiter(char),
|
||||||
Char(char),
|
Char(char),
|
||||||
InternalError(error::ErrorKind),
|
InternalError(error::ErrorKind),
|
||||||
@ -82,7 +83,7 @@ impl<'a> Error<'a> {
|
|||||||
pub fn char(self) -> char {
|
pub fn char(self) -> char {
|
||||||
match self.kind {
|
match self.kind {
|
||||||
ErrorKind::Char(c) => c,
|
ErrorKind::Char(c) => c,
|
||||||
_ => panic!("Internal filter parser error"),
|
error => panic!("Internal filter parser error: {:?}", error),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -117,6 +118,9 @@ impl<'a> Display for Error<'a> {
|
|||||||
ErrorKind::ExpectedValue if input.trim().is_empty() => {
|
ErrorKind::ExpectedValue if input.trim().is_empty() => {
|
||||||
writeln!(f, "Was expecting a value but instead got nothing.")?
|
writeln!(f, "Was expecting a value but instead got nothing.")?
|
||||||
}
|
}
|
||||||
|
ErrorKind::MalformedValue => {
|
||||||
|
writeln!(f, "Malformed value: `{}`.", escaped_input)?
|
||||||
|
}
|
||||||
ErrorKind::MissingClosingDelimiter(c) => {
|
ErrorKind::MissingClosingDelimiter(c) => {
|
||||||
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
|
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
|
||||||
}
|
}
|
||||||
|
@ -62,29 +62,39 @@ pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
|
|||||||
type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
|
type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
|
||||||
|
|
||||||
#[derive(Debug, Clone, Eq)]
|
#[derive(Debug, Clone, Eq)]
|
||||||
pub struct Token<'a>(Span<'a>);
|
pub struct Token<'a> {
|
||||||
|
/// The token in the original input; it should be used when possible.
|
||||||
|
span: Span<'a>,
|
||||||
|
/// If you need to modify the original input you can use the `value` field
|
||||||
|
/// to store your modified input.
|
||||||
|
value: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> Deref for Token<'a> {
|
impl<'a> Deref for Token<'a> {
|
||||||
type Target = &'a str;
|
type Target = &'a str;
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
fn deref(&self) -> &Self::Target {
|
||||||
&self.0
|
&self.span
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> PartialEq for Token<'a> {
|
impl<'a> PartialEq for Token<'a> {
|
||||||
fn eq(&self, other: &Self) -> bool {
|
fn eq(&self, other: &Self) -> bool {
|
||||||
self.0.fragment() == other.0.fragment()
|
self.span.fragment() == other.span.fragment()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> Token<'a> {
|
impl<'a> Token<'a> {
|
||||||
pub fn new(position: Span<'a>) -> Self {
|
pub fn new(span: Span<'a>, value: Option<String>) -> Self {
|
||||||
Self(position)
|
Self { span, value }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn value(&self) -> &str {
|
||||||
|
self.value.as_ref().map_or(&self.span, |value| value)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> {
|
pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> {
|
||||||
Error::new_from_external(self.0, error)
|
Error::new_from_external(self.span, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn parse<T>(&self) -> Result<T, Error>
|
pub fn parse<T>(&self) -> Result<T, Error>
|
||||||
@ -92,13 +102,13 @@ impl<'a> Token<'a> {
|
|||||||
T: FromStr,
|
T: FromStr,
|
||||||
T::Err: std::error::Error,
|
T::Err: std::error::Error,
|
||||||
{
|
{
|
||||||
self.0.parse().map_err(|e| self.as_external_error(e))
|
self.span.parse().map_err(|e| self.as_external_error(e))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> From<Span<'a>> for Token<'a> {
|
impl<'a> From<Span<'a>> for Token<'a> {
|
||||||
fn from(span: Span<'a>) -> Self {
|
fn from(span: Span<'a>) -> Self {
|
||||||
Self(span)
|
Self { span, value: None }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -223,7 +233,7 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
|
|||||||
multispace0,
|
multispace0,
|
||||||
tag("_geoPoint"),
|
tag("_geoPoint"),
|
||||||
// if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next.
|
// if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next.
|
||||||
cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))),
|
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||||
))(input)
|
))(input)
|
||||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
|
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
|
||||||
// if we succeeded we still return a `Failure` because geoPoints are not allowed
|
// if we succeeded we still return a `Failure` because geoPoints are not allowed
|
||||||
|
@ -10,7 +10,7 @@ fn main() {
|
|||||||
}
|
}
|
||||||
Err(e) => {
|
Err(e) => {
|
||||||
println!("❎ Invalid filter");
|
println!("❎ Invalid filter");
|
||||||
println!("{}", e.to_string());
|
println!("{}", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -3,12 +3,53 @@ use nom::bytes::complete::{take_till, take_while, take_while1};
|
|||||||
use nom::character::complete::{char, multispace0};
|
use nom::character::complete::{char, multispace0};
|
||||||
use nom::combinator::cut;
|
use nom::combinator::cut;
|
||||||
use nom::sequence::{delimited, terminated};
|
use nom::sequence::{delimited, terminated};
|
||||||
|
use nom::{InputIter, InputLength, InputTake, Slice};
|
||||||
|
|
||||||
use crate::error::NomErrorExt;
|
use crate::error::NomErrorExt;
|
||||||
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
|
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
|
||||||
|
|
||||||
|
/// This function goes through all characters in the [Span] to find any escaped character (`\`).
|
||||||
|
/// It generates a new string in which every `\` directly followed by `char_to_escape` is removed from the [Span].
|
||||||
|
fn unescape(buf: Span, char_to_escape: char) -> String {
|
||||||
|
let to_escape = format!("\\{}", char_to_escape);
|
||||||
|
buf.replace(&to_escape, &char_to_escape.to_string())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Parse a value in quotes. If it encounters an escaped quote, it'll unescape it.
|
||||||
|
fn quoted_by(quote: char, input: Span) -> IResult<Token> {
|
||||||
|
// empty fields / values are valid in json
|
||||||
|
if input.is_empty() {
|
||||||
|
return Ok((input.slice(input.input_len()..), input.into()));
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut escaped = false;
|
||||||
|
let mut i = input.iter_indices();
|
||||||
|
|
||||||
|
while let Some((idx, c)) = i.next() {
|
||||||
|
if c == quote {
|
||||||
|
let (rem, output) = input.take_split(idx);
|
||||||
|
return Ok((rem, Token::new(output, escaped.then(|| unescape(output, quote)))));
|
||||||
|
} else if c == '\\' {
|
||||||
|
if let Some((_, c)) = i.next() {
|
||||||
|
escaped |= c == quote;
|
||||||
|
} else {
|
||||||
|
return Err(nom::Err::Error(Error::new_from_kind(
|
||||||
|
input,
|
||||||
|
ErrorKind::MalformedValue,
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// if it was preceded by a `\` or if it was anything else we can continue to advance
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok((
|
||||||
|
input.slice(input.input_len()..),
|
||||||
|
Token::new(input, escaped.then(|| unescape(input, quote))),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
||||||
pub fn parse_value(input: Span) -> IResult<Token> {
|
pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
||||||
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
||||||
let (input, _) = take_while(char::is_whitespace)(input)?;
|
let (input, _) = take_while(char::is_whitespace)(input)?;
|
||||||
|
|
||||||
@ -30,12 +71,10 @@ pub fn parse_value(input: Span) -> IResult<Token> {
|
|||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
|
||||||
// singleQuoted = "'" .* all but quotes "'"
|
|
||||||
let simple_quoted = take_till(|c: char| c == '\'');
|
|
||||||
// doubleQuoted = "\"" (word | spaces)* "\""
|
|
||||||
let double_quoted = take_till(|c: char| c == '"');
|
|
||||||
// word = (alphanumeric | _ | - | .)+
|
// word = (alphanumeric | _ | - | .)+
|
||||||
let word = take_while1(is_value_component);
|
let word = |input: Span<'a>| -> IResult<Token<'a>> {
|
||||||
|
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))
|
||||||
|
};
|
||||||
|
|
||||||
// this parser is only used when an error is encountered and it parse the
|
// this parser is only used when an error is encountered and it parse the
|
||||||
// largest string possible that do not contain any “language” syntax.
|
// largest string possible that do not contain any “language” syntax.
|
||||||
@ -48,20 +87,26 @@ pub fn parse_value(input: Span) -> IResult<Token> {
|
|||||||
|
|
||||||
terminated(
|
terminated(
|
||||||
alt((
|
alt((
|
||||||
delimited(char('\''), cut(simple_quoted), cut(char('\''))),
|
delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
|
||||||
delimited(char('"'), cut(double_quoted), cut(char('"'))),
|
delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
|
||||||
word,
|
word,
|
||||||
)),
|
)),
|
||||||
multispace0,
|
multispace0,
|
||||||
)(input)
|
)(input)
|
||||||
.map(|(s, t)| (s, t.into()))
|
|
||||||
// if we found nothing in the alt it means the user specified something that was not recognized as a value
|
// if we found nothing in the alt it means the user specified something that was not recognized as a value
|
||||||
.map_err(|e: nom::Err<Error>| {
|
.map_err(|e: nom::Err<Error>| {
|
||||||
e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue))
|
e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue))
|
||||||
})
|
})
|
||||||
// if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote
|
|
||||||
.map_err(|e| {
|
.map_err(|e| {
|
||||||
e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())))
|
e.map_fail(|failure| {
|
||||||
|
// if we encountered a char failure it means the user had an unmatched quote
|
||||||
|
if matches!(failure.kind(), ErrorKind::Char(_)) {
|
||||||
|
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(failure.char()))
|
||||||
|
} else {
|
||||||
|
// else we leave the failure untouched
|
||||||
|
failure
|
||||||
|
}
|
||||||
|
})
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -81,7 +126,7 @@ pub mod test {
|
|||||||
use crate::tests::rtok;
|
use crate::tests::rtok;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn name() {
|
fn test_span() {
|
||||||
let test_case = [
|
let test_case = [
|
||||||
("channel", rtok("", "channel")),
|
("channel", rtok("", "channel")),
|
||||||
(".private", rtok("", ".private")),
|
(".private", rtok("", ".private")),
|
||||||
@ -102,6 +147,7 @@ pub mod test {
|
|||||||
("\"cha'nnel\"", rtok("'", "cha'nnel")),
|
("\"cha'nnel\"", rtok("'", "cha'nnel")),
|
||||||
("\"cha'nnel\"", rtok("'", "cha'nnel")),
|
("\"cha'nnel\"", rtok("'", "cha'nnel")),
|
||||||
("I'm tamo", rtok("'m tamo", "I")),
|
("I'm tamo", rtok("'m tamo", "I")),
|
||||||
|
("\"I'm \\\"super\\\" tamo\"", rtok("\"", "I'm \\\"super\\\" tamo")),
|
||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in test_case {
|
for (input, expected) in test_case {
|
||||||
@ -114,8 +160,116 @@ pub mod test {
|
|||||||
expected,
|
expected,
|
||||||
result.unwrap_err()
|
result.unwrap_err()
|
||||||
);
|
);
|
||||||
let value = result.unwrap().1;
|
let token = result.unwrap().1;
|
||||||
assert_eq!(value, expected, "Filter `{}` failed.", input);
|
assert_eq!(token, expected, "Filter `{}` failed.", input);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_escape_inside_double_quote() {
|
||||||
|
// (input, remaining, expected output token, output value)
|
||||||
|
let test_case = [
|
||||||
|
("aaaa", "", rtok("", "aaaa"), "aaaa"),
|
||||||
|
(r#"aa"aa"#, r#""aa"#, rtok("", "aa"), "aa"),
|
||||||
|
(r#"aa\"aa"#, r#""#, rtok("", r#"aa\"aa"#), r#"aa"aa"#),
|
||||||
|
(r#"aa\\\aa"#, r#""#, rtok("", r#"aa\\\aa"#), r#"aa\\\aa"#),
|
||||||
|
(r#"aa\\"\aa"#, r#""\aa"#, rtok("", r#"aa\\"#), r#"aa\\"#),
|
||||||
|
(r#"aa\\\"\aa"#, r#""#, rtok("", r#"aa\\\"\aa"#), r#"aa\\"\aa"#),
|
||||||
|
(r#"\"\""#, r#""#, rtok("", r#"\"\""#), r#""""#),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (input, remaining, expected_tok, expected_val) in test_case {
|
||||||
|
let span = Span::new_extra(input, "");
|
||||||
|
let result = quoted_by('"', span);
|
||||||
|
assert!(result.is_ok());
|
||||||
|
|
||||||
|
let (rem, output) = result.unwrap();
|
||||||
|
assert_eq!(rem.to_string(), remaining);
|
||||||
|
assert_eq!(output, expected_tok);
|
||||||
|
assert_eq!(output.value(), expected_val.to_string());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_unescape() {
|
||||||
|
// double quote
|
||||||
|
assert_eq!(
|
||||||
|
unescape(Span::new_extra(r#"Hello \"World\""#, ""), '"'),
|
||||||
|
r#"Hello "World""#.to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
unescape(Span::new_extra(r#"Hello \\\"World\\\""#, ""), '"'),
|
||||||
|
r#"Hello \\"World\\""#.to_string()
|
||||||
|
);
|
||||||
|
// simple quote
|
||||||
|
assert_eq!(
|
||||||
|
unescape(Span::new_extra(r#"Hello \'World\'"#, ""), '\''),
|
||||||
|
r#"Hello 'World'"#.to_string()
|
||||||
|
);
|
||||||
|
assert_eq!(
|
||||||
|
unescape(Span::new_extra(r#"Hello \\\'World\\\'"#, ""), '\''),
|
||||||
|
r#"Hello \\'World\\'"#.to_string()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_value() {
|
||||||
|
let test_case = [
|
||||||
|
// (input, expected value, if a string was generated to hold the new value)
|
||||||
|
("channel", "channel", false),
|
||||||
|
// All the base tests; no escaped string should be generated
|
||||||
|
(".private", ".private", false),
|
||||||
|
("I-love-kebab", "I-love-kebab", false),
|
||||||
|
("but_snakes_is_also_good", "but_snakes_is_also_good", false),
|
||||||
|
("parens(", "parens", false),
|
||||||
|
("parens)", "parens", false),
|
||||||
|
("not!", "not", false),
|
||||||
|
(" channel", "channel", false),
|
||||||
|
("channel ", "channel", false),
|
||||||
|
(" channel ", "channel", false),
|
||||||
|
("'channel'", "channel", false),
|
||||||
|
("\"channel\"", "channel", false),
|
||||||
|
("'cha)nnel'", "cha)nnel", false),
|
||||||
|
("'cha\"nnel'", "cha\"nnel", false),
|
||||||
|
("\"cha'nnel\"", "cha'nnel", false),
|
||||||
|
("\" some spaces \"", " some spaces ", false),
|
||||||
|
("\"cha'nnel\"", "cha'nnel", false),
|
||||||
|
("\"cha'nnel\"", "cha'nnel", false),
|
||||||
|
("I'm tamo", "I", false),
|
||||||
|
// escaped thing but not quote
|
||||||
|
(r#""\\""#, r#"\\"#, false),
|
||||||
|
(r#""\\\\\\""#, r#"\\\\\\"#, false),
|
||||||
|
(r#""aa\\aa""#, r#"aa\\aa"#, false),
|
||||||
|
// with double quote
|
||||||
|
(r#""Hello \"world\"""#, r#"Hello "world""#, true),
|
||||||
|
(r#""Hello \\\"world\\\"""#, r#"Hello \\"world\\""#, true),
|
||||||
|
(r#""I'm \"super\" tamo""#, r#"I'm "super" tamo"#, true),
|
||||||
|
(r#""\"\"""#, r#""""#, true),
|
||||||
|
// with simple quote
|
||||||
|
(r#"'Hello \'world\''"#, r#"Hello 'world'"#, true),
|
||||||
|
(r#"'Hello \\\'world\\\''"#, r#"Hello \\'world\\'"#, true),
|
||||||
|
(r#"'I\'m "super" tamo'"#, r#"I'm "super" tamo"#, true),
|
||||||
|
(r#"'\'\''"#, r#"''"#, true),
|
||||||
|
];
|
||||||
|
|
||||||
|
for (input, expected, escaped) in test_case {
|
||||||
|
let input = Span::new_extra(input, input);
|
||||||
|
let result = parse_value(input);
|
||||||
|
|
||||||
|
assert!(
|
||||||
|
result.is_ok(),
|
||||||
|
"Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
|
||||||
|
expected,
|
||||||
|
result.unwrap_err()
|
||||||
|
);
|
||||||
|
let token = result.unwrap().1;
|
||||||
|
assert_eq!(
|
||||||
|
token.value.is_some(),
|
||||||
|
escaped,
|
||||||
|
"Filter `{}` was not supposed to be escaped",
|
||||||
|
input
|
||||||
|
);
|
||||||
|
assert_eq!(token.value(), expected, "Filter `{}` failed.", input);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user