mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 20:57:35 +01:00
Fix filter parser handling of keywords and surrounding spaces
Now the following fragments are allowed: AND(field = AND'field' = AND"field" =
This commit is contained in:
parent
b09a8f1b91
commit
238a7be58d
@ -52,7 +52,7 @@ use error::{cut_with_err, ExpectedValueKind, NomErrorExt};
|
|||||||
pub use error::{Error, ErrorKind};
|
pub use error::{Error, ErrorKind};
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
use nom::bytes::complete::tag;
|
use nom::bytes::complete::tag;
|
||||||
use nom::character::complete::{char, multispace0, multispace1};
|
use nom::character::complete::{char, multispace0};
|
||||||
use nom::combinator::{cut, eof, map, opt};
|
use nom::combinator::{cut, eof, map, opt};
|
||||||
use nom::multi::{many0, separated_list1};
|
use nom::multi::{many0, separated_list1};
|
||||||
use nom::number::complete::recognize_float;
|
use nom::number::complete::recognize_float;
|
||||||
@ -60,6 +60,7 @@ use nom::sequence::{delimited, preceded, terminated, tuple};
|
|||||||
use nom::Finish;
|
use nom::Finish;
|
||||||
use nom_locate::LocatedSpan;
|
use nom_locate::LocatedSpan;
|
||||||
pub(crate) use value::parse_value;
|
pub(crate) use value::parse_value;
|
||||||
|
use value::word_exact;
|
||||||
|
|
||||||
pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
|
pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
|
||||||
|
|
||||||
@ -183,7 +184,7 @@ fn parse_value_list<'a>(input: Span<'a>) -> IResult<Vec<Token<'a>>> {
|
|||||||
/// in = value "IN" "[" value_list "]"
|
/// in = value "IN" "[" value_list "]"
|
||||||
fn parse_in(input: Span) -> IResult<FilterCondition> {
|
fn parse_in(input: Span) -> IResult<FilterCondition> {
|
||||||
let (input, value) = parse_value(input)?;
|
let (input, value) = parse_value(input)?;
|
||||||
let (input, _) = ws(tag("IN"))(input)?;
|
let (input, _) = ws(word_exact("IN"))(input)?;
|
||||||
|
|
||||||
// everything after `IN` can be a failure
|
// everything after `IN` can be a failure
|
||||||
let (input, _) =
|
let (input, _) =
|
||||||
@ -215,9 +216,8 @@ fn parse_in(input: Span) -> IResult<FilterCondition> {
|
|||||||
/// in = value "NOT" WS* "IN" "[" value_list "]"
|
/// in = value "NOT" WS* "IN" "[" value_list "]"
|
||||||
fn parse_not_in(input: Span) -> IResult<FilterCondition> {
|
fn parse_not_in(input: Span) -> IResult<FilterCondition> {
|
||||||
let (input, value) = parse_value(input)?;
|
let (input, value) = parse_value(input)?;
|
||||||
let (input, _) = tag("NOT")(input)?;
|
let (input, _) = word_exact("NOT")(input)?;
|
||||||
let (input, _) = multispace1(input)?;
|
let (input, _) = ws(word_exact("IN"))(input)?;
|
||||||
let (input, _) = ws(tag("IN"))(input)?;
|
|
||||||
|
|
||||||
// everything after `IN` can be a failure
|
// everything after `IN` can be a failure
|
||||||
let (input, _) =
|
let (input, _) =
|
||||||
@ -241,8 +241,7 @@ fn parse_not_in(input: Span) -> IResult<FilterCondition> {
|
|||||||
fn parse_or(input: Span) -> IResult<FilterCondition> {
|
fn parse_or(input: Span) -> IResult<FilterCondition> {
|
||||||
let (input, first_filter) = parse_and(input)?;
|
let (input, first_filter) = parse_and(input)?;
|
||||||
// if we found a `OR` then we MUST find something next
|
// if we found a `OR` then we MUST find something next
|
||||||
let (input, mut ors) =
|
let (input, mut ors) = many0(preceded(ws(word_exact("OR")), cut(parse_and)))(input)?;
|
||||||
many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?;
|
|
||||||
|
|
||||||
let filter = if ors.is_empty() {
|
let filter = if ors.is_empty() {
|
||||||
first_filter
|
first_filter
|
||||||
@ -258,8 +257,7 @@ fn parse_or(input: Span) -> IResult<FilterCondition> {
|
|||||||
fn parse_and(input: Span) -> IResult<FilterCondition> {
|
fn parse_and(input: Span) -> IResult<FilterCondition> {
|
||||||
let (input, first_filter) = parse_not(input)?;
|
let (input, first_filter) = parse_not(input)?;
|
||||||
// if we found a `AND` then we MUST find something next
|
// if we found a `AND` then we MUST find something next
|
||||||
let (input, mut ands) =
|
let (input, mut ands) = many0(preceded(ws(word_exact("AND")), cut(parse_not)))(input)?;
|
||||||
many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?;
|
|
||||||
|
|
||||||
let filter = if ands.is_empty() {
|
let filter = if ands.is_empty() {
|
||||||
first_filter
|
first_filter
|
||||||
@ -276,9 +274,7 @@ fn parse_and(input: Span) -> IResult<FilterCondition> {
|
|||||||
/// If we parse a `NOT` we MUST parse something behind.
|
/// If we parse a `NOT` we MUST parse something behind.
|
||||||
fn parse_not(input: Span) -> IResult<FilterCondition> {
|
fn parse_not(input: Span) -> IResult<FilterCondition> {
|
||||||
alt((
|
alt((
|
||||||
map(preceded(ws(tuple((tag("NOT"), multispace1))), cut(parse_not)), |e| {
|
map(preceded(ws(word_exact("NOT")), cut(parse_not)), |e| FilterCondition::Not(Box::new(e))),
|
||||||
FilterCondition::Not(Box::new(e))
|
|
||||||
}),
|
|
||||||
parse_primary,
|
parse_primary,
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
@ -288,7 +284,7 @@ fn parse_not(input: Span) -> IResult<FilterCondition> {
|
|||||||
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
||||||
// we want to allow space BEFORE the _geoRadius but not after
|
// we want to allow space BEFORE the _geoRadius but not after
|
||||||
let parsed = preceded(
|
let parsed = preceded(
|
||||||
tuple((multispace0, tag("_geoRadius"))),
|
tuple((multispace0, word_exact("_geoRadius"))),
|
||||||
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
|
// if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure
|
||||||
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||||
)(input)
|
)(input)
|
||||||
@ -741,6 +737,10 @@ pub mod tests {
|
|||||||
Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into()
|
Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into()
|
||||||
])
|
])
|
||||||
)
|
)
|
||||||
|
// (
|
||||||
|
// ("channel = ponce AND'dog' != 'bernese mountain'", ),
|
||||||
|
// ("channel = ponce AND('dog' != 'bernese mountain')", ),
|
||||||
|
// )
|
||||||
];
|
];
|
||||||
|
|
||||||
for (input, expected) in test_case {
|
for (input, expected) in test_case {
|
||||||
@ -770,7 +770,7 @@ pub mod tests {
|
|||||||
("'OR'", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\\'OR\\'`."),
|
("'OR'", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\\'OR\\'`."),
|
||||||
("OR", "Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes."),
|
("OR", "Was expecting a value but instead got `OR`, which is a reserved keyword. To use `OR` as a field name or a value, surround it by quotes."),
|
||||||
("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`."),
|
("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`."),
|
||||||
("channel = Ponce OR", "Found unexpected characters at the end of the filter: `OR`. You probably forgot an `OR` or an `AND` rule."),
|
("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing."),
|
||||||
("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
|
("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
|
||||||
("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
|
("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
|
||||||
("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
|
("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
|
||||||
@ -783,7 +783,6 @@ pub mod tests {
|
|||||||
("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."),
|
("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."),
|
||||||
("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."),
|
("subscribers 100 TO1000", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`."),
|
||||||
("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."),
|
("channel = ponce ORdog != 'bernese mountain'", "Found unexpected characters at the end of the filter: `ORdog != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."),
|
||||||
("channel = ponce AND'dog' != 'bernese mountain'", "Found unexpected characters at the end of the filter: `AND\\'dog\\' != \\'bernese mountain\\'`. You probably forgot an `OR` or an `AND` rule."),
|
|
||||||
("colour IN blue, green]", "Expected `[` after `IN` keyword."),
|
("colour IN blue, green]", "Expected `[` after `IN` keyword."),
|
||||||
("colour IN [blue, green, 'blue' > 2]", "Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`"),
|
("colour IN [blue, green, 'blue' > 2]", "Expected only comma-separated field names inside `IN[..]` but instead found `> 2]`"),
|
||||||
("colour IN [blue, green, AND]", "Expected only comma-separated field names inside `IN[..]` but instead found `AND]`"),
|
("colour IN [blue, green, AND]", "Expected only comma-separated field names inside `IN[..]` but instead found `AND]`"),
|
||||||
|
@ -3,7 +3,7 @@ use nom::bytes::complete::{take_till, take_while, take_while1};
|
|||||||
use nom::character::complete::{char, multispace0};
|
use nom::character::complete::{char, multispace0};
|
||||||
use nom::combinator::cut;
|
use nom::combinator::cut;
|
||||||
use nom::sequence::{delimited, terminated};
|
use nom::sequence::{delimited, terminated};
|
||||||
use nom::{InputIter, InputLength, InputTake, Slice};
|
use nom::{error, InputIter, InputLength, InputTake, Slice};
|
||||||
|
|
||||||
use crate::error::{ExpectedValueKind, NomErrorExt};
|
use crate::error::{ExpectedValueKind, NomErrorExt};
|
||||||
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
|
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
|
||||||
@ -48,6 +48,35 @@ fn quoted_by(quote: char, input: Span) -> IResult<Token> {
|
|||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// word = (alphanumeric | _ | - | .)+ except for reserved keywords
|
||||||
|
pub fn word_not_keyword<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
||||||
|
let (input, word): (_, Token<'a>) =
|
||||||
|
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
|
||||||
|
if is_keyword(word.value()) {
|
||||||
|
return Err(nom::Err::Error(Error::new_from_kind(
|
||||||
|
input,
|
||||||
|
ErrorKind::ReservedKeyword(word.value().to_owned()),
|
||||||
|
)));
|
||||||
|
}
|
||||||
|
Ok((input, word))
|
||||||
|
}
|
||||||
|
|
||||||
|
// word = {tag}
|
||||||
|
pub fn word_exact<'a, 'b: 'a>(tag: &'b str) -> impl Fn(Span<'a>) -> IResult<'a, Token<'a>> {
|
||||||
|
move |input| {
|
||||||
|
let (input, word): (_, Token<'a>) =
|
||||||
|
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
|
||||||
|
if word.value() == tag {
|
||||||
|
Ok((input, word))
|
||||||
|
} else {
|
||||||
|
Err(nom::Err::Error(Error::new_from_kind(
|
||||||
|
input,
|
||||||
|
ErrorKind::InternalError(nom::error::ErrorKind::Tag),
|
||||||
|
)))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// value = WS* ( word | singleQuoted | doubleQuoted) WS+
|
/// value = WS* ( word | singleQuoted | doubleQuoted) WS+
|
||||||
pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
||||||
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
||||||
@ -71,19 +100,6 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
|||||||
_ => (),
|
_ => (),
|
||||||
}
|
}
|
||||||
|
|
||||||
// word = (alphanumeric | _ | - | .)+ except for reserved keywords
|
|
||||||
let word = |input: Span<'a>| -> IResult<Token<'a>> {
|
|
||||||
let (input, word): (_, Token<'a>) =
|
|
||||||
take_while1(is_value_component)(input).map(|(s, t)| (s, t.into()))?;
|
|
||||||
if is_keyword(word.value()) {
|
|
||||||
return Err(nom::Err::Error(Error::new_from_kind(
|
|
||||||
input,
|
|
||||||
ErrorKind::ReservedKeyword(word.value().to_owned()),
|
|
||||||
)));
|
|
||||||
}
|
|
||||||
Ok((input, word))
|
|
||||||
};
|
|
||||||
|
|
||||||
// this parser is only used when an error is encountered and it parse the
|
// this parser is only used when an error is encountered and it parse the
|
||||||
// largest string possible that do not contain any “language” syntax.
|
// largest string possible that do not contain any “language” syntax.
|
||||||
// If we try to parse `name = 🦀 AND language = rust` we want to return an
|
// If we try to parse `name = 🦀 AND language = rust` we want to return an
|
||||||
@ -97,7 +113,7 @@ pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
|
|||||||
alt((
|
alt((
|
||||||
delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
|
delimited(char('\''), cut(|input| quoted_by('\'', input)), cut(char('\''))),
|
||||||
delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
|
delimited(char('"'), cut(|input| quoted_by('"', input)), cut(char('"'))),
|
||||||
word,
|
word_not_keyword,
|
||||||
)),
|
)),
|
||||||
multispace0,
|
multispace0,
|
||||||
)(input)
|
)(input)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user