From a5c916225077b0dfe8ba35cea702af730f692b1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= <loic@meilisearch.com>
Date: Wed, 15 Jun 2022 09:14:19 +0200
Subject: [PATCH] Improve parser for NOT EXISTS filter

Allow multiple spaces between NOT and EXISTS
---
 filter-parser/src/condition.rs | 10 ++++----
 filter-parser/src/lib.rs       | 43 +++++++++++++++++++++++-----------
 filter-parser/src/value.rs     |  2 +-
 3 files changed, 36 insertions(+), 19 deletions(-)

diff --git a/filter-parser/src/condition.rs b/filter-parser/src/condition.rs
index b57d68b75..6a5ecbe0a 100644
--- a/filter-parser/src/condition.rs
+++ b/filter-parser/src/condition.rs
@@ -7,11 +7,12 @@
 
 use nom::branch::alt;
 use nom::bytes::complete::tag;
+use nom::character::complete::multispace1;
 use nom::combinator::cut;
 use nom::sequence::{terminated, tuple};
 use Condition::*;
 
-use crate::{parse_value, FilterCondition, IResult, Span, Token};
+use crate::{parse_value, ws, FilterCondition, IResult, Span, Token};
 
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub enum Condition<'a> {
@@ -62,16 +63,17 @@ pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
     Ok((input, condition))
 }
 
-/// exist          = value NOT EXISTS
+/// exist          = value "EXISTS"
 pub fn parse_exists(input: Span) -> IResult<FilterCondition> {
     let (input, key) = terminated(parse_value, tag("EXISTS"))(input)?;
 
     Ok((input, FilterCondition::Condition { fid: key.into(), op: Exists }))
 }
-/// exist          = value NOT EXISTS
+/// exist          = value "NOT" WS* "EXISTS"
 pub fn parse_not_exists(input: Span) -> IResult<FilterCondition> {
-    let (input, key) = terminated(parse_value, tag("NOT EXISTS"))(input)?;
+    let (input, key) = parse_value(input)?;
 
+    let (input, _) = tuple((tag("NOT"), multispace1, tag("EXISTS")))(input)?;
     Ok((input, FilterCondition::Condition { fid: key.into(), op: NotExists }))
 }
 
diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs
index 69215798c..e40519e87 100644
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -1,21 +1,21 @@
 //! BNF grammar:
 //!
 //! ```text
-//! filter         = expression ~ EOF
+//! filter         = expression EOF
 //! expression     = or
 //! or             = and ("OR" and)
 //! and            = not ("AND" not)*
 //! not            = ("NOT" not) | primary
 //! primary        = (WS* "("  expression ")" WS*) | geoRadius | condition | exists | not_exists | to
 //! condition      = value ("==" | ">" ...) value
-//! exists         = value EXISTS
-//! not_exists     = value NOT EXISTS
-//! to             = value value TO value
-//! value          = WS* ( word | singleQuoted | doubleQuoted) ~ WS*
+//! exists         = value "EXISTS"
+//! not_exists     = value "NOT" WS* "EXISTS"
+//! to             = value value "TO" value
+//! value          = WS* ( word | singleQuoted | doubleQuoted) WS*
 //! singleQuoted   = "'" .* all but quotes "'"
 //! doubleQuoted   = "\"" .* all but double quotes "\""
 //! word           = (alphanumeric | _ | - | .)+
-//! geoRadius      = WS* ~ "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
+//! geoRadius      = WS* "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")"
 //! ```
 //!
 //! Other BNF grammar used to handle some specific errors:
@@ -31,7 +31,7 @@
 //! field < 12 AND _geoPoint(1, 2)
 //! ```
 //!
-//! - If a user try to use a geoRadius as a value we must throw an error.
+//! - If a user try to use a geoRadius as a value we must throw an error.
 //! ```text
 //! field = _geoRadius(12, 13, 14)
 //! ```
@@ -170,7 +170,7 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>)
     delimited(multispace0, inner, multispace0)
 }
 
-/// or             = and (~ "OR" ~ and)
+/// or             = and ("OR" and)
 fn parse_or(input: Span) -> IResult<FilterCondition> {
     let (input, lhs) = parse_and(input)?;
     // if we found a `OR` then we MUST find something next
@@ -182,7 +182,7 @@ fn parse_or(input: Span) -> IResult<FilterCondition> {
     Ok((input, expr))
 }
 
-/// and            = not (~ "AND" not)*
+/// and            = not ("AND" not)*
 fn parse_and(input: Span) -> IResult<FilterCondition> {
     let (input, lhs) = parse_not(input)?;
     // if we found a `AND` then we MUST find something next
@@ -193,14 +193,14 @@ fn parse_and(input: Span) -> IResult<FilterCondition> {
     Ok((input, expr))
 }
 
-/// not            = ("NOT" ~ not) | primary
+/// not            = ("NOT" not) | primary
 /// We can have multiple consecutive not, eg: `NOT NOT channel = mv`.
 /// If we parse a `NOT` we MUST parse something behind.
 fn parse_not(input: Span) -> IResult<FilterCondition> {
     alt((map(preceded(tag("NOT"), cut(parse_not)), |e| e.negate()), parse_primary))(input)
 }
 
-/// geoRadius      = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
+/// geoRadius      = WS* "_geoRadius(float "," float "," float)
 /// If we parse `_geoRadius` we MUST parse the rest of the expression.
 fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
     // we want to forbid space BEFORE the _geoRadius but not after
@@ -224,7 +224,7 @@ fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
     Ok((input, res))
 }
 
-/// geoPoint      = WS* ~ "_geoPoint(float ~ "," ~ float ~ "," float)
+/// geoPoint      = WS* "_geoPoint(float "," float "," float)
 fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
     // we want to forbid space BEFORE the _geoPoint but not after
     tuple((
@@ -238,7 +238,7 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
     Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
 }
 
-/// primary        = (WS* ~ "("  expression ")" ~ WS*) | geoRadius | condition | to
+/// primary        = (WS* "("  expression ")" WS*) | geoRadius | condition | to
 fn parse_primary(input: Span) -> IResult<FilterCondition> {
     alt((
         // if we find a first parenthesis, then we must parse an expression and find the closing parenthesis
@@ -266,7 +266,7 @@ pub fn parse_expression(input: Span) -> IResult<FilterCondition> {
     parse_or(input)
 }
 
-/// filter     = expression ~ EOF
+/// filter     = expression EOF
 pub fn parse_filter(input: Span) -> IResult<FilterCondition> {
     terminated(parse_expression, eof)(input)
 }
@@ -446,6 +446,20 @@ pub mod tests {
                     op: Condition::NotExists,
                 },
             ),
+            (
+                "NOT subscribers NOT EXISTS",
+                Fc::Condition {
+                    fid: rtok("NOT ", "subscribers"),
+                    op: Condition::Exists,
+                },
+            ),
+            (
+                "subscribers NOT   EXISTS",
+                Fc::Condition {
+                    fid: rtok("", "subscribers"),
+                    op: Condition::NotExists,
+                },
+            ),
             (
                 "subscribers 100 TO 1000",
                 Fc::Condition {
@@ -616,6 +630,7 @@ pub mod tests {
             ("channel = \"ponce", "Expression `\\\"ponce` is missing the following closing delimiter: `\"`."),
             ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."),
             ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."),
+            ("colour NOT EXIST", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`."),
         ];
 
         for (input, expected) in test_case {
diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs
index 84dd21902..18ae58ae5 100644
--- a/filter-parser/src/value.rs
+++ b/filter-parser/src/value.rs
@@ -48,7 +48,7 @@ fn quoted_by(quote: char, input: Span) -> IResult<Token> {
     ))
 }
 
-/// value          = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
+/// value          = WS* ( word | singleQuoted | doubleQuoted) WS*
 pub fn parse_value<'a>(input: Span<'a>) -> IResult<Token<'a>> {
     // to get better diagnostic message we are going to strip the left whitespaces from the input right now
     let (input, _) = take_while(char::is_whitespace)(input)?;