Rename the filter_parser crate into filter-parser

Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
Tamo 2021-11-09 16:16:28 +01:00 committed by Irevoire
parent 0ea0146e04
commit f28600031d
No known key found for this signature in database
GPG key ID: 7A6A970C96104F1B
55 changed files with 5 additions and 8 deletions

10
filter-parser/Cargo.toml Normal file
View file

@ -0,0 +1,10 @@
[package]
name = "filter-parser"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
nom = "7.0.0"
nom_locate = "4.0.0"

37
filter-parser/README.md Normal file
View file

@ -0,0 +1,37 @@
# Filter parser
This workspace is dedicated to the parsing of the MeiliSearch filters.
Most of the code and explanation are in the [`lib.rs`](./src/lib.rs). Especially, the BNF of the filters at the top of this file.
The parser uses [nom](https://docs.rs/nom/) to do most of its work and [nom-locate](https://docs.rs/nom_locate/) to keep track of what we were doing when we encountered an error.
## Cli
A simple main is provided to quick-test if a filter can be parsed or not without bringing milli.
It takes one argument and tries to parse it.
```
cargo run -- 'field = value' # success
cargo run -- 'field = "doggo' # error => missing closing delimiter "
```
## Fuzz
The workspace has been fuzzed with [cargo-fuzz](https://rust-fuzz.github.io/book/cargo-fuzz.html).
### Setup
You'll need rust-nightly to execute the fuzzer.
```
cargo install cargo-fuzz
```
### Run
When the filter parser is executed by the fuzzer, it triggers a stack overflow really fast. We can avoid this problem by limiting the `max_len` of [libfuzzer](https://llvm.org/docs/LibFuzzer.html) to 500 characters.
```
cargo fuzz run parse -- -max_len=500
```
## What to do if you find a bug in the parser
- Write a test at the end of the [`lib.rs`](./src/lib.rs) to ensure it never happens again.
- Add a file in [the corpus directory](./fuzz/corpus/parse/) with your filter to help the fuzzer find new bugs. Since this directory is going to be heavily polluted by the execution of the fuzzer, it's in the gitignore and you'll need to force add your new test.

View file

@ -0,0 +1,25 @@
[package]
name = "filter-parser-fuzz"
version = "0.0.0"
authors = ["Automatically generated"]
publish = false
edition = "2018"
[package.metadata]
cargo-fuzz = true
[dependencies]
libfuzzer-sys = "0.4"
[dependencies.filter-parser]
path = ".."
# Prevent this from interfering with workspaces
[workspace]
members = ["."]
[[bin]]
name = "parse"
path = "fuzz_targets/parse.rs"
test = false
doc = false

View file

@ -0,0 +1 @@
channel = Ponce

View file

@ -0,0 +1 @@
channel != ponce

View file

@ -0,0 +1 @@
NOT channel = ponce

View file

@ -0,0 +1 @@
subscribers < 1000

View file

@ -0,0 +1 @@
subscribers > 1000

View file

@ -0,0 +1 @@
subscribers <= 1000

View file

@ -0,0 +1 @@
subscribers >= 1000

View file

@ -0,0 +1 @@
NOT subscribers < 1000

View file

@ -0,0 +1 @@
NOT subscribers > 1000

View file

@ -0,0 +1 @@
NOT subscribers <= 1000

View file

@ -0,0 +1 @@
NOT subscribers >= 1000

View file

@ -0,0 +1 @@
subscribers = 12

View file

@ -0,0 +1 @@
subscribers 100 TO 1000

View file

@ -0,0 +1 @@
NOT subscribers 100 TO 1000

View file

@ -0,0 +1 @@
_geoRadius(12, 13, 14)

View file

@ -0,0 +1 @@
NOT _geoRadius(12, 13, 14)

View file

@ -0,0 +1 @@
channel = ponce AND 'dog race' != 'bernese mountain'

View file

@ -0,0 +1 @@
channel = ponce OR 'dog race' != 'bernese mountain'

View file

@ -0,0 +1 @@
channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000

View file

@ -0,0 +1 @@
channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )

View file

@ -0,0 +1 @@
(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)

View file

@ -0,0 +1 @@
channel = Ponce = 12

View file

@ -0,0 +1 @@
channel = 'Mister Mv'

View file

@ -0,0 +1 @@
channel =

View file

@ -0,0 +1 @@
channel = 🐻

View file

@ -0,0 +1 @@
OR

View file

@ -0,0 +1 @@
AND

View file

@ -0,0 +1 @@
channel Ponce

View file

@ -0,0 +1 @@
channel = Ponce OR

View file

@ -0,0 +1 @@
_geoRadius

View file

@ -0,0 +1 @@
_geoRadius = 12

View file

@ -0,0 +1 @@
_geoPoint(12, 13, 14)

View file

@ -0,0 +1 @@
position <= _geoPoint(12, 13, 14)

View file

@ -0,0 +1 @@
channel = "Mister Mv"

View file

@ -0,0 +1 @@
position <= _geoRadius(12, 13, 14)

View file

@ -0,0 +1 @@
channel = 'ponce

View file

@ -0,0 +1 @@
channel = "ponce

View file

@ -0,0 +1 @@
channel = mv OR (followers >= 1000

View file

@ -0,0 +1 @@
'dog race' = Borzoi

View file

@ -0,0 +1 @@
"dog race" = Chusky

View file

@ -0,0 +1 @@
"dog race" = "Bernese Mountain"

View file

@ -0,0 +1 @@
'dog race' = 'Bernese Mountain'

View file

@ -0,0 +1 @@
"dog race" = 'Bernese Mountain'

View file

@ -0,0 +1,18 @@
#![no_main]
use filter_parser::{ErrorKind, FilterCondition};
use libfuzzer_sys::fuzz_target;
fuzz_target!(|data: &[u8]| {
    // The parser only accepts `&str`, so non UTF-8 inputs are ignored.
    // Inputs of 500 bytes or more are skipped as well: under the fuzzer the
    // recursive parser overflows its stack very quickly on long inputs.
    let filter = std::str::from_utf8(data).ok().filter(|text| text.len() < 500);

    // A rejected filter is fine; only an `InternalError` is treated as a bug.
    if let Some(Err(e)) = filter.map(|text| FilterCondition::parse(text)) {
        if let ErrorKind::InternalError(_) = e.kind() {
            panic!("Found an internal error: `{:?}`", e)
        }
    }
});

View file

@ -0,0 +1,73 @@
//! BNF grammar:
//!
//! ```text
//! condition = value ("=" | ">" ...) value
//! to = value value TO value
//! ```
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::combinator::cut;
use nom::sequence::tuple;
use Condition::*;
use crate::{parse_value, FilterCondition, IResult, Span, Token};
/// The operation applied to a field, together with the [Token] operand(s) it is compared to.
///
/// The comparison variants are built by `parse_condition` and `Between` by `parse_to`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Condition<'a> {
/// Built from the `>` operator.
GreaterThan(Token<'a>),
/// Built from the `>=` operator.
GreaterThanOrEqual(Token<'a>),
/// Built from the `=` operator.
Equal(Token<'a>),
/// Built from the `!=` operator.
NotEqual(Token<'a>),
/// Built from the `<` operator.
LowerThan(Token<'a>),
/// Built from the `<=` operator.
LowerThanOrEqual(Token<'a>),
/// Built from the `from TO to` syntax. Its negation is `< from` OR `> to`.
Between { from: Token<'a>, to: Token<'a> },
}
impl<'a> Condition<'a> {
    /// Returns the logical opposite of this condition.
    ///
    /// Every variant negates to a single condition (second element is `None`),
    /// except `Between`: the negation of a range is expressed as two conditions
    /// (`< from` and `> to`) that the caller is expected to combine with an OR.
    pub fn negate(self) -> (Self, Option<Self>) {
        let opposite = match self {
            // the only case producing two operations
            Between { from, to } => return (LowerThan(from), Some(GreaterThan(to))),
            Equal(operand) => NotEqual(operand),
            NotEqual(operand) => Equal(operand),
            GreaterThan(bound) => LowerThanOrEqual(bound),
            GreaterThanOrEqual(bound) => LowerThan(bound),
            LowerThan(bound) => GreaterThanOrEqual(bound),
            LowerThanOrEqual(bound) => GreaterThan(bound),
        };
        (opposite, None)
    }
}
/// condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
///
/// Parses a field, a comparison operator and the value it is compared to.
/// Once the operator has been recognized, failing to parse the right-hand
/// value is turned into a failure by `cut`.
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
    // the two-character operators are listed first so `<=` is not read as `<`
    let comparison = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
    let (rest, (fid, symbol, value)) =
        tuple((parse_value, comparison, cut(parse_value)))(input)?;

    let op = match *symbol.fragment() {
        "<=" => LowerThanOrEqual(value),
        ">=" => GreaterThanOrEqual(value),
        "!=" => NotEqual(value),
        "<" => LowerThan(value),
        ">" => GreaterThan(value),
        "=" => Equal(value),
        // `comparison` can only yield one of the tags above
        _ => unreachable!(),
    };

    Ok((rest, FilterCondition::Condition { fid, op }))
}
/// to = value value TO value
///
/// Parses a range condition: the field, the lower bound, the `TO` keyword and
/// the upper bound. Once `TO` has been recognized, failing to parse the upper
/// bound is turned into a failure by `cut`.
pub fn parse_to(input: Span) -> IResult<FilterCondition> {
    let mut range = tuple((parse_value, parse_value, tag("TO"), cut(parse_value)));
    let (rest, (fid, from, _to_keyword, to)) = range(input)?;

    let op = Between { from, to };
    Ok((rest, FilterCondition::Condition { fid, op }))
}

158
filter-parser/src/error.rs Normal file
View file

@ -0,0 +1,158 @@
use std::fmt::Display;
use nom::error::{self, ParseError};
use nom::Parser;
use crate::{IResult, Span};
/// Helpers to inspect and transform a nom error depending on whether it is
/// recoverable or not. Implemented in this file for `nom::Err<E>`.
pub trait NomErrorExt<E> {
/// For `nom::Err`: returns `true` only for the `Failure` variant.
fn is_failure(&self) -> bool;
/// For `nom::Err`: applies `op` to the inner error unless `self` is a `Failure`, which is returned untouched.
fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
/// For `nom::Err`: applies `op` to the inner error unless `self` is an `Error`, which is returned untouched.
fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
}
impl<E> NomErrorExt<E> for nom::Err<E> {
    /// Tells whether this is the `Failure` variant.
    fn is_failure(&self) -> bool {
        match self {
            Self::Failure(_) => true,
            Self::Error(_) | Self::Incomplete(_) => false,
        }
    }

    /// Rewrites the payload of an `Error`; `Failure` and `Incomplete` are passed through.
    fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        match self {
            Self::Error(inner) => Self::Error(op(inner)),
            untouched => untouched,
        }
    }

    /// Rewrites the payload of a `Failure`; `Error` and `Incomplete` are passed through.
    fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        match self {
            Self::Failure(inner) => Self::Failure(op(inner)),
            untouched => untouched,
        }
    }
}
/// cut a parser and map the error
pub fn cut_with_err<'a, O>(
mut parser: impl FnMut(Span<'a>) -> IResult<O>,
mut with: impl FnMut(Error<'a>) -> Error<'a>,
) -> impl FnMut(Span<'a>) -> IResult<O> {
move |input| match parser.parse(input) {
Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))),
rest => rest,
}
}
/// The error type returned by the filter parser.
#[derive(Debug)]
pub struct Error<'a> {
// the part of the input the error refers to; `Display` prints its fragment and columns
context: Span<'a>,
// what went wrong, selects the message printed by `Display`
kind: ErrorKind<'a>,
}
/// The different kinds of [Error]; each one maps to a message in the `Display` implementation of [Error].
#[derive(Debug)]
pub enum ErrorKind<'a> {
/// A reserved keyword (its name is the payload) was used as a filter expression.
ReservedGeo(&'a str),
/// The `_geoRadius` filter was not given its three arguments.
Geo,
/// The `_geoRadius` filter was used as a value.
MisusedGeo,
/// An operation was expected.
InvalidPrimary,
/// Unexpected characters were found at the end of the filter.
ExpectedEof,
/// A value was expected.
ExpectedValue,
/// The expression is missing the given closing delimiter.
MissingClosingDelimiter(char),
/// Created by `ParseError::from_char`. Displaying this kind panics.
Char(char),
/// Any nom error kind other than `Eof` (see `ParseError::from_error_kind`).
InternalError(error::ErrorKind),
/// The message of an error that was not produced by the parser itself (see `Error::new_from_external`).
External(String),
}
impl<'a> Error<'a> {
pub fn kind(&self) -> &ErrorKind<'a> {
&self.kind
}
pub fn context(&self) -> &Span<'a> {
&self.context
}
pub fn new_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self {
Self { context, kind }
}
pub fn new_from_external(context: Span<'a>, error: impl std::error::Error) -> Self {
Self::new_from_kind(context, ErrorKind::External(error.to_string()))
}
pub fn char(self) -> char {
match self.kind {
ErrorKind::Char(c) => c,
_ => panic!("Internal filter parser error"),
}
}
}
impl<'a> ParseError<Span<'a>> for Error<'a> {
fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self {
let kind = match kind {
error::ErrorKind::Eof => ErrorKind::ExpectedEof,
kind => ErrorKind::InternalError(kind),
};
Self { context: input, kind }
}
fn append(_input: Span<'a>, _kind: error::ErrorKind, other: Self) -> Self {
other
}
fn from_char(input: Span<'a>, c: char) -> Self {
Self { context: input, kind: ErrorKind::Char(c) }
}
}
impl<'a> Display for Error<'a> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let input = self.context.fragment();
// When printing our error message we want to escape all `\n` to be sure we keep our format with the
// first line being the diagnostic and the second line being the incriminated filter.
let escaped_input = input.escape_debug();
match self.kind {
ErrorKind::ExpectedValue if input.trim().is_empty() => {
writeln!(f, "Was expecting a value but instead got nothing.")?
}
ErrorKind::MissingClosingDelimiter(c) => {
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
}
ErrorKind::ExpectedValue => {
writeln!(f, "Was expecting a value but instead got `{}`.", escaped_input)?
}
ErrorKind::InvalidPrimary if input.trim().is_empty() => {
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing.")?
}
ErrorKind::InvalidPrimary => {
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `{}`.", escaped_input)?
}
ErrorKind::ExpectedEof => {
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
}
ErrorKind::Geo => {
writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
}
ErrorKind::ReservedGeo(name) => {
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name.escape_debug())?
}
ErrorKind::MisusedGeo => {
writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?
}
ErrorKind::Char(c) => {
panic!("Tried to display a char error with `{}`", c)
}
ErrorKind::InternalError(kind) => writeln!(
f,
"Encountered an internal `{:?}` error while parsing your filter. Please fill an issue", kind
)?,
ErrorKind::External(ref error) => writeln!(f, "{}", error)?,
}
let base_column = self.context.get_utf8_column();
let size = self.context.fragment().chars().count();
write!(f, "{}:{} {}", base_column, base_column + size, self.context.extra)
}
}

589
filter-parser/src/lib.rs Normal file
View file

@ -0,0 +1,589 @@
//! BNF grammar:
//!
//! ```text
//! filter = expression ~ EOF
//! expression = or
//! or = and (~ "OR" ~ and)*
//! and = not (~ "AND" not)*
//! not = ("NOT" | "!") not | primary
//! primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
//! condition = value ("=" | ">" ...) value
//! to = value value TO value
//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
//! singleQuoted = "'" .* all but quotes "'"
//! doubleQuoted = "\"" .* all but double quotes "\""
//! word = (alphanumeric | _ | - | .)+
//! geoRadius = WS* ~ "_geoRadius(" ~ WS* ~ float ~ WS* ~ "," ~ WS* ~ float ~ WS* ~ "," ~ WS* ~ float ~ WS* ~ ")"
//! ```
//!
//! Other BNF grammar used to handle some specific errors:
//! ```text
//! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")"
//! ```
//!
//! Specific errors:
//! ================
//! - If a user tries to use a geoPoint, as a primary OR as a value, we must throw an error.
//! ```text
//! field = _geoPoint(12, 13, 14)
//! field < 12 AND _geoPoint(1, 2)
//! ```
//!
//! - If a user tries to use a geoRadius as a value we must throw an error.
//! ```text
//! field = _geoRadius(12, 13, 14)
//! ```
//!
mod condition;
mod error;
mod value;
use std::fmt::Debug;
use std::ops::Deref;
use std::str::FromStr;
pub use condition::{parse_condition, parse_to, Condition};
use error::{cut_with_err, NomErrorExt};
pub use error::{Error, ErrorKind};
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::character::complete::{char, multispace0};
use nom::combinator::{cut, eof, map};
use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float;
use nom::sequence::{delimited, preceded, terminated, tuple};
use nom::Finish;
use nom_locate::LocatedSpan;
pub(crate) use value::parse_value;
/// A located slice of the filter. The `extra` field is a second `&str`:
/// `FilterCondition::parse` sets it to the whole input.
pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
/// Result of every parser of this crate: nom's `IResult` over [Span] with [Error] as the error type.
type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
/// A piece of the filter (field name or value) with its location in the input.
///
/// `PartialEq` is implemented by hand in this file and only compares the fragments.
#[derive(Debug, Clone, Eq)]
pub struct Token<'a>(Span<'a>);
impl<'a> Deref for Token<'a> {
    type Target = &'a str;

    /// Gives access to the text of the token.
    fn deref(&self) -> &Self::Target {
        self.0.fragment()
    }
}
impl<'a> PartialEq for Token<'a> {
fn eq(&self, other: &Self) -> bool {
self.0.fragment() == other.0.fragment()
}
}
impl<'a> Token<'a> {
pub fn new(position: Span<'a>) -> Self {
Self(position)
}
pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> {
Error::new_from_external(self.0, error)
}
pub fn parse<T>(&self) -> Result<T, Error>
where
T: FromStr,
T::Err: std::error::Error,
{
self.0.parse().map_err(|e| self.as_external_error(e))
}
}
impl<'a> From<Span<'a>> for Token<'a> {
fn from(span: Span<'a>) -> Self {
Self(span)
}
}
/// The tree produced by parsing a filter.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterCondition<'a> {
/// A single operation `op` applied to the field `fid`.
Condition { fid: Token<'a>, op: Condition<'a> },
/// Built from two expressions separated by `OR`, or by negating an `And`.
Or(Box<Self>, Box<Self>),
/// Built from two expressions separated by `AND`, or by negating an `Or`.
And(Box<Self>, Box<Self>),
/// Built from `_geoRadius(..)`: `point` holds the first two arguments and `radius` the third.
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
/// The negation of `GeoLowerThan`.
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
/// Returned by `parse` when the input only contains whitespace.
Empty,
}
impl<'a> FilterCondition<'a> {
pub fn negate(self) -> FilterCondition<'a> {
use FilterCondition::*;
match self {
Condition { fid, op } => match op.negate() {
(op, None) => Condition { fid, op },
(a, Some(b)) => Or(
Condition { fid: fid.clone(), op: a }.into(),
Condition { fid, op: b }.into(),
),
},
Or(a, b) => And(a.negate().into(), b.negate().into()),
And(a, b) => Or(a.negate().into(), b.negate().into()),
Empty => Empty,
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
}
}
pub fn parse(input: &'a str) -> Result<Self, Error> {
if input.trim().is_empty() {
return Ok(Self::Empty);
}
let span = Span::new_extra(input, input);
parse_filter(span).finish().map(|(_rem, output)| output)
}
}
/// remove OPTIONAL whitespaces before AND after the the provided parser.
fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> {
delimited(multispace0, inner, multispace0)
}
/// or = and (~ "OR" ~ and)*
///
/// The branches are folded from left to right: `a OR b OR c` gives `Or(Or(a, b), c)`.
fn parse_or(input: Span) -> IResult<FilterCondition> {
    let (input, first) = parse_and(input)?;
    // once an `OR` has been parsed, something MUST follow it
    let (input, others) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?;

    let mut expr = first;
    for branch in others {
        expr = FilterCondition::Or(Box::new(expr), Box::new(branch));
    }
    Ok((input, expr))
}
/// and = not (~ "AND" not)*
///
/// The branches are folded from left to right: `a AND b AND c` gives `And(And(a, b), c)`.
fn parse_and(input: Span) -> IResult<FilterCondition> {
    let (input, first) = parse_not(input)?;
    // once an `AND` has been parsed, something MUST follow it
    let (input, others) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?;

    let mut expr = first;
    for branch in others {
        expr = FilterCondition::And(Box::new(expr), Box::new(branch));
    }
    Ok((input, expr))
}
/// not = ("NOT" | "!") not | primary
///
/// Negations can be chained, eg: `NOT NOT channel = mv`.
/// Once a `NOT` or a `!` has been parsed, an expression MUST follow it.
fn parse_not(input: Span) -> IResult<FilterCondition> {
    let keyword = alt((tag("!"), tag("NOT")));
    let negated = map(preceded(keyword, cut(parse_not)), |expr| expr.negate());
    alt((negated, parse_primary))(input)
}
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoRadius but not after
let parsed = preceded(
tuple((multispace0, tag("_geoRadius"))),
// if we were able to parse `_geoRadius` and can't parse the rest of the input we returns a failure
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
)(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo)));
let (input, args) = parsed?;
if args.len() != 3 {
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::Geo)));
}
let res = FilterCondition::GeoLowerThan {
point: [args[0].into(), args[1].into()],
radius: args[2].into(),
};
Ok((input, res))
}
/// geoPoint = WS* ~ "_geoPoint(float ~ "," ~ float ~ "," float)
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoPoint but not after
tuple((
multispace0,
tag("_geoPoint"),
// if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next.
cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))),
))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
// if we succeeded we still returns a Failure because geoPoints are not allowed
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
}
/// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
fn parse_primary(input: Span) -> IResult<FilterCondition> {
alt((
// if we find a first parenthesis, then we must parse an expression and find the closing parenthesis
delimited(
ws(char('(')),
cut(parse_expression),
cut_with_err(ws(char(')')), |c| {
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
}),
),
parse_geo_radius,
parse_condition,
parse_to,
// the next lines are only for error handling and are written at the end to have the less possible performance impact
parse_geo_point,
))(input)
// if the inner parsers did not match enough information to return an accurate error
.map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary)))
}
/// expression = or
///
/// Entry point of the recursive grammar: it only delegates to `parse_or`.
pub fn parse_expression(input: Span) -> IResult<FilterCondition> {
parse_or(input)
}
/// filter = expression ~ EOF
///
/// Parses an expression and requires the end of the input right after it.
pub fn parse_filter(input: Span) -> IResult<FilterCondition> {
    let mut filter = terminated(parse_expression, eof);
    filter(input)
}
#[cfg(test)]
pub mod tests {
use super::*;
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
// if the string is empty we still need to return 1 for the line number
let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count());
let offset = before.chars().count();
// the extra field is not checked in the tests so we can set it to nothing
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
}
// Checks that every valid filter below is parsed into the expected tree.
// Each case is a pair made of the filter and the `FilterCondition` it must produce.
// Tokens are compared by fragment only (see the `PartialEq` implementation of `Token`).
#[test]
fn parse() {
use FilterCondition as Fc;
let test_case = [
// simple test
(
"channel = Ponce",
Fc::Condition {
fid: rtok("", "channel"),
op: Condition::Equal(rtok("channel = ", "Ponce")),
},
),
(
"subscribers = 12",
Fc::Condition {
fid: rtok("", "subscribers"),
op: Condition::Equal(rtok("subscribers = ", "12")),
},
),
// test all the quotes and simple quotes
(
"channel = 'Mister Mv'",
Fc::Condition {
fid: rtok("", "channel"),
op: Condition::Equal(rtok("channel = '", "Mister Mv")),
},
),
(
"channel = \"Mister Mv\"",
Fc::Condition {
fid: rtok("", "channel"),
op: Condition::Equal(rtok("channel = \"", "Mister Mv")),
},
),
(
"'dog race' = Borzoi",
Fc::Condition {
fid: rtok("'", "dog race"),
op: Condition::Equal(rtok("'dog race' = ", "Borzoi")),
},
),
(
"\"dog race\" = Chusky",
Fc::Condition {
fid: rtok("\"", "dog race"),
op: Condition::Equal(rtok("\"dog race\" = ", "Chusky")),
},
),
(
"\"dog race\" = \"Bernese Mountain\"",
Fc::Condition {
fid: rtok("\"", "dog race"),
op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
},
),
(
"'dog race' = 'Bernese Mountain'",
Fc::Condition {
fid: rtok("'", "dog race"),
op: Condition::Equal(rtok("'dog race' = '", "Bernese Mountain")),
},
),
(
"\"dog race\" = 'Bernese Mountain'",
Fc::Condition {
fid: rtok("\"", "dog race"),
op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
},
),
// test all the operators
(
"channel != ponce",
Fc::Condition {
fid: rtok("", "channel"),
op: Condition::NotEqual(rtok("channel != ", "ponce")),
},
),
(
"NOT channel = ponce",
Fc::Condition {
fid: rtok("NOT ", "channel"),
op: Condition::NotEqual(rtok("NOT channel = ", "ponce")),
},
),
(
"subscribers < 1000",
Fc::Condition {
fid: rtok("", "subscribers"),
op: Condition::LowerThan(rtok("subscribers < ", "1000")),
},
),
(
"subscribers > 1000",
Fc::Condition {
fid: rtok("", "subscribers"),
op: Condition::GreaterThan(rtok("subscribers > ", "1000")),
},
),
(
"subscribers <= 1000",
Fc::Condition {
fid: rtok("", "subscribers"),
op: Condition::LowerThanOrEqual(rtok("subscribers <= ", "1000")),
},
),
(
"subscribers >= 1000",
Fc::Condition {
fid: rtok("", "subscribers"),
op: Condition::GreaterThanOrEqual(rtok("subscribers >= ", "1000")),
},
),
(
"NOT subscribers < 1000",
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Condition::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")),
},
),
(
"NOT subscribers > 1000",
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Condition::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")),
},
),
(
"NOT subscribers <= 1000",
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Condition::GreaterThan(rtok("NOT subscribers <= ", "1000")),
},
),
(
"NOT subscribers >= 1000",
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Condition::LowerThan(rtok("NOT subscribers >= ", "1000")),
},
),
(
"subscribers 100 TO 1000",
Fc::Condition {
fid: rtok("", "subscribers"),
op: Condition::Between {
from: rtok("subscribers ", "100"),
to: rtok("subscribers 100 TO ", "1000"),
},
},
),
(
"NOT subscribers 100 TO 1000",
Fc::Or(
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Condition::LowerThan(rtok("NOT subscribers ", "100")),
}
.into(),
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")),
}
.into(),
),
),
(
"_geoRadius(12, 13, 14)",
Fc::GeoLowerThan {
point: [rtok("_geoRadius(", "12"), rtok("_geoRadius(12, ", "13")],
radius: rtok("_geoRadius(12, 13, ", "14"),
},
),
(
"NOT _geoRadius(12, 13, 14)",
Fc::GeoGreaterThan {
point: [rtok("NOT _geoRadius(", "12"), rtok("NOT _geoRadius(12, ", "13")],
radius: rtok("NOT _geoRadius(12, 13, ", "14"),
},
),
// test simple `or` and `and`
(
"channel = ponce AND 'dog race' != 'bernese mountain'",
Fc::And(
Fc::Condition {
fid: rtok("", "channel"),
op: Condition::Equal(rtok("channel = ", "ponce")),
}
.into(),
Fc::Condition {
fid: rtok("channel = ponce AND '", "dog race"),
op: Condition::NotEqual(rtok(
"channel = ponce AND 'dog race' != '",
"bernese mountain",
)),
}
.into(),
),
),
(
"channel = ponce OR 'dog race' != 'bernese mountain'",
Fc::Or(
Fc::Condition {
fid: rtok("", "channel"),
op: Condition::Equal(rtok("channel = ", "ponce")),
}
.into(),
Fc::Condition {
fid: rtok("channel = ponce OR '", "dog race"),
op: Condition::NotEqual(rtok(
"channel = ponce OR 'dog race' != '",
"bernese mountain",
)),
}
.into(),
),
),
(
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000",
Fc::Or(
Fc::And(
Fc::Condition {
fid: rtok("", "channel"),
op: Condition::Equal(rtok("channel = ", "ponce")),
}
.into(),
Fc::Condition {
fid: rtok("channel = ponce AND '", "dog race"),
op: Condition::NotEqual(rtok(
"channel = ponce AND 'dog race' != '",
"bernese mountain",
)),
}
.into(),
)
.into(),
Fc::Condition {
fid: rtok(
"channel = ponce AND 'dog race' != 'bernese mountain' OR ",
"subscribers",
),
op: Condition::GreaterThan(rtok(
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ",
"1000",
)),
}
.into(),
),
),
// test parenthesis
(
"channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )",
Fc::And(
Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(),
Fc::Or(
Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(),
Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
).into()),
),
(
"(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)",
Fc::And(
Fc::Or(
Fc::And(
Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(),
Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(),
).into(),
Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
).into(),
Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into()
)
)
];
// every filter must be parsed successfully AND produce the expected tree
for (input, expected) in test_case {
let result = Fc::parse(input);
// on failure this message prints the expected tree, not the input filter
assert!(
result.is_ok(),
"Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
expected,
result.unwrap_err()
);
let filter = result.unwrap();
assert_eq!(filter, expected, "Filter `{}` failed.", input);
}
}
/// Checks that every invalid filter below is rejected and that the displayed
/// error starts with the expected diagnostic.
#[test]
fn error() {
    use FilterCondition as Fc;

    let test_case = [
        // simple test
        ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."),
        ("channel = ", "Was expecting a value but instead got nothing."),
        ("channel = 🐻", "Was expecting a value but instead got `🐻`."),
        ("channel = 🐻 AND followers < 100", "Was expecting a value but instead got `🐻`."),
        ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."),
        ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."),
        ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."),
        ("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing."),
        ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
        ("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
        ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
        ("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
        ("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."),
        ("channel = 'ponce", "Expression `'ponce` is missing the following closing delimiter: `'`."),
        ("channel = \"ponce", "Expression `\"ponce` is missing the following closing delimiter: `\"`."),
        ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."),
        ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."),
    ];

    for (input, expected) in test_case {
        let message = match Fc::parse(input) {
            Ok(parsed) => panic!(
                "Filter `{}` wasn't supposed to be parsed but it did with the following result: `{:?}`",
                input, parsed
            ),
            Err(e) => e.to_string(),
        };
        // only the first line (the diagnostic) is checked, not the location that follows it
        assert!(message.starts_with(expected), "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.", input, expected, message);
    }
}
}

16
filter-parser/src/main.rs Normal file
View file

@ -0,0 +1,16 @@
/// Quick-test binary: parses the filter given as first argument and prints
/// either the parsed tree or the error.
fn main() {
    let input = std::env::args().nth(1).expect("You must provide a filter to test");
    println!("Trying to execute the following filter:\n{}\n", input);

    let outcome = filter_parser::FilterCondition::parse(&input);
    match outcome {
        Err(error) => {
            println!("❎ Invalid filter");
            println!("{}", error);
        }
        Ok(filter) => {
            println!("✅ Valid filter");
            println!("{:#?}", filter);
        }
    }
}

147
filter-parser/src/value.rs Normal file
View file

@ -0,0 +1,147 @@
use nom::branch::alt;
use nom::bytes::complete::{take_till, take_while, take_while1};
use nom::character::complete::{char, multispace0};
use nom::combinator::cut;
use nom::sequence::{delimited, terminated};
use crate::error::NomErrorExt;
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
pub fn parse_value(input: Span) -> IResult<Token> {
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
let (input, _) = take_while(char::is_whitespace)(input)?;
// then, we want to check if the user is misusing a geo expression
// This expression cant finish without error.
// We want to return an error in case of failure.
if let Err(err) = parse_geo_point(input) {
if err.is_failure() {
return Err(err);
}
}
match parse_geo_radius(input) {
Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))),
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
// But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value.
Err(e) if e.is_failure() => {
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo)))
}
_ => (),
}
// singleQuoted = "'" .* all but quotes "'"
let simple_quoted = take_till(|c: char| c == '\'');
// doubleQuoted = "\"" (word | spaces)* "\""
let double_quoted = take_till(|c: char| c == '"');
// word = (alphanumeric | _ | - | .)+
let word = take_while1(is_value_component);
// this parser is only used when an error is encountered and it parse the
// largest string possible that do not contain any “language” syntax.
// If we try to parse `name = 🦀 AND language = rust` we want to return an
// error saying we could not parse `🦀`. Not that no value were found or that
// we could note parse `🦀 AND language = rust`.
// we want to remove the space before entering the alt because if we don't,
// when we create the errors from the output of the alt we have spaces everywhere
let error_word = take_till::<_, _, Error>(is_syntax_component);
terminated(
alt((
delimited(char('\''), cut(simple_quoted), cut(char('\''))),
delimited(char('"'), cut(double_quoted), cut(char('"'))),
word,
)),
multispace0,
)(input)
.map(|(s, t)| (s, t.into()))
// if we found nothing in the alt it means the user specified something that was not recognized as a value
.map_err(|e: nom::Err<Error>| {
e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue))
})
// if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote
.map_err(|e| {
e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())))
})
}
/// Tells whether `c` can be part of an unquoted word: an alphanumeric
/// character, `_`, `-` or `.`.
fn is_value_component(c: char) -> bool {
    matches!(c, '_' | '-' | '.') || c.is_alphanumeric()
}
/// Tells whether `c` belongs to the syntax of the filters: a whitespace,
/// a parenthesis or one of the characters used by the operators.
fn is_syntax_component(c: char) -> bool {
    matches!(c, '(' | ')' | '=' | '<' | '>' | '!') || c.is_whitespace()
}
#[cfg(test)]
pub mod test {
use nom::Finish;
use super::*;
use crate::tests::rtok;
/// Checks that `parse_value` extracts the expected token from valid inputs.
/// Tokens are compared by fragment only.
#[test]
fn name() {
    let test_case = [
        ("channel", rtok("", "channel")),
        (".private", rtok("", ".private")),
        ("I-love-kebab", rtok("", "I-love-kebab")),
        ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
        ("parens(", rtok("", "parens")),
        ("parens)", rtok("", "parens")),
        ("not!", rtok("", "not")),
        (" channel", rtok(" ", "channel")),
        ("channel ", rtok("", "channel")),
        (" channel ", rtok(" ", "channel")),
        ("'channel'", rtok("'", "channel")),
        ("\"channel\"", rtok("\"", "channel")),
        ("'cha)nnel'", rtok("'", "cha)nnel")),
        ("'cha\"nnel'", rtok("'", "cha\"nnel")),
        ("\"cha'nnel\"", rtok("\"", "cha'nnel")),
        ("\" some spaces \"", rtok("\"", " some spaces ")),
        ("\"cha'nnel\"", rtok("'", "cha'nnel")),
        ("\"cha'nnel\"", rtok("'", "cha'nnel")),
        ("I'm tamo", rtok("'m tamo", "I")),
    ];

    for (input, expected) in test_case {
        let input = Span::new_extra(input, input);
        let (_rest, value) = match parse_value(input) {
            Ok(parsed) => parsed,
            Err(error) => panic!(
                "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
                expected, error
            ),
        };
        assert_eq!(value, expected, "Filter `{}` failed.", input);
    }
}
/// Checks that `parse_value` rejects the inputs below and that the error
/// points at the expected part of the input.
#[test]
fn diagnostic() {
    let test_case = [
        ("🦀", "🦀"),
        (" 🦀", "🦀"),
        ("🦀 AND crab = truc", "🦀"),
        ("🦀_in_name", "🦀_in_name"),
        (" (name = ...", ""),
    ];

    for (input, expected) in test_case {
        let input = Span::new_extra(input, input);
        let error = match parse_value(input).finish() {
            Ok(parsed) => panic!(
                "Filter `{}` wasnt supposed to be parsed but it did with the following result: `{:?}`",
                expected, parsed
            ),
            Err(error) => error,
        };
        // get the inner string referenced in the error
        let value = *error.context().fragment();
        assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value);
    }
}
}