mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Rename the filter_parser crate into filter-parser
Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
parent
0ea0146e04
commit
f28600031d
55 changed files with 5 additions and 8 deletions
10
filter-parser/Cargo.toml
Normal file
10
filter-parser/Cargo.toml
Normal file
|
@ -0,0 +1,10 @@
|
|||
[package]
|
||||
name = "filter-parser"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
nom = "7.0.0"
|
||||
nom_locate = "4.0.0"
|
37
filter-parser/README.md
Normal file
37
filter-parser/README.md
Normal file
|
@ -0,0 +1,37 @@
|
|||
# Filter parser
|
||||
|
||||
This workspace is dedicated to the parsing of the MeiliSearch filters.
|
||||
|
||||
Most of the code and explanations are in [`lib.rs`](./src/lib.rs); in particular, the BNF of the filters is at the top of that file.
|
||||
|
||||
The parser uses [nom](https://docs.rs/nom/) to do most of its work and [nom-locate](https://docs.rs/nom_locate/) to keep track of what we were doing when we encountered an error.
|
||||
|
||||
## Cli
|
||||
A simple main is provided to quickly test whether a filter can be parsed or not without bringing in milli.
|
||||
It takes one argument and tries to parse it.
|
||||
```
|
||||
cargo run -- 'field = value' # success
|
||||
cargo run -- 'field = "doggo' # error => missing closing delimiter "
|
||||
```
|
||||
|
||||
## Fuzz
|
||||
The workspace has been fuzzed with [cargo-fuzz](https://rust-fuzz.github.io/book/cargo-fuzz.html).
|
||||
|
||||
### Setup
|
||||
You'll need rust-nightly to execute the fuzzer.
|
||||
|
||||
```
|
||||
cargo install cargo-fuzz
|
||||
```
|
||||
|
||||
### Run
|
||||
When the filter parser is executed by the fuzzer, it triggers a stack overflow really fast. We can avoid this problem by limiting the `max_len` of [libfuzzer](https://llvm.org/docs/LibFuzzer.html) to 500 characters.
|
||||
```
|
||||
cargo fuzz run parse -- -max_len=500
|
||||
```
|
||||
|
||||
## What to do if you find a bug in the parser
|
||||
|
||||
- Write a test at the end of the [`lib.rs`](./src/lib.rs) to ensure it never happens again.
|
||||
- Add a file in [the corpus directory](./fuzz/corpus/parse/) with your filter to help the fuzzer find new bugs.
|
||||
Since this directory is going to be heavily polluted by the execution of the fuzzer, it's in the gitignore and you'll need to force-add (`git add -f`) your new test.
|
25
filter-parser/fuzz/Cargo.toml
Normal file
25
filter-parser/fuzz/Cargo.toml
Normal file
|
@ -0,0 +1,25 @@
|
|||
[package]
|
||||
name = "filter-parser-fuzz"
|
||||
version = "0.0.0"
|
||||
authors = ["Automatically generated"]
|
||||
publish = false
|
||||
edition = "2018"
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
libfuzzer-sys = "0.4"
|
||||
|
||||
[dependencies.filter-parser]
|
||||
path = ".."
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[[bin]]
|
||||
name = "parse"
|
||||
path = "fuzz_targets/parse.rs"
|
||||
test = false
|
||||
doc = false
|
1
filter-parser/fuzz/corpus/parse/test_1
Normal file
1
filter-parser/fuzz/corpus/parse/test_1
Normal file
|
@ -0,0 +1 @@
|
|||
channel = Ponce
|
1
filter-parser/fuzz/corpus/parse/test_10
Normal file
1
filter-parser/fuzz/corpus/parse/test_10
Normal file
|
@ -0,0 +1 @@
|
|||
channel != ponce
|
1
filter-parser/fuzz/corpus/parse/test_11
Normal file
1
filter-parser/fuzz/corpus/parse/test_11
Normal file
|
@ -0,0 +1 @@
|
|||
NOT channel = ponce
|
1
filter-parser/fuzz/corpus/parse/test_12
Normal file
1
filter-parser/fuzz/corpus/parse/test_12
Normal file
|
@ -0,0 +1 @@
|
|||
subscribers < 1000
|
1
filter-parser/fuzz/corpus/parse/test_13
Normal file
1
filter-parser/fuzz/corpus/parse/test_13
Normal file
|
@ -0,0 +1 @@
|
|||
subscribers > 1000
|
1
filter-parser/fuzz/corpus/parse/test_14
Normal file
1
filter-parser/fuzz/corpus/parse/test_14
Normal file
|
@ -0,0 +1 @@
|
|||
subscribers <= 1000
|
1
filter-parser/fuzz/corpus/parse/test_15
Normal file
1
filter-parser/fuzz/corpus/parse/test_15
Normal file
|
@ -0,0 +1 @@
|
|||
subscribers >= 1000
|
1
filter-parser/fuzz/corpus/parse/test_16
Normal file
1
filter-parser/fuzz/corpus/parse/test_16
Normal file
|
@ -0,0 +1 @@
|
|||
NOT subscribers < 1000
|
1
filter-parser/fuzz/corpus/parse/test_17
Normal file
1
filter-parser/fuzz/corpus/parse/test_17
Normal file
|
@ -0,0 +1 @@
|
|||
NOT subscribers > 1000
|
1
filter-parser/fuzz/corpus/parse/test_18
Normal file
1
filter-parser/fuzz/corpus/parse/test_18
Normal file
|
@ -0,0 +1 @@
|
|||
NOT subscribers <= 1000
|
1
filter-parser/fuzz/corpus/parse/test_19
Normal file
1
filter-parser/fuzz/corpus/parse/test_19
Normal file
|
@ -0,0 +1 @@
|
|||
NOT subscribers >= 1000
|
1
filter-parser/fuzz/corpus/parse/test_2
Normal file
1
filter-parser/fuzz/corpus/parse/test_2
Normal file
|
@ -0,0 +1 @@
|
|||
subscribers = 12
|
1
filter-parser/fuzz/corpus/parse/test_20
Normal file
1
filter-parser/fuzz/corpus/parse/test_20
Normal file
|
@ -0,0 +1 @@
|
|||
subscribers 100 TO 1000
|
1
filter-parser/fuzz/corpus/parse/test_21
Normal file
1
filter-parser/fuzz/corpus/parse/test_21
Normal file
|
@ -0,0 +1 @@
|
|||
NOT subscribers 100 TO 1000
|
1
filter-parser/fuzz/corpus/parse/test_22
Normal file
1
filter-parser/fuzz/corpus/parse/test_22
Normal file
|
@ -0,0 +1 @@
|
|||
_geoRadius(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_23
Normal file
1
filter-parser/fuzz/corpus/parse/test_23
Normal file
|
@ -0,0 +1 @@
|
|||
NOT _geoRadius(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_24
Normal file
1
filter-parser/fuzz/corpus/parse/test_24
Normal file
|
@ -0,0 +1 @@
|
|||
channel = ponce AND 'dog race' != 'bernese mountain'
|
1
filter-parser/fuzz/corpus/parse/test_25
Normal file
1
filter-parser/fuzz/corpus/parse/test_25
Normal file
|
@ -0,0 +1 @@
|
|||
channel = ponce OR 'dog race' != 'bernese mountain'
|
1
filter-parser/fuzz/corpus/parse/test_26
Normal file
1
filter-parser/fuzz/corpus/parse/test_26
Normal file
|
@ -0,0 +1 @@
|
|||
channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000
|
1
filter-parser/fuzz/corpus/parse/test_27
Normal file
1
filter-parser/fuzz/corpus/parse/test_27
Normal file
|
@ -0,0 +1 @@
|
|||
channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )
|
1
filter-parser/fuzz/corpus/parse/test_28
Normal file
1
filter-parser/fuzz/corpus/parse/test_28
Normal file
|
@ -0,0 +1 @@
|
|||
(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_29
Normal file
1
filter-parser/fuzz/corpus/parse/test_29
Normal file
|
@ -0,0 +1 @@
|
|||
channel = Ponce = 12
|
1
filter-parser/fuzz/corpus/parse/test_3
Normal file
1
filter-parser/fuzz/corpus/parse/test_3
Normal file
|
@ -0,0 +1 @@
|
|||
channel = 'Mister Mv'
|
1
filter-parser/fuzz/corpus/parse/test_30
Normal file
1
filter-parser/fuzz/corpus/parse/test_30
Normal file
|
@ -0,0 +1 @@
|
|||
channel =
|
1
filter-parser/fuzz/corpus/parse/test_31
Normal file
1
filter-parser/fuzz/corpus/parse/test_31
Normal file
|
@ -0,0 +1 @@
|
|||
channel = 🐻
|
1
filter-parser/fuzz/corpus/parse/test_32
Normal file
1
filter-parser/fuzz/corpus/parse/test_32
Normal file
|
@ -0,0 +1 @@
|
|||
OR
|
1
filter-parser/fuzz/corpus/parse/test_33
Normal file
1
filter-parser/fuzz/corpus/parse/test_33
Normal file
|
@ -0,0 +1 @@
|
|||
AND
|
1
filter-parser/fuzz/corpus/parse/test_34
Normal file
1
filter-parser/fuzz/corpus/parse/test_34
Normal file
|
@ -0,0 +1 @@
|
|||
channel Ponce
|
1
filter-parser/fuzz/corpus/parse/test_35
Normal file
1
filter-parser/fuzz/corpus/parse/test_35
Normal file
|
@ -0,0 +1 @@
|
|||
channel = Ponce OR
|
1
filter-parser/fuzz/corpus/parse/test_36
Normal file
1
filter-parser/fuzz/corpus/parse/test_36
Normal file
|
@ -0,0 +1 @@
|
|||
_geoRadius
|
1
filter-parser/fuzz/corpus/parse/test_37
Normal file
1
filter-parser/fuzz/corpus/parse/test_37
Normal file
|
@ -0,0 +1 @@
|
|||
_geoRadius = 12
|
1
filter-parser/fuzz/corpus/parse/test_38
Normal file
1
filter-parser/fuzz/corpus/parse/test_38
Normal file
|
@ -0,0 +1 @@
|
|||
_geoPoint(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_39
Normal file
1
filter-parser/fuzz/corpus/parse/test_39
Normal file
|
@ -0,0 +1 @@
|
|||
position <= _geoPoint(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_4
Normal file
1
filter-parser/fuzz/corpus/parse/test_4
Normal file
|
@ -0,0 +1 @@
|
|||
channel = "Mister Mv"
|
1
filter-parser/fuzz/corpus/parse/test_40
Normal file
1
filter-parser/fuzz/corpus/parse/test_40
Normal file
|
@ -0,0 +1 @@
|
|||
position <= _geoRadius(12, 13, 14)
|
1
filter-parser/fuzz/corpus/parse/test_41
Normal file
1
filter-parser/fuzz/corpus/parse/test_41
Normal file
|
@ -0,0 +1 @@
|
|||
channel = 'ponce
|
1
filter-parser/fuzz/corpus/parse/test_42
Normal file
1
filter-parser/fuzz/corpus/parse/test_42
Normal file
|
@ -0,0 +1 @@
|
|||
channel = "ponce
|
1
filter-parser/fuzz/corpus/parse/test_43
Normal file
1
filter-parser/fuzz/corpus/parse/test_43
Normal file
|
@ -0,0 +1 @@
|
|||
channel = mv OR (followers >= 1000
|
1
filter-parser/fuzz/corpus/parse/test_5
Normal file
1
filter-parser/fuzz/corpus/parse/test_5
Normal file
|
@ -0,0 +1 @@
|
|||
'dog race' = Borzoi
|
1
filter-parser/fuzz/corpus/parse/test_6
Normal file
1
filter-parser/fuzz/corpus/parse/test_6
Normal file
|
@ -0,0 +1 @@
|
|||
"dog race" = Chusky
|
1
filter-parser/fuzz/corpus/parse/test_7
Normal file
1
filter-parser/fuzz/corpus/parse/test_7
Normal file
|
@ -0,0 +1 @@
|
|||
"dog race" = "Bernese Mountain"
|
1
filter-parser/fuzz/corpus/parse/test_8
Normal file
1
filter-parser/fuzz/corpus/parse/test_8
Normal file
|
@ -0,0 +1 @@
|
|||
'dog race' = 'Bernese Mountain'
|
1
filter-parser/fuzz/corpus/parse/test_9
Normal file
1
filter-parser/fuzz/corpus/parse/test_9
Normal file
|
@ -0,0 +1 @@
|
|||
"dog race" = 'Bernese Mountain'
|
18
filter-parser/fuzz/fuzz_targets/parse.rs
Normal file
18
filter-parser/fuzz/fuzz_targets/parse.rs
Normal file
|
@ -0,0 +1,18 @@
|
|||
#![no_main]
|
||||
use filter_parser::{ErrorKind, FilterCondition};
|
||||
use libfuzzer_sys::fuzz_target;
|
||||
|
||||
// Fuzz entry point: feed every valid UTF-8 input shorter than 500 bytes to the
// filter parser and abort as soon as the parser reports an internal error.
fuzz_target!(|data: &[u8]| {
    match std::str::from_utf8(data) {
        // When we are fuzzing the parser we can get a stack overflow very easily.
        // Since this doesn't happen with a normal build, we simply ignore any
        // input whose length reaches 500.
        Ok(filter) if filter.len() < 500 => {
            if let Err(error) = FilterCondition::parse(filter) {
                // Ordinary parse errors are expected; only internal errors are findings.
                if matches!(error.kind(), ErrorKind::InternalError(_)) {
                    panic!("Found an internal error: `{:?}`", error)
                }
            }
        }
        // Invalid UTF-8 or over-long input: nothing to test.
        _ => (),
    }
});
|
73
filter-parser/src/condition.rs
Normal file
73
filter-parser/src/condition.rs
Normal file
|
@ -0,0 +1,73 @@
|
|||
//! BNF grammar:
|
||||
//!
|
||||
//! ```text
|
||||
//! condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
|
||||
//! to = value value TO value
|
||||
//! ```
|
||||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::combinator::cut;
|
||||
use nom::sequence::tuple;
|
||||
use Condition::*;
|
||||
|
||||
use crate::{parse_value, FilterCondition, IResult, Span, Token};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Condition<'a> {
|
||||
GreaterThan(Token<'a>),
|
||||
GreaterThanOrEqual(Token<'a>),
|
||||
Equal(Token<'a>),
|
||||
NotEqual(Token<'a>),
|
||||
LowerThan(Token<'a>),
|
||||
LowerThanOrEqual(Token<'a>),
|
||||
Between { from: Token<'a>, to: Token<'a> },
|
||||
}
|
||||
|
||||
impl<'a> Condition<'a> {
|
||||
/// This method can return two operations in case it must express
|
||||
/// an OR operation for the between case (i.e. `TO`).
|
||||
pub fn negate(self) -> (Self, Option<Self>) {
|
||||
match self {
|
||||
GreaterThan(n) => (LowerThanOrEqual(n), None),
|
||||
GreaterThanOrEqual(n) => (LowerThan(n), None),
|
||||
Equal(s) => (NotEqual(s), None),
|
||||
NotEqual(s) => (Equal(s), None),
|
||||
LowerThan(n) => (GreaterThanOrEqual(n), None),
|
||||
LowerThanOrEqual(n) => (GreaterThan(n), None),
|
||||
Between { from, to } => (LowerThan(from), Some(GreaterThan(to))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// condition = value ("==" | ">" ...) value
|
||||
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
|
||||
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
|
||||
let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?;
|
||||
|
||||
let condition = match *op.fragment() {
|
||||
"=" => FilterCondition::Condition { fid, op: Equal(value) },
|
||||
"!=" => FilterCondition::Condition { fid, op: NotEqual(value) },
|
||||
">" => FilterCondition::Condition { fid, op: GreaterThan(value) },
|
||||
"<" => FilterCondition::Condition { fid, op: LowerThan(value) },
|
||||
"<=" => FilterCondition::Condition { fid, op: LowerThanOrEqual(value) },
|
||||
">=" => FilterCondition::Condition { fid, op: GreaterThanOrEqual(value) },
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Ok((input, condition))
|
||||
}
|
||||
|
||||
/// to = value value TO value
|
||||
pub fn parse_to(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, (key, from, _, to)) =
|
||||
tuple((parse_value, parse_value, tag("TO"), cut(parse_value)))(input)?;
|
||||
|
||||
Ok((
|
||||
input,
|
||||
FilterCondition::Condition {
|
||||
fid: key.into(),
|
||||
op: Between { from: from.into(), to: to.into() },
|
||||
},
|
||||
))
|
||||
}
|
158
filter-parser/src/error.rs
Normal file
158
filter-parser/src/error.rs
Normal file
|
@ -0,0 +1,158 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use nom::error::{self, ParseError};
|
||||
use nom::Parser;
|
||||
|
||||
use crate::{IResult, Span};
|
||||
|
||||
pub trait NomErrorExt<E> {
|
||||
fn is_failure(&self) -> bool;
|
||||
fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
|
||||
fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
|
||||
}
|
||||
|
||||
impl<E> NomErrorExt<E> for nom::Err<E> {
    /// `true` only for the `Failure` variant.
    fn is_failure(&self) -> bool {
        match self {
            Self::Failure(_) => true,
            _ => false,
        }
    }

    /// Leaves a `Failure` as is and maps the payload of everything else.
    fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        if matches!(self, Self::Failure(_)) {
            self
        } else {
            self.map(op)
        }
    }

    /// Leaves an `Error` as is and maps the payload of everything else.
    fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
        if matches!(self, Self::Error(_)) {
            self
        } else {
            self.map(op)
        }
    }
}
|
||||
|
||||
/// cut a parser and map the error
|
||||
pub fn cut_with_err<'a, O>(
|
||||
mut parser: impl FnMut(Span<'a>) -> IResult<O>,
|
||||
mut with: impl FnMut(Error<'a>) -> Error<'a>,
|
||||
) -> impl FnMut(Span<'a>) -> IResult<O> {
|
||||
move |input| match parser.parse(input) {
|
||||
Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))),
|
||||
rest => rest,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Error<'a> {
|
||||
context: Span<'a>,
|
||||
kind: ErrorKind<'a>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ErrorKind<'a> {
|
||||
ReservedGeo(&'a str),
|
||||
Geo,
|
||||
MisusedGeo,
|
||||
InvalidPrimary,
|
||||
ExpectedEof,
|
||||
ExpectedValue,
|
||||
MissingClosingDelimiter(char),
|
||||
Char(char),
|
||||
InternalError(error::ErrorKind),
|
||||
External(String),
|
||||
}
|
||||
|
||||
impl<'a> Error<'a> {
|
||||
pub fn kind(&self) -> &ErrorKind<'a> {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
pub fn context(&self) -> &Span<'a> {
|
||||
&self.context
|
||||
}
|
||||
|
||||
pub fn new_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self {
|
||||
Self { context, kind }
|
||||
}
|
||||
|
||||
pub fn new_from_external(context: Span<'a>, error: impl std::error::Error) -> Self {
|
||||
Self::new_from_kind(context, ErrorKind::External(error.to_string()))
|
||||
}
|
||||
|
||||
pub fn char(self) -> char {
|
||||
match self.kind {
|
||||
ErrorKind::Char(c) => c,
|
||||
_ => panic!("Internal filter parser error"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ParseError<Span<'a>> for Error<'a> {
|
||||
fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self {
|
||||
let kind = match kind {
|
||||
error::ErrorKind::Eof => ErrorKind::ExpectedEof,
|
||||
kind => ErrorKind::InternalError(kind),
|
||||
};
|
||||
Self { context: input, kind }
|
||||
}
|
||||
|
||||
fn append(_input: Span<'a>, _kind: error::ErrorKind, other: Self) -> Self {
|
||||
other
|
||||
}
|
||||
|
||||
fn from_char(input: Span<'a>, c: char) -> Self {
|
||||
Self { context: input, kind: ErrorKind::Char(c) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Display for Error<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let input = self.context.fragment();
|
||||
|
||||
// When printing our error message we want to escape all `\n` to be sure we keep our format with the
|
||||
// first line being the diagnostic and the second line being the incriminated filter.
|
||||
let escaped_input = input.escape_debug();
|
||||
|
||||
match self.kind {
|
||||
ErrorKind::ExpectedValue if input.trim().is_empty() => {
|
||||
writeln!(f, "Was expecting a value but instead got nothing.")?
|
||||
}
|
||||
ErrorKind::MissingClosingDelimiter(c) => {
|
||||
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
|
||||
}
|
||||
ErrorKind::ExpectedValue => {
|
||||
writeln!(f, "Was expecting a value but instead got `{}`.", escaped_input)?
|
||||
}
|
||||
ErrorKind::InvalidPrimary if input.trim().is_empty() => {
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing.")?
|
||||
}
|
||||
ErrorKind::InvalidPrimary => {
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `{}`.", escaped_input)?
|
||||
}
|
||||
ErrorKind::ExpectedEof => {
|
||||
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
|
||||
}
|
||||
ErrorKind::Geo => {
|
||||
writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
|
||||
}
|
||||
ErrorKind::ReservedGeo(name) => {
|
||||
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name.escape_debug())?
|
||||
}
|
||||
ErrorKind::MisusedGeo => {
|
||||
writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?
|
||||
}
|
||||
ErrorKind::Char(c) => {
|
||||
panic!("Tried to display a char error with `{}`", c)
|
||||
}
|
||||
ErrorKind::InternalError(kind) => writeln!(
|
||||
f,
|
||||
"Encountered an internal `{:?}` error while parsing your filter. Please fill an issue", kind
|
||||
)?,
|
||||
ErrorKind::External(ref error) => writeln!(f, "{}", error)?,
|
||||
}
|
||||
let base_column = self.context.get_utf8_column();
|
||||
let size = self.context.fragment().chars().count();
|
||||
|
||||
write!(f, "{}:{} {}", base_column, base_column + size, self.context.extra)
|
||||
}
|
||||
}
|
589
filter-parser/src/lib.rs
Normal file
589
filter-parser/src/lib.rs
Normal file
|
@ -0,0 +1,589 @@
|
|||
//! BNF grammar:
|
||||
//!
|
||||
//! ```text
|
||||
//! filter = expression ~ EOF
|
||||
//! expression = or
|
||||
//! or = and (~ "OR" ~ and)*
|
||||
//! and = not (~ "AND" not)*
|
||||
//! not = ("NOT" | "!") not | primary
|
||||
//! primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
|
||||
//! condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
|
||||
//! to = value value TO value
|
||||
//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
||||
//! singleQuoted = "'" .* all but quotes "'"
|
||||
//! doubleQuoted = "\"" .* all but double quotes "\""
|
||||
//! word = (alphanumeric | _ | - | .)+
|
||||
//! geoRadius = WS* ~ "_geoRadius(" ~ WS* ~ float ~ WS* ~ "," ~ WS* ~ float ~ WS* ~ "," float ~ WS* ~ ")"
|
||||
//! ```
|
||||
//!
|
||||
//! Other BNF grammar used to handle some specific errors:
|
||||
//! ```text
|
||||
//! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")"
|
||||
//! ```
|
||||
//!
|
||||
//! Specific errors:
|
||||
//! ================
|
||||
//! - If a user tries to use a geoPoint, as a primary OR as a value, we must throw an error.
|
||||
//! ```text
|
||||
//! field = _geoPoint(12, 13, 14)
|
||||
//! field < 12 AND _geoPoint(1, 2)
|
||||
//! ```
|
||||
//!
|
||||
//! - If a user tries to use a geoRadius as a value, we must throw an error.
|
||||
//! ```text
|
||||
//! field = _geoRadius(12, 13, 14)
|
||||
//! ```
|
||||
//!
|
||||
|
||||
mod condition;
|
||||
mod error;
|
||||
mod value;
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Deref;
|
||||
use std::str::FromStr;
|
||||
|
||||
pub use condition::{parse_condition, parse_to, Condition};
|
||||
use error::{cut_with_err, NomErrorExt};
|
||||
pub use error::{Error, ErrorKind};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::{char, multispace0};
|
||||
use nom::combinator::{cut, eof, map};
|
||||
use nom::multi::{many0, separated_list1};
|
||||
use nom::number::complete::recognize_float;
|
||||
use nom::sequence::{delimited, preceded, terminated, tuple};
|
||||
use nom::Finish;
|
||||
use nom_locate::LocatedSpan;
|
||||
pub(crate) use value::parse_value;
|
||||
|
||||
pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
|
||||
|
||||
type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
|
||||
|
||||
#[derive(Debug, Clone, Eq)]
|
||||
pub struct Token<'a>(Span<'a>);
|
||||
|
||||
impl<'a> Deref for Token<'a> {
|
||||
type Target = &'a str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> PartialEq for Token<'a> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.fragment() == other.0.fragment()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Token<'a> {
|
||||
pub fn new(position: Span<'a>) -> Self {
|
||||
Self(position)
|
||||
}
|
||||
|
||||
pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> {
|
||||
Error::new_from_external(self.0, error)
|
||||
}
|
||||
|
||||
pub fn parse<T>(&self) -> Result<T, Error>
|
||||
where
|
||||
T: FromStr,
|
||||
T::Err: std::error::Error,
|
||||
{
|
||||
self.0.parse().map_err(|e| self.as_external_error(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Span<'a>> for Token<'a> {
|
||||
fn from(span: Span<'a>) -> Self {
|
||||
Self(span)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum FilterCondition<'a> {
|
||||
Condition { fid: Token<'a>, op: Condition<'a> },
|
||||
Or(Box<Self>, Box<Self>),
|
||||
And(Box<Self>, Box<Self>),
|
||||
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||
Empty,
|
||||
}
|
||||
|
||||
impl<'a> FilterCondition<'a> {
|
||||
pub fn negate(self) -> FilterCondition<'a> {
|
||||
use FilterCondition::*;
|
||||
|
||||
match self {
|
||||
Condition { fid, op } => match op.negate() {
|
||||
(op, None) => Condition { fid, op },
|
||||
(a, Some(b)) => Or(
|
||||
Condition { fid: fid.clone(), op: a }.into(),
|
||||
Condition { fid, op: b }.into(),
|
||||
),
|
||||
},
|
||||
Or(a, b) => And(a.negate().into(), b.negate().into()),
|
||||
And(a, b) => Or(a.negate().into(), b.negate().into()),
|
||||
Empty => Empty,
|
||||
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
|
||||
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(input: &'a str) -> Result<Self, Error> {
|
||||
if input.trim().is_empty() {
|
||||
return Ok(Self::Empty);
|
||||
}
|
||||
let span = Span::new_extra(input, input);
|
||||
parse_filter(span).finish().map(|(_rem, output)| output)
|
||||
}
|
||||
}
|
||||
|
||||
/// remove OPTIONAL whitespaces before AND after the the provided parser.
|
||||
fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> {
|
||||
delimited(multispace0, inner, multispace0)
|
||||
}
|
||||
|
||||
/// or = and (~ "OR" ~ and)
|
||||
fn parse_or(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, lhs) = parse_and(input)?;
|
||||
// if we found a `OR` then we MUST find something next
|
||||
let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?;
|
||||
|
||||
let expr = ors
|
||||
.into_iter()
|
||||
.fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch)));
|
||||
Ok((input, expr))
|
||||
}
|
||||
|
||||
/// and = not (~ "AND" not)*
|
||||
fn parse_and(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, lhs) = parse_not(input)?;
|
||||
// if we found a `AND` then we MUST find something next
|
||||
let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?;
|
||||
let expr = ors
|
||||
.into_iter()
|
||||
.fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch)));
|
||||
Ok((input, expr))
|
||||
}
|
||||
|
||||
/// not = ("NOT" | "!") not | primary
|
||||
/// We can have multiple consecutive not, eg: `NOT NOT channel = mv`.
|
||||
/// If we parse a `NOT` or `!` we MUST parse something behind.
|
||||
fn parse_not(input: Span) -> IResult<FilterCondition> {
|
||||
alt((map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), parse_primary))(
|
||||
input,
|
||||
)
|
||||
}
|
||||
|
||||
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
|
||||
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
|
||||
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to forbid space BEFORE the _geoRadius but not after
|
||||
let parsed = preceded(
|
||||
tuple((multispace0, tag("_geoRadius"))),
|
||||
// if we were able to parse `_geoRadius` and can't parse the rest of the input we returns a failure
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||
)(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo)));
|
||||
|
||||
let (input, args) = parsed?;
|
||||
|
||||
if args.len() != 3 {
|
||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::Geo)));
|
||||
}
|
||||
|
||||
let res = FilterCondition::GeoLowerThan {
|
||||
point: [args[0].into(), args[1].into()],
|
||||
radius: args[2].into(),
|
||||
};
|
||||
Ok((input, res))
|
||||
}
|
||||
|
||||
/// geoPoint = WS* ~ "_geoPoint(float ~ "," ~ float ~ "," float)
|
||||
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to forbid space BEFORE the _geoPoint but not after
|
||||
tuple((
|
||||
multispace0,
|
||||
tag("_geoPoint"),
|
||||
// if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next.
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))),
|
||||
))(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
|
||||
// if we succeeded we still returns a Failure because geoPoints are not allowed
|
||||
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
|
||||
}
|
||||
|
||||
/// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
|
||||
fn parse_primary(input: Span) -> IResult<FilterCondition> {
|
||||
alt((
|
||||
// if we find a first parenthesis, then we must parse an expression and find the closing parenthesis
|
||||
delimited(
|
||||
ws(char('(')),
|
||||
cut(parse_expression),
|
||||
cut_with_err(ws(char(')')), |c| {
|
||||
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
|
||||
}),
|
||||
),
|
||||
parse_geo_radius,
|
||||
parse_condition,
|
||||
parse_to,
|
||||
// the next lines are only for error handling and are written at the end to have the less possible performance impact
|
||||
parse_geo_point,
|
||||
))(input)
|
||||
// if the inner parsers did not match enough information to return an accurate error
|
||||
.map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary)))
|
||||
}
|
||||
|
||||
/// expression = or
|
||||
pub fn parse_expression(input: Span) -> IResult<FilterCondition> {
|
||||
parse_or(input)
|
||||
}
|
||||
|
||||
/// filter = expression ~ EOF
|
||||
pub fn parse_filter(input: Span) -> IResult<FilterCondition> {
|
||||
terminated(parse_expression, eof)(input)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
|
||||
pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
|
||||
// if the string is empty we still need to return 1 for the line number
|
||||
let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count());
|
||||
let offset = before.chars().count();
|
||||
// the extra field is not checked in the tests so we can set it to nothing
|
||||
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
|
||||
}
|
||||
|
||||
/// Checks that every supported filter syntax is parsed into the expected
/// `FilterCondition` tree.
///
/// Each case pairs a raw filter with the tree it must produce; `rtok(before, value)`
/// builds the expected token for `value` located right after `before` in the input.
#[test]
fn parse() {
    use FilterCondition as Fc;

    let test_case = [
        // simple test
        (
            "channel = Ponce",
            Fc::Condition {
                fid: rtok("", "channel"),
                op: Condition::Equal(rtok("channel = ", "Ponce")),
            },
        ),
        (
            "subscribers = 12",
            Fc::Condition {
                fid: rtok("", "subscribers"),
                op: Condition::Equal(rtok("subscribers = ", "12")),
            },
        ),
        // test all the quotes and simple quotes
        (
            "channel = 'Mister Mv'",
            Fc::Condition {
                fid: rtok("", "channel"),
                op: Condition::Equal(rtok("channel = '", "Mister Mv")),
            },
        ),
        (
            "channel = \"Mister Mv\"",
            Fc::Condition {
                fid: rtok("", "channel"),
                op: Condition::Equal(rtok("channel = \"", "Mister Mv")),
            },
        ),
        (
            "'dog race' = Borzoi",
            Fc::Condition {
                fid: rtok("'", "dog race"),
                op: Condition::Equal(rtok("'dog race' = ", "Borzoi")),
            },
        ),
        (
            "\"dog race\" = Chusky",
            Fc::Condition {
                fid: rtok("\"", "dog race"),
                op: Condition::Equal(rtok("\"dog race\" = ", "Chusky")),
            },
        ),
        (
            "\"dog race\" = \"Bernese Mountain\"",
            Fc::Condition {
                fid: rtok("\"", "dog race"),
                op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
            },
        ),
        (
            "'dog race' = 'Bernese Mountain'",
            Fc::Condition {
                fid: rtok("'", "dog race"),
                op: Condition::Equal(rtok("'dog race' = '", "Bernese Mountain")),
            },
        ),
        (
            "\"dog race\" = 'Bernese Mountain'",
            Fc::Condition {
                fid: rtok("\"", "dog race"),
                // the prefix mirrors the input exactly: the value is single-quoted here
                op: Condition::Equal(rtok("\"dog race\" = '", "Bernese Mountain")),
            },
        ),
        // test all the operators
        (
            "channel != ponce",
            Fc::Condition {
                fid: rtok("", "channel"),
                op: Condition::NotEqual(rtok("channel != ", "ponce")),
            },
        ),
        (
            "NOT channel = ponce",
            Fc::Condition {
                fid: rtok("NOT ", "channel"),
                op: Condition::NotEqual(rtok("NOT channel = ", "ponce")),
            },
        ),
        (
            "subscribers < 1000",
            Fc::Condition {
                fid: rtok("", "subscribers"),
                op: Condition::LowerThan(rtok("subscribers < ", "1000")),
            },
        ),
        (
            "subscribers > 1000",
            Fc::Condition {
                fid: rtok("", "subscribers"),
                op: Condition::GreaterThan(rtok("subscribers > ", "1000")),
            },
        ),
        (
            "subscribers <= 1000",
            Fc::Condition {
                fid: rtok("", "subscribers"),
                op: Condition::LowerThanOrEqual(rtok("subscribers <= ", "1000")),
            },
        ),
        (
            "subscribers >= 1000",
            Fc::Condition {
                fid: rtok("", "subscribers"),
                op: Condition::GreaterThanOrEqual(rtok("subscribers >= ", "1000")),
            },
        ),
        (
            "NOT subscribers < 1000",
            Fc::Condition {
                fid: rtok("NOT ", "subscribers"),
                op: Condition::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")),
            },
        ),
        (
            "NOT subscribers > 1000",
            Fc::Condition {
                fid: rtok("NOT ", "subscribers"),
                op: Condition::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")),
            },
        ),
        (
            "NOT subscribers <= 1000",
            Fc::Condition {
                fid: rtok("NOT ", "subscribers"),
                op: Condition::GreaterThan(rtok("NOT subscribers <= ", "1000")),
            },
        ),
        (
            "NOT subscribers >= 1000",
            Fc::Condition {
                fid: rtok("NOT ", "subscribers"),
                op: Condition::LowerThan(rtok("NOT subscribers >= ", "1000")),
            },
        ),
        (
            "subscribers 100 TO 1000",
            Fc::Condition {
                fid: rtok("", "subscribers"),
                op: Condition::Between {
                    from: rtok("subscribers ", "100"),
                    to: rtok("subscribers 100 TO ", "1000"),
                },
            },
        ),
        (
            "NOT subscribers 100 TO 1000",
            Fc::Or(
                Fc::Condition {
                    fid: rtok("NOT ", "subscribers"),
                    op: Condition::LowerThan(rtok("NOT subscribers ", "100")),
                }
                .into(),
                Fc::Condition {
                    fid: rtok("NOT ", "subscribers"),
                    op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")),
                }
                .into(),
            ),
        ),
        (
            "_geoRadius(12, 13, 14)",
            Fc::GeoLowerThan {
                point: [rtok("_geoRadius(", "12"), rtok("_geoRadius(12, ", "13")],
                radius: rtok("_geoRadius(12, 13, ", "14"),
            },
        ),
        (
            "NOT _geoRadius(12, 13, 14)",
            Fc::GeoGreaterThan {
                point: [rtok("NOT _geoRadius(", "12"), rtok("NOT _geoRadius(12, ", "13")],
                radius: rtok("NOT _geoRadius(12, 13, ", "14"),
            },
        ),
        // test simple `or` and `and`
        (
            "channel = ponce AND 'dog race' != 'bernese mountain'",
            Fc::And(
                Fc::Condition {
                    fid: rtok("", "channel"),
                    op: Condition::Equal(rtok("channel = ", "ponce")),
                }
                .into(),
                Fc::Condition {
                    fid: rtok("channel = ponce AND '", "dog race"),
                    op: Condition::NotEqual(rtok(
                        "channel = ponce AND 'dog race' != '",
                        "bernese mountain",
                    )),
                }
                .into(),
            ),
        ),
        (
            "channel = ponce OR 'dog race' != 'bernese mountain'",
            Fc::Or(
                Fc::Condition {
                    fid: rtok("", "channel"),
                    op: Condition::Equal(rtok("channel = ", "ponce")),
                }
                .into(),
                Fc::Condition {
                    fid: rtok("channel = ponce OR '", "dog race"),
                    op: Condition::NotEqual(rtok(
                        "channel = ponce OR 'dog race' != '",
                        "bernese mountain",
                    )),
                }
                .into(),
            ),
        ),
        (
            "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000",
            Fc::Or(
                Fc::And(
                    Fc::Condition {
                        fid: rtok("", "channel"),
                        op: Condition::Equal(rtok("channel = ", "ponce")),
                    }
                    .into(),
                    Fc::Condition {
                        fid: rtok("channel = ponce AND '", "dog race"),
                        op: Condition::NotEqual(rtok(
                            "channel = ponce AND 'dog race' != '",
                            "bernese mountain",
                        )),
                    }
                    .into(),
                )
                .into(),
                Fc::Condition {
                    fid: rtok(
                        "channel = ponce AND 'dog race' != 'bernese mountain' OR ",
                        "subscribers",
                    ),
                    op: Condition::GreaterThan(rtok(
                        "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ",
                        "1000",
                    )),
                }
                .into(),
            ),
        ),
        // test parenthesis
        (
            "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )",
            Fc::And(
                Fc::Condition {
                    fid: rtok("", "channel"),
                    op: Condition::Equal(rtok("channel = ", "ponce")),
                }
                .into(),
                Fc::Or(
                    Fc::Condition {
                        fid: rtok("channel = ponce AND ( '", "dog race"),
                        op: Condition::NotEqual(rtok(
                            "channel = ponce AND ( 'dog race' != '",
                            "bernese mountain",
                        )),
                    }
                    .into(),
                    Fc::Condition {
                        fid: rtok(
                            "channel = ponce AND ( 'dog race' != 'bernese mountain' OR ",
                            "subscribers",
                        ),
                        op: Condition::GreaterThan(rtok(
                            "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ",
                            "1000",
                        )),
                    }
                    .into(),
                )
                .into(),
            ),
        ),
        (
            "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)",
            Fc::And(
                Fc::Or(
                    Fc::And(
                        Fc::Condition {
                            fid: rtok("(", "channel"),
                            op: Condition::Equal(rtok("(channel = ", "ponce")),
                        }
                        .into(),
                        Fc::Condition {
                            fid: rtok("(channel = ponce AND '", "dog race"),
                            op: Condition::NotEqual(rtok(
                                "(channel = ponce AND 'dog race' != '",
                                "bernese mountain",
                            )),
                        }
                        .into(),
                    )
                    .into(),
                    Fc::Condition {
                        fid: rtok(
                            "(channel = ponce AND 'dog race' != 'bernese mountain' OR ",
                            "subscribers",
                        ),
                        op: Condition::GreaterThan(rtok(
                            "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ",
                            "1000",
                        )),
                    }
                    .into(),
                )
                .into(),
                Fc::GeoLowerThan {
                    point: [
                        rtok(
                            "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(",
                            "12",
                        ),
                        rtok(
                            "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ",
                            "13",
                        ),
                    ],
                    radius: rtok(
                        "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ",
                        "14",
                    ),
                }
                .into(),
            ),
        ),
    ];

    for (input, expected) in test_case {
        let result = Fc::parse(input);

        // Report the raw filter that failed, not the tree we hoped to get out of it.
        assert!(
            result.is_ok(),
            "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
            input,
            result.unwrap_err()
        );
        let filter = result.unwrap();
        assert_eq!(filter, expected, "Filter `{}` failed.", input);
    }
}
|
||||
|
||||
/// Checks that invalid filters are rejected and that the rendered error
/// message starts with the expected diagnostic.
#[test]
fn error() {
    use FilterCondition as Fc;

    // (invalid filter, prefix the error message must start with)
    let cases = [
        // simple test
        ("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."),
        ("channel = ", "Was expecting a value but instead got nothing."),
        ("channel = 🐻", "Was expecting a value but instead got `🐻`."),
        ("channel = 🐻 AND followers < 100", "Was expecting a value but instead got `🐻`."),
        ("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."),
        ("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."),
        ("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."),
        ("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing."),
        ("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
        ("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
        ("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
        ("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
        ("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."),
        ("channel = 'ponce", "Expression `'ponce` is missing the following closing delimiter: `'`."),
        ("channel = \"ponce", "Expression `\"ponce` is missing the following closing delimiter: `\"`."),
        ("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."),
        ("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."),
    ];

    for (input, expected) in cases {
        // A successful parse is itself the failure here; otherwise keep the rendered error.
        let message = match Fc::parse(input) {
            Ok(parsed) => panic!(
                "Filter `{}` wasn't supposed to be parsed but it did with the following result: `{:?}`",
                input, parsed
            ),
            Err(err) => err.to_string(),
        };

        assert!(
            message.starts_with(expected),
            "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.",
            input,
            expected,
            message
        );
    }
}
|
||||
}
|
16
filter-parser/src/main.rs
Normal file
16
filter-parser/src/main.rs
Normal file
|
@ -0,0 +1,16 @@
|
|||
fn main() {
|
||||
let input = std::env::args().nth(1).expect("You must provide a filter to test");
|
||||
|
||||
println!("Trying to execute the following filter:\n{}\n", input);
|
||||
|
||||
match filter_parser::FilterCondition::parse(&input) {
|
||||
Ok(filter) => {
|
||||
println!("✅ Valid filter");
|
||||
println!("{:#?}", filter);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("❎ Invalid filter");
|
||||
println!("{}", e.to_string());
|
||||
}
|
||||
}
|
||||
}
|
147
filter-parser/src/value.rs
Normal file
147
filter-parser/src/value.rs
Normal file
|
@ -0,0 +1,147 @@
|
|||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{take_till, take_while, take_while1};
|
||||
use nom::character::complete::{char, multispace0};
|
||||
use nom::combinator::cut;
|
||||
use nom::sequence::{delimited, terminated};
|
||||
|
||||
use crate::error::NomErrorExt;
|
||||
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
|
||||
|
||||
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
||||
pub fn parse_value(input: Span) -> IResult<Token> {
|
||||
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
||||
let (input, _) = take_while(char::is_whitespace)(input)?;
|
||||
|
||||
// then, we want to check if the user is misusing a geo expression
|
||||
// This expression can’t finish without error.
|
||||
// We want to return an error in case of failure.
|
||||
if let Err(err) = parse_geo_point(input) {
|
||||
if err.is_failure() {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
match parse_geo_radius(input) {
|
||||
Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))),
|
||||
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
|
||||
// But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value.
|
||||
Err(e) if e.is_failure() => {
|
||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo)))
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
// singleQuoted = "'" .* all but quotes "'"
|
||||
let simple_quoted = take_till(|c: char| c == '\'');
|
||||
// doubleQuoted = "\"" (word | spaces)* "\""
|
||||
let double_quoted = take_till(|c: char| c == '"');
|
||||
// word = (alphanumeric | _ | - | .)+
|
||||
let word = take_while1(is_value_component);
|
||||
|
||||
// this parser is only used when an error is encountered and it parse the
|
||||
// largest string possible that do not contain any “language” syntax.
|
||||
// If we try to parse `name = 🦀 AND language = rust` we want to return an
|
||||
// error saying we could not parse `🦀`. Not that no value were found or that
|
||||
// we could note parse `🦀 AND language = rust`.
|
||||
// we want to remove the space before entering the alt because if we don't,
|
||||
// when we create the errors from the output of the alt we have spaces everywhere
|
||||
let error_word = take_till::<_, _, Error>(is_syntax_component);
|
||||
|
||||
terminated(
|
||||
alt((
|
||||
delimited(char('\''), cut(simple_quoted), cut(char('\''))),
|
||||
delimited(char('"'), cut(double_quoted), cut(char('"'))),
|
||||
word,
|
||||
)),
|
||||
multispace0,
|
||||
)(input)
|
||||
.map(|(s, t)| (s, t.into()))
|
||||
// if we found nothing in the alt it means the user specified something that was not recognized as a value
|
||||
.map_err(|e: nom::Err<Error>| {
|
||||
e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue))
|
||||
})
|
||||
// if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote
|
||||
.map_err(|e| {
|
||||
e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())))
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns `true` when `c` may appear in an unquoted value: any alphanumeric
/// character, `_`, `-` or `.`.
fn is_value_component(c: char) -> bool {
    matches!(c, '_' | '-' | '.') || c.is_alphanumeric()
}
|
||||
|
||||
/// Returns `true` when `c` belongs to the filter syntax itself: any whitespace,
/// a parenthesis, or one of the operator characters `=`, `<`, `>`, `!`.
fn is_syntax_component(c: char) -> bool {
    matches!(c, '(' | ')' | '=' | '<' | '>' | '!') || c.is_whitespace()
}
|
||||
|
||||
#[cfg(test)]
pub mod test {
    use nom::Finish;

    use super::*;
    use crate::tests::rtok;

    /// Checks that `parse_value` extracts the expected token from bare words
    /// and from single- or double-quoted values.
    #[test]
    fn name() {
        let test_case = [
            ("channel", rtok("", "channel")),
            (".private", rtok("", ".private")),
            ("I-love-kebab", rtok("", "I-love-kebab")),
            ("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
            ("parens(", rtok("", "parens")),
            ("parens)", rtok("", "parens")),
            ("not!", rtok("", "not")),
            (" channel", rtok(" ", "channel")),
            ("channel ", rtok("", "channel")),
            (" channel ", rtok(" ", "channel")),
            ("'channel'", rtok("'", "channel")),
            ("\"channel\"", rtok("\"", "channel")),
            ("'cha)nnel'", rtok("'", "cha)nnel")),
            ("'cha\"nnel'", rtok("'", "cha\"nnel")),
            ("\"cha'nnel\"", rtok("\"", "cha'nnel")),
            ("\" some spaces \"", rtok("\"", " some spaces ")),
            ("I'm tamo", rtok("'m tamo", "I")),
        ];

        for (input, expected) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            // Report the raw input that failed, not the token we hoped to get.
            assert!(
                result.is_ok(),
                "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
                input,
                result.unwrap_err()
            );
            let value = result.unwrap().1;
            assert_eq!(value, expected, "Filter `{}` failed.", input);
        }
    }

    /// Checks which part of the input is referenced by the error returned when
    /// `parse_value` can't find a value.
    #[test]
    fn diagnostic() {
        // (input, fragment the error is expected to point at)
        let test_case = [
            ("🦀", "🦀"),
            (" 🦀", "🦀"),
            ("🦀 AND crab = truc", "🦀"),
            ("🦀_in_name", "🦀_in_name"),
            (" (name = ...", ""),
        ];

        for (input, expected) in test_case {
            let input = Span::new_extra(input, input);
            let result = parse_value(input);

            // Report the input that was unexpectedly accepted.
            assert!(
                result.is_err(),
                "Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`",
                input,
                result.unwrap()
            );
            // get the inner string referenced in the error
            let value = *result.finish().unwrap_err().context().fragment();
            assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value);
        }
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue