mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Rename the filter_parser crate into filter-parser
Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
parent
0ea0146e04
commit
f28600031d
55 changed files with 5 additions and 8 deletions
73
filter-parser/src/condition.rs
Normal file
73
filter-parser/src/condition.rs
Normal file
|
@ -0,0 +1,73 @@
|
|||
//! BNF grammar:
|
||||
//!
|
||||
//! ```text
|
||||
//! condition = value ("==" | ">" ...) value
|
||||
//! to = value value TO value
|
||||
//! ```
|
||||
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::combinator::cut;
|
||||
use nom::sequence::tuple;
|
||||
use Condition::*;
|
||||
|
||||
use crate::{parse_value, FilterCondition, IResult, Span, Token};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum Condition<'a> {
|
||||
GreaterThan(Token<'a>),
|
||||
GreaterThanOrEqual(Token<'a>),
|
||||
Equal(Token<'a>),
|
||||
NotEqual(Token<'a>),
|
||||
LowerThan(Token<'a>),
|
||||
LowerThanOrEqual(Token<'a>),
|
||||
Between { from: Token<'a>, to: Token<'a> },
|
||||
}
|
||||
|
||||
impl<'a> Condition<'a> {
|
||||
/// This method can return two operations in case it must express
|
||||
/// an OR operation for the between case (i.e. `TO`).
|
||||
pub fn negate(self) -> (Self, Option<Self>) {
|
||||
match self {
|
||||
GreaterThan(n) => (LowerThanOrEqual(n), None),
|
||||
GreaterThanOrEqual(n) => (LowerThan(n), None),
|
||||
Equal(s) => (NotEqual(s), None),
|
||||
NotEqual(s) => (Equal(s), None),
|
||||
LowerThan(n) => (GreaterThanOrEqual(n), None),
|
||||
LowerThanOrEqual(n) => (GreaterThan(n), None),
|
||||
Between { from, to } => (LowerThan(from), Some(GreaterThan(to))),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// condition = value ("==" | ">" ...) value
|
||||
pub fn parse_condition(input: Span) -> IResult<FilterCondition> {
|
||||
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
|
||||
let (input, (fid, op, value)) = tuple((parse_value, operator, cut(parse_value)))(input)?;
|
||||
|
||||
let condition = match *op.fragment() {
|
||||
"=" => FilterCondition::Condition { fid, op: Equal(value) },
|
||||
"!=" => FilterCondition::Condition { fid, op: NotEqual(value) },
|
||||
">" => FilterCondition::Condition { fid, op: GreaterThan(value) },
|
||||
"<" => FilterCondition::Condition { fid, op: LowerThan(value) },
|
||||
"<=" => FilterCondition::Condition { fid, op: LowerThanOrEqual(value) },
|
||||
">=" => FilterCondition::Condition { fid, op: GreaterThanOrEqual(value) },
|
||||
_ => unreachable!(),
|
||||
};
|
||||
|
||||
Ok((input, condition))
|
||||
}
|
||||
|
||||
/// to = value value TO value
|
||||
pub fn parse_to(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, (key, from, _, to)) =
|
||||
tuple((parse_value, parse_value, tag("TO"), cut(parse_value)))(input)?;
|
||||
|
||||
Ok((
|
||||
input,
|
||||
FilterCondition::Condition {
|
||||
fid: key.into(),
|
||||
op: Between { from: from.into(), to: to.into() },
|
||||
},
|
||||
))
|
||||
}
|
158
filter-parser/src/error.rs
Normal file
158
filter-parser/src/error.rs
Normal file
|
@ -0,0 +1,158 @@
|
|||
use std::fmt::Display;
|
||||
|
||||
use nom::error::{self, ParseError};
|
||||
use nom::Parser;
|
||||
|
||||
use crate::{IResult, Span};
|
||||
|
||||
pub trait NomErrorExt<E> {
|
||||
fn is_failure(&self) -> bool;
|
||||
fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
|
||||
fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E>;
|
||||
}
|
||||
|
||||
impl<E> NomErrorExt<E> for nom::Err<E> {
|
||||
fn is_failure(&self) -> bool {
|
||||
matches!(self, Self::Failure(_))
|
||||
}
|
||||
|
||||
fn map_err<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
|
||||
match self {
|
||||
e @ Self::Failure(_) => e,
|
||||
e => e.map(|e| op(e)),
|
||||
}
|
||||
}
|
||||
|
||||
fn map_fail<O: FnOnce(E) -> E>(self, op: O) -> nom::Err<E> {
|
||||
match self {
|
||||
e @ Self::Error(_) => e,
|
||||
e => e.map(|e| op(e)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// cut a parser and map the error
|
||||
pub fn cut_with_err<'a, O>(
|
||||
mut parser: impl FnMut(Span<'a>) -> IResult<O>,
|
||||
mut with: impl FnMut(Error<'a>) -> Error<'a>,
|
||||
) -> impl FnMut(Span<'a>) -> IResult<O> {
|
||||
move |input| match parser.parse(input) {
|
||||
Err(nom::Err::Error(e)) => Err(nom::Err::Failure(with(e))),
|
||||
rest => rest,
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct Error<'a> {
|
||||
context: Span<'a>,
|
||||
kind: ErrorKind<'a>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum ErrorKind<'a> {
|
||||
ReservedGeo(&'a str),
|
||||
Geo,
|
||||
MisusedGeo,
|
||||
InvalidPrimary,
|
||||
ExpectedEof,
|
||||
ExpectedValue,
|
||||
MissingClosingDelimiter(char),
|
||||
Char(char),
|
||||
InternalError(error::ErrorKind),
|
||||
External(String),
|
||||
}
|
||||
|
||||
impl<'a> Error<'a> {
|
||||
pub fn kind(&self) -> &ErrorKind<'a> {
|
||||
&self.kind
|
||||
}
|
||||
|
||||
pub fn context(&self) -> &Span<'a> {
|
||||
&self.context
|
||||
}
|
||||
|
||||
pub fn new_from_kind(context: Span<'a>, kind: ErrorKind<'a>) -> Self {
|
||||
Self { context, kind }
|
||||
}
|
||||
|
||||
pub fn new_from_external(context: Span<'a>, error: impl std::error::Error) -> Self {
|
||||
Self::new_from_kind(context, ErrorKind::External(error.to_string()))
|
||||
}
|
||||
|
||||
pub fn char(self) -> char {
|
||||
match self.kind {
|
||||
ErrorKind::Char(c) => c,
|
||||
_ => panic!("Internal filter parser error"),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> ParseError<Span<'a>> for Error<'a> {
|
||||
fn from_error_kind(input: Span<'a>, kind: error::ErrorKind) -> Self {
|
||||
let kind = match kind {
|
||||
error::ErrorKind::Eof => ErrorKind::ExpectedEof,
|
||||
kind => ErrorKind::InternalError(kind),
|
||||
};
|
||||
Self { context: input, kind }
|
||||
}
|
||||
|
||||
fn append(_input: Span<'a>, _kind: error::ErrorKind, other: Self) -> Self {
|
||||
other
|
||||
}
|
||||
|
||||
fn from_char(input: Span<'a>, c: char) -> Self {
|
||||
Self { context: input, kind: ErrorKind::Char(c) }
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Display for Error<'a> {
|
||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
let input = self.context.fragment();
|
||||
|
||||
// When printing our error message we want to escape all `\n` to be sure we keep our format with the
|
||||
// first line being the diagnostic and the second line being the incriminated filter.
|
||||
let escaped_input = input.escape_debug();
|
||||
|
||||
match self.kind {
|
||||
ErrorKind::ExpectedValue if input.trim().is_empty() => {
|
||||
writeln!(f, "Was expecting a value but instead got nothing.")?
|
||||
}
|
||||
ErrorKind::MissingClosingDelimiter(c) => {
|
||||
writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)?
|
||||
}
|
||||
ErrorKind::ExpectedValue => {
|
||||
writeln!(f, "Was expecting a value but instead got `{}`.", escaped_input)?
|
||||
}
|
||||
ErrorKind::InvalidPrimary if input.trim().is_empty() => {
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing.")?
|
||||
}
|
||||
ErrorKind::InvalidPrimary => {
|
||||
writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `{}`.", escaped_input)?
|
||||
}
|
||||
ErrorKind::ExpectedEof => {
|
||||
writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)?
|
||||
}
|
||||
ErrorKind::Geo => {
|
||||
writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")?
|
||||
}
|
||||
ErrorKind::ReservedGeo(name) => {
|
||||
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name.escape_debug())?
|
||||
}
|
||||
ErrorKind::MisusedGeo => {
|
||||
writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?
|
||||
}
|
||||
ErrorKind::Char(c) => {
|
||||
panic!("Tried to display a char error with `{}`", c)
|
||||
}
|
||||
ErrorKind::InternalError(kind) => writeln!(
|
||||
f,
|
||||
"Encountered an internal `{:?}` error while parsing your filter. Please fill an issue", kind
|
||||
)?,
|
||||
ErrorKind::External(ref error) => writeln!(f, "{}", error)?,
|
||||
}
|
||||
let base_column = self.context.get_utf8_column();
|
||||
let size = self.context.fragment().chars().count();
|
||||
|
||||
write!(f, "{}:{} {}", base_column, base_column + size, self.context.extra)
|
||||
}
|
||||
}
|
589
filter-parser/src/lib.rs
Normal file
589
filter-parser/src/lib.rs
Normal file
|
@ -0,0 +1,589 @@
|
|||
//! BNF grammar:
|
||||
//!
|
||||
//! ```text
|
||||
//! filter = expression ~ EOF
|
||||
//! expression = or
|
||||
//! or = and (~ "OR" ~ and)
|
||||
//! and = not (~ "AND" not)*
|
||||
//! not = ("NOT" | "!") not | primary
|
||||
//! primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
|
||||
//! condition = value ("==" | ">" ...) value
|
||||
//! to = value value TO value
|
||||
//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
||||
//! singleQuoted = "'" .* all but quotes "'"
|
||||
//! doubleQuoted = "\"" .* all but double quotes "\""
|
||||
//! word = (alphanumeric | _ | - | .)+
|
||||
//! geoRadius = WS* ~ "_geoRadius(" ~ WS* ~ float ~ WS* ~ "," ~ WS* ~ float ~ WS* ~ "," float ~ WS* ~ ")"
|
||||
//! ```
|
||||
//!
|
||||
//! Other BNF grammar used to handle some specific errors:
|
||||
//! ```text
|
||||
//! geoPoint = WS* ~ "_geoPoint(" ~ (float ~ ",")* ~ ")"
|
||||
//! ```
|
||||
//!
|
||||
//! Specific errors:
|
||||
//! ================
|
||||
//! - If a user try to use a geoPoint, as a primary OR as a value we must throw an error.
|
||||
//! ```text
|
||||
//! field = _geoPoint(12, 13, 14)
|
||||
//! field < 12 AND _geoPoint(1, 2)
|
||||
//! ```
|
||||
//!
|
||||
//! - If a user try to use a geoRadius as a value we must throw an error.
|
||||
//! ```text
|
||||
//! field = _geoRadius(12, 13, 14)
|
||||
//! ```
|
||||
//!
|
||||
|
||||
mod condition;
|
||||
mod error;
|
||||
mod value;
|
||||
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Deref;
|
||||
use std::str::FromStr;
|
||||
|
||||
pub use condition::{parse_condition, parse_to, Condition};
|
||||
use error::{cut_with_err, NomErrorExt};
|
||||
pub use error::{Error, ErrorKind};
|
||||
use nom::branch::alt;
|
||||
use nom::bytes::complete::tag;
|
||||
use nom::character::complete::{char, multispace0};
|
||||
use nom::combinator::{cut, eof, map};
|
||||
use nom::multi::{many0, separated_list1};
|
||||
use nom::number::complete::recognize_float;
|
||||
use nom::sequence::{delimited, preceded, terminated, tuple};
|
||||
use nom::Finish;
|
||||
use nom_locate::LocatedSpan;
|
||||
pub(crate) use value::parse_value;
|
||||
|
||||
pub type Span<'a> = LocatedSpan<&'a str, &'a str>;
|
||||
|
||||
type IResult<'a, Ret> = nom::IResult<Span<'a>, Ret, Error<'a>>;
|
||||
|
||||
#[derive(Debug, Clone, Eq)]
|
||||
pub struct Token<'a>(Span<'a>);
|
||||
|
||||
impl<'a> Deref for Token<'a> {
|
||||
type Target = &'a str;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> PartialEq for Token<'a> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.0.fragment() == other.0.fragment()
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Token<'a> {
|
||||
pub fn new(position: Span<'a>) -> Self {
|
||||
Self(position)
|
||||
}
|
||||
|
||||
pub fn as_external_error(&self, error: impl std::error::Error) -> Error<'a> {
|
||||
Error::new_from_external(self.0, error)
|
||||
}
|
||||
|
||||
pub fn parse<T>(&self) -> Result<T, Error>
|
||||
where
|
||||
T: FromStr,
|
||||
T::Err: std::error::Error,
|
||||
{
|
||||
self.0.parse().map_err(|e| self.as_external_error(e))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> From<Span<'a>> for Token<'a> {
|
||||
fn from(span: Span<'a>) -> Self {
|
||||
Self(span)
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum FilterCondition<'a> {
|
||||
Condition { fid: Token<'a>, op: Condition<'a> },
|
||||
Or(Box<Self>, Box<Self>),
|
||||
And(Box<Self>, Box<Self>),
|
||||
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||
Empty,
|
||||
}
|
||||
|
||||
impl<'a> FilterCondition<'a> {
|
||||
pub fn negate(self) -> FilterCondition<'a> {
|
||||
use FilterCondition::*;
|
||||
|
||||
match self {
|
||||
Condition { fid, op } => match op.negate() {
|
||||
(op, None) => Condition { fid, op },
|
||||
(a, Some(b)) => Or(
|
||||
Condition { fid: fid.clone(), op: a }.into(),
|
||||
Condition { fid, op: b }.into(),
|
||||
),
|
||||
},
|
||||
Or(a, b) => And(a.negate().into(), b.negate().into()),
|
||||
And(a, b) => Or(a.negate().into(), b.negate().into()),
|
||||
Empty => Empty,
|
||||
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
|
||||
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(input: &'a str) -> Result<Self, Error> {
|
||||
if input.trim().is_empty() {
|
||||
return Ok(Self::Empty);
|
||||
}
|
||||
let span = Span::new_extra(input, input);
|
||||
parse_filter(span).finish().map(|(_rem, output)| output)
|
||||
}
|
||||
}
|
||||
|
||||
/// remove OPTIONAL whitespaces before AND after the the provided parser.
|
||||
fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>) -> IResult<O> {
|
||||
delimited(multispace0, inner, multispace0)
|
||||
}
|
||||
|
||||
/// or = and (~ "OR" ~ and)
|
||||
fn parse_or(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, lhs) = parse_and(input)?;
|
||||
// if we found a `OR` then we MUST find something next
|
||||
let (input, ors) = many0(preceded(ws(tag("OR")), cut(parse_and)))(input)?;
|
||||
|
||||
let expr = ors
|
||||
.into_iter()
|
||||
.fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch)));
|
||||
Ok((input, expr))
|
||||
}
|
||||
|
||||
/// and = not (~ "AND" not)*
|
||||
fn parse_and(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, lhs) = parse_not(input)?;
|
||||
// if we found a `AND` then we MUST find something next
|
||||
let (input, ors) = many0(preceded(ws(tag("AND")), cut(parse_not)))(input)?;
|
||||
let expr = ors
|
||||
.into_iter()
|
||||
.fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch)));
|
||||
Ok((input, expr))
|
||||
}
|
||||
|
||||
/// not = ("NOT" | "!") not | primary
|
||||
/// We can have multiple consecutive not, eg: `NOT NOT channel = mv`.
|
||||
/// If we parse a `NOT` or `!` we MUST parse something behind.
|
||||
fn parse_not(input: Span) -> IResult<FilterCondition> {
|
||||
alt((map(preceded(alt((tag("!"), tag("NOT"))), cut(parse_not)), |e| e.negate()), parse_primary))(
|
||||
input,
|
||||
)
|
||||
}
|
||||
|
||||
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
|
||||
/// If we parse `_geoRadius` we MUST parse the rest of the expression.
|
||||
fn parse_geo_radius(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to forbid space BEFORE the _geoRadius but not after
|
||||
let parsed = preceded(
|
||||
tuple((multispace0, tag("_geoRadius"))),
|
||||
// if we were able to parse `_geoRadius` and can't parse the rest of the input we returns a failure
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
|
||||
)(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo)));
|
||||
|
||||
let (input, args) = parsed?;
|
||||
|
||||
if args.len() != 3 {
|
||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::Geo)));
|
||||
}
|
||||
|
||||
let res = FilterCondition::GeoLowerThan {
|
||||
point: [args[0].into(), args[1].into()],
|
||||
radius: args[2].into(),
|
||||
};
|
||||
Ok((input, res))
|
||||
}
|
||||
|
||||
/// geoPoint = WS* ~ "_geoPoint(float ~ "," ~ float ~ "," float)
|
||||
fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
|
||||
// we want to forbid space BEFORE the _geoPoint but not after
|
||||
tuple((
|
||||
multispace0,
|
||||
tag("_geoPoint"),
|
||||
// if we were able to parse `_geoPoint` we are going to return a Failure whatever happens next.
|
||||
cut(delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')'))),
|
||||
))(input)
|
||||
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))?;
|
||||
// if we succeeded we still returns a Failure because geoPoints are not allowed
|
||||
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
|
||||
}
|
||||
|
||||
/// primary = (WS* ~ "(" expression ")" ~ WS*) | geoRadius | condition | to
|
||||
fn parse_primary(input: Span) -> IResult<FilterCondition> {
|
||||
alt((
|
||||
// if we find a first parenthesis, then we must parse an expression and find the closing parenthesis
|
||||
delimited(
|
||||
ws(char('(')),
|
||||
cut(parse_expression),
|
||||
cut_with_err(ws(char(')')), |c| {
|
||||
Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char()))
|
||||
}),
|
||||
),
|
||||
parse_geo_radius,
|
||||
parse_condition,
|
||||
parse_to,
|
||||
// the next lines are only for error handling and are written at the end to have the less possible performance impact
|
||||
parse_geo_point,
|
||||
))(input)
|
||||
// if the inner parsers did not match enough information to return an accurate error
|
||||
.map_err(|e| e.map_err(|_| Error::new_from_kind(input, ErrorKind::InvalidPrimary)))
|
||||
}
|
||||
|
||||
/// expression = or
|
||||
pub fn parse_expression(input: Span) -> IResult<FilterCondition> {
|
||||
parse_or(input)
|
||||
}
|
||||
|
||||
/// filter = expression ~ EOF
|
||||
pub fn parse_filter(input: Span) -> IResult<FilterCondition> {
|
||||
terminated(parse_expression, eof)(input)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use super::*;
|
||||
|
||||
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
|
||||
pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
|
||||
// if the string is empty we still need to return 1 for the line number
|
||||
let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count());
|
||||
let offset = before.chars().count();
|
||||
// the extra field is not checked in the tests so we can set it to nothing
|
||||
unsafe { Span::new_from_raw_offset(offset, lines as u32, value, "") }.into()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse() {
|
||||
use FilterCondition as Fc;
|
||||
|
||||
let test_case = [
|
||||
// simple test
|
||||
(
|
||||
"channel = Ponce",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::Equal(rtok("channel = ", "Ponce")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"subscribers = 12",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "subscribers"),
|
||||
op: Condition::Equal(rtok("subscribers = ", "12")),
|
||||
},
|
||||
),
|
||||
// test all the quotes and simple quotes
|
||||
(
|
||||
"channel = 'Mister Mv'",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::Equal(rtok("channel = '", "Mister Mv")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"channel = \"Mister Mv\"",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::Equal(rtok("channel = \"", "Mister Mv")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"'dog race' = Borzoi",
|
||||
Fc::Condition {
|
||||
fid: rtok("'", "dog race"),
|
||||
op: Condition::Equal(rtok("'dog race' = ", "Borzoi")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"\"dog race\" = Chusky",
|
||||
Fc::Condition {
|
||||
fid: rtok("\"", "dog race"),
|
||||
op: Condition::Equal(rtok("\"dog race\" = ", "Chusky")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"\"dog race\" = \"Bernese Mountain\"",
|
||||
Fc::Condition {
|
||||
fid: rtok("\"", "dog race"),
|
||||
op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"'dog race' = 'Bernese Mountain'",
|
||||
Fc::Condition {
|
||||
fid: rtok("'", "dog race"),
|
||||
op: Condition::Equal(rtok("'dog race' = '", "Bernese Mountain")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"\"dog race\" = 'Bernese Mountain'",
|
||||
Fc::Condition {
|
||||
fid: rtok("\"", "dog race"),
|
||||
op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
|
||||
},
|
||||
),
|
||||
// test all the operators
|
||||
(
|
||||
"channel != ponce",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::NotEqual(rtok("channel != ", "ponce")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"NOT channel = ponce",
|
||||
Fc::Condition {
|
||||
fid: rtok("NOT ", "channel"),
|
||||
op: Condition::NotEqual(rtok("NOT channel = ", "ponce")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"subscribers < 1000",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "subscribers"),
|
||||
op: Condition::LowerThan(rtok("subscribers < ", "1000")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"subscribers > 1000",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "subscribers"),
|
||||
op: Condition::GreaterThan(rtok("subscribers > ", "1000")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"subscribers <= 1000",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "subscribers"),
|
||||
op: Condition::LowerThanOrEqual(rtok("subscribers <= ", "1000")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"subscribers >= 1000",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "subscribers"),
|
||||
op: Condition::GreaterThanOrEqual(rtok("subscribers >= ", "1000")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"NOT subscribers < 1000",
|
||||
Fc::Condition {
|
||||
fid: rtok("NOT ", "subscribers"),
|
||||
op: Condition::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"NOT subscribers > 1000",
|
||||
Fc::Condition {
|
||||
fid: rtok("NOT ", "subscribers"),
|
||||
op: Condition::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"NOT subscribers <= 1000",
|
||||
Fc::Condition {
|
||||
fid: rtok("NOT ", "subscribers"),
|
||||
op: Condition::GreaterThan(rtok("NOT subscribers <= ", "1000")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"NOT subscribers >= 1000",
|
||||
Fc::Condition {
|
||||
fid: rtok("NOT ", "subscribers"),
|
||||
op: Condition::LowerThan(rtok("NOT subscribers >= ", "1000")),
|
||||
},
|
||||
),
|
||||
(
|
||||
"subscribers 100 TO 1000",
|
||||
Fc::Condition {
|
||||
fid: rtok("", "subscribers"),
|
||||
op: Condition::Between {
|
||||
from: rtok("subscribers ", "100"),
|
||||
to: rtok("subscribers 100 TO ", "1000"),
|
||||
},
|
||||
},
|
||||
),
|
||||
(
|
||||
"NOT subscribers 100 TO 1000",
|
||||
Fc::Or(
|
||||
Fc::Condition {
|
||||
fid: rtok("NOT ", "subscribers"),
|
||||
op: Condition::LowerThan(rtok("NOT subscribers ", "100")),
|
||||
}
|
||||
.into(),
|
||||
Fc::Condition {
|
||||
fid: rtok("NOT ", "subscribers"),
|
||||
op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")),
|
||||
}
|
||||
.into(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"_geoRadius(12, 13, 14)",
|
||||
Fc::GeoLowerThan {
|
||||
point: [rtok("_geoRadius(", "12"), rtok("_geoRadius(12, ", "13")],
|
||||
radius: rtok("_geoRadius(12, 13, ", "14"),
|
||||
},
|
||||
),
|
||||
(
|
||||
"NOT _geoRadius(12, 13, 14)",
|
||||
Fc::GeoGreaterThan {
|
||||
point: [rtok("NOT _geoRadius(", "12"), rtok("NOT _geoRadius(12, ", "13")],
|
||||
radius: rtok("NOT _geoRadius(12, 13, ", "14"),
|
||||
},
|
||||
),
|
||||
// test simple `or` and `and`
|
||||
(
|
||||
"channel = ponce AND 'dog race' != 'bernese mountain'",
|
||||
Fc::And(
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::Equal(rtok("channel = ", "ponce")),
|
||||
}
|
||||
.into(),
|
||||
Fc::Condition {
|
||||
fid: rtok("channel = ponce AND '", "dog race"),
|
||||
op: Condition::NotEqual(rtok(
|
||||
"channel = ponce AND 'dog race' != '",
|
||||
"bernese mountain",
|
||||
)),
|
||||
}
|
||||
.into(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"channel = ponce OR 'dog race' != 'bernese mountain'",
|
||||
Fc::Or(
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::Equal(rtok("channel = ", "ponce")),
|
||||
}
|
||||
.into(),
|
||||
Fc::Condition {
|
||||
fid: rtok("channel = ponce OR '", "dog race"),
|
||||
op: Condition::NotEqual(rtok(
|
||||
"channel = ponce OR 'dog race' != '",
|
||||
"bernese mountain",
|
||||
)),
|
||||
}
|
||||
.into(),
|
||||
),
|
||||
),
|
||||
(
|
||||
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000",
|
||||
Fc::Or(
|
||||
Fc::And(
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::Equal(rtok("channel = ", "ponce")),
|
||||
}
|
||||
.into(),
|
||||
Fc::Condition {
|
||||
fid: rtok("channel = ponce AND '", "dog race"),
|
||||
op: Condition::NotEqual(rtok(
|
||||
"channel = ponce AND 'dog race' != '",
|
||||
"bernese mountain",
|
||||
)),
|
||||
}
|
||||
.into(),
|
||||
)
|
||||
.into(),
|
||||
Fc::Condition {
|
||||
fid: rtok(
|
||||
"channel = ponce AND 'dog race' != 'bernese mountain' OR ",
|
||||
"subscribers",
|
||||
),
|
||||
op: Condition::GreaterThan(rtok(
|
||||
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ",
|
||||
"1000",
|
||||
)),
|
||||
}
|
||||
.into(),
|
||||
),
|
||||
),
|
||||
// test parenthesis
|
||||
(
|
||||
"channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )",
|
||||
Fc::And(
|
||||
Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(),
|
||||
Fc::Or(
|
||||
Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(),
|
||||
Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
|
||||
).into()),
|
||||
),
|
||||
(
|
||||
"(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)",
|
||||
Fc::And(
|
||||
Fc::Or(
|
||||
Fc::And(
|
||||
Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(),
|
||||
Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(),
|
||||
).into(),
|
||||
Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
|
||||
).into(),
|
||||
Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into()
|
||||
)
|
||||
)
|
||||
];
|
||||
|
||||
for (input, expected) in test_case {
|
||||
let result = Fc::parse(input);
|
||||
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
|
||||
expected,
|
||||
result.unwrap_err()
|
||||
);
|
||||
let filter = result.unwrap();
|
||||
assert_eq!(filter, expected, "Filter `{}` failed.", input);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn error() {
|
||||
use FilterCondition as Fc;
|
||||
|
||||
let test_case = [
|
||||
// simple test
|
||||
("channel = Ponce = 12", "Found unexpected characters at the end of the filter: `= 12`. You probably forgot an `OR` or an `AND` rule."),
|
||||
("channel = ", "Was expecting a value but instead got nothing."),
|
||||
("channel = 🐻", "Was expecting a value but instead got `🐻`."),
|
||||
("channel = 🐻 AND followers < 100", "Was expecting a value but instead got `🐻`."),
|
||||
("OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `OR`."),
|
||||
("AND", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `AND`."),
|
||||
("channel Ponce", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` at `channel Ponce`."),
|
||||
("channel = Ponce OR", "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `TO` or `_geoRadius` but instead got nothing."),
|
||||
("_geoRadius", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
|
||||
("_geoRadius = 12", "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`."),
|
||||
("_geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
|
||||
("position <= _geoPoint(12, 13, 14)", "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates."),
|
||||
("position <= _geoRadius(12, 13, 14)", "The `_geoRadius` filter is an operation and can't be used as a value."),
|
||||
("channel = 'ponce", "Expression `'ponce` is missing the following closing delimiter: `'`."),
|
||||
("channel = \"ponce", "Expression `\"ponce` is missing the following closing delimiter: `\"`."),
|
||||
("channel = mv OR (followers >= 1000", "Expression `(followers >= 1000` is missing the following closing delimiter: `)`."),
|
||||
("channel = mv OR followers >= 1000)", "Found unexpected characters at the end of the filter: `)`. You probably forgot an `OR` or an `AND` rule."),
|
||||
];
|
||||
|
||||
for (input, expected) in test_case {
|
||||
let result = Fc::parse(input);
|
||||
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Filter `{}` wasn't supposed to be parsed but it did with the following result: `{:?}`",
|
||||
input,
|
||||
result.unwrap()
|
||||
);
|
||||
let filter = result.unwrap_err().to_string();
|
||||
assert!(filter.starts_with(expected), "Filter `{:?}` was supposed to return the following error:\n{}\n, but instead returned\n{}\n.", input, expected, filter);
|
||||
}
|
||||
}
|
||||
}
|
16
filter-parser/src/main.rs
Normal file
16
filter-parser/src/main.rs
Normal file
|
@ -0,0 +1,16 @@
|
|||
fn main() {
|
||||
let input = std::env::args().nth(1).expect("You must provide a filter to test");
|
||||
|
||||
println!("Trying to execute the following filter:\n{}\n", input);
|
||||
|
||||
match filter_parser::FilterCondition::parse(&input) {
|
||||
Ok(filter) => {
|
||||
println!("✅ Valid filter");
|
||||
println!("{:#?}", filter);
|
||||
}
|
||||
Err(e) => {
|
||||
println!("❎ Invalid filter");
|
||||
println!("{}", e.to_string());
|
||||
}
|
||||
}
|
||||
}
|
147
filter-parser/src/value.rs
Normal file
147
filter-parser/src/value.rs
Normal file
|
@ -0,0 +1,147 @@
|
|||
use nom::branch::alt;
|
||||
use nom::bytes::complete::{take_till, take_while, take_while1};
|
||||
use nom::character::complete::{char, multispace0};
|
||||
use nom::combinator::cut;
|
||||
use nom::sequence::{delimited, terminated};
|
||||
|
||||
use crate::error::NomErrorExt;
|
||||
use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token};
|
||||
|
||||
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
||||
pub fn parse_value(input: Span) -> IResult<Token> {
|
||||
// to get better diagnostic message we are going to strip the left whitespaces from the input right now
|
||||
let (input, _) = take_while(char::is_whitespace)(input)?;
|
||||
|
||||
// then, we want to check if the user is misusing a geo expression
|
||||
// This expression can’t finish without error.
|
||||
// We want to return an error in case of failure.
|
||||
if let Err(err) = parse_geo_point(input) {
|
||||
if err.is_failure() {
|
||||
return Err(err);
|
||||
}
|
||||
}
|
||||
match parse_geo_radius(input) {
|
||||
Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))),
|
||||
// if we encountered a failure it means the user badly wrote a _geoRadius filter.
|
||||
// But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value.
|
||||
Err(e) if e.is_failure() => {
|
||||
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo)))
|
||||
}
|
||||
_ => (),
|
||||
}
|
||||
|
||||
// singleQuoted = "'" .* all but quotes "'"
|
||||
let simple_quoted = take_till(|c: char| c == '\'');
|
||||
// doubleQuoted = "\"" (word | spaces)* "\""
|
||||
let double_quoted = take_till(|c: char| c == '"');
|
||||
// word = (alphanumeric | _ | - | .)+
|
||||
let word = take_while1(is_value_component);
|
||||
|
||||
// this parser is only used when an error is encountered and it parse the
|
||||
// largest string possible that do not contain any “language” syntax.
|
||||
// If we try to parse `name = 🦀 AND language = rust` we want to return an
|
||||
// error saying we could not parse `🦀`. Not that no value were found or that
|
||||
// we could note parse `🦀 AND language = rust`.
|
||||
// we want to remove the space before entering the alt because if we don't,
|
||||
// when we create the errors from the output of the alt we have spaces everywhere
|
||||
let error_word = take_till::<_, _, Error>(is_syntax_component);
|
||||
|
||||
terminated(
|
||||
alt((
|
||||
delimited(char('\''), cut(simple_quoted), cut(char('\''))),
|
||||
delimited(char('"'), cut(double_quoted), cut(char('"'))),
|
||||
word,
|
||||
)),
|
||||
multispace0,
|
||||
)(input)
|
||||
.map(|(s, t)| (s, t.into()))
|
||||
// if we found nothing in the alt it means the user specified something that was not recognized as a value
|
||||
.map_err(|e: nom::Err<Error>| {
|
||||
e.map_err(|_| Error::new_from_kind(error_word(input).unwrap().1, ErrorKind::ExpectedValue))
|
||||
})
|
||||
// if we found encountered a failure it means the user really tried to input a value, but had an unmatched quote
|
||||
.map_err(|e| {
|
||||
e.map_fail(|c| Error::new_from_kind(input, ErrorKind::MissingClosingDelimiter(c.char())))
|
||||
})
|
||||
}
|
||||
|
||||
fn is_value_component(c: char) -> bool {
|
||||
c.is_alphanumeric() || ['_', '-', '.'].contains(&c)
|
||||
}
|
||||
|
||||
fn is_syntax_component(c: char) -> bool {
|
||||
c.is_whitespace() || ['(', ')', '=', '<', '>', '!'].contains(&c)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod test {
|
||||
use nom::Finish;
|
||||
|
||||
use super::*;
|
||||
use crate::tests::rtok;
|
||||
|
||||
#[test]
|
||||
fn name() {
|
||||
let test_case = [
|
||||
("channel", rtok("", "channel")),
|
||||
(".private", rtok("", ".private")),
|
||||
("I-love-kebab", rtok("", "I-love-kebab")),
|
||||
("but_snakes_is_also_good", rtok("", "but_snakes_is_also_good")),
|
||||
("parens(", rtok("", "parens")),
|
||||
("parens)", rtok("", "parens")),
|
||||
("not!", rtok("", "not")),
|
||||
(" channel", rtok(" ", "channel")),
|
||||
("channel ", rtok("", "channel")),
|
||||
(" channel ", rtok(" ", "channel")),
|
||||
("'channel'", rtok("'", "channel")),
|
||||
("\"channel\"", rtok("\"", "channel")),
|
||||
("'cha)nnel'", rtok("'", "cha)nnel")),
|
||||
("'cha\"nnel'", rtok("'", "cha\"nnel")),
|
||||
("\"cha'nnel\"", rtok("\"", "cha'nnel")),
|
||||
("\" some spaces \"", rtok("\"", " some spaces ")),
|
||||
("\"cha'nnel\"", rtok("'", "cha'nnel")),
|
||||
("\"cha'nnel\"", rtok("'", "cha'nnel")),
|
||||
("I'm tamo", rtok("'m tamo", "I")),
|
||||
];
|
||||
|
||||
for (input, expected) in test_case {
|
||||
let input = Span::new_extra(input, input);
|
||||
let result = parse_value(input);
|
||||
|
||||
assert!(
|
||||
result.is_ok(),
|
||||
"Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
|
||||
expected,
|
||||
result.unwrap_err()
|
||||
);
|
||||
let value = result.unwrap().1;
|
||||
assert_eq!(value, expected, "Filter `{}` failed.", input);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn diagnostic() {
|
||||
let test_case = [
|
||||
("🦀", "🦀"),
|
||||
(" 🦀", "🦀"),
|
||||
("🦀 AND crab = truc", "🦀"),
|
||||
("🦀_in_name", "🦀_in_name"),
|
||||
(" (name = ...", ""),
|
||||
];
|
||||
|
||||
for (input, expected) in test_case {
|
||||
let input = Span::new_extra(input, input);
|
||||
let result = parse_value(input);
|
||||
|
||||
assert!(
|
||||
result.is_err(),
|
||||
"Filter `{}` wasn’t supposed to be parsed but it did with the following result: `{:?}`",
|
||||
expected,
|
||||
result.unwrap()
|
||||
);
|
||||
// get the inner string referenced in the error
|
||||
let value = *result.finish().unwrap_err().context().fragment();
|
||||
assert_eq!(value, expected, "Filter `{}` was supposed to fail with the following value: `{}`, but it failed with: `{}`.", input, expected, value);
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue