diff --git a/filter_parser/Cargo.toml b/filter_parser/Cargo.toml index d8a522b1b..80767d5c4 100644 --- a/filter_parser/Cargo.toml +++ b/filter_parser/Cargo.toml @@ -8,8 +8,3 @@ edition = "2021" [dependencies] nom = "7.0.0" nom_locate = "4.0.0" - -[dev-dependencies] -big_s = "1.0.2" -maplit = "1.0.2" -rand = "0.8.3" diff --git a/filter_parser/src/condition.rs b/filter_parser/src/condition.rs new file mode 100644 index 000000000..5a1bb62be --- /dev/null +++ b/filter_parser/src/condition.rs @@ -0,0 +1,94 @@ +//! BNF grammar: +//! +//! ```text +//! condition = value ("==" | ">" ...) value +//! to = value value TO value +//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* +//! singleQuoted = "'" .* all but quotes "'" +//! doubleQuoted = "\"" (word | spaces)* "\"" +//! word = (alphanumeric | _ | - | .)+ +//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) +//! ``` + +use nom::branch::alt; +use nom::bytes::complete::tag; +use nom::sequence::tuple; +use nom::IResult; +use Condition::*; + +use crate::{parse_value, ws, FilterCondition, Span, Token}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Condition<'a> { + GreaterThan(Token<'a>), + GreaterThanOrEqual(Token<'a>), + Equal(Token<'a>), + NotEqual(Token<'a>), + LowerThan(Token<'a>), + LowerThanOrEqual(Token<'a>), + Between { from: Token<'a>, to: Token<'a> }, +} + +impl<'a> Condition<'a> { + /// This method can return two operations in case it must express + /// an OR operation for the between case (i.e. `TO`). + pub fn negate(self) -> (Self, Option) { + match self { + GreaterThan(n) => (LowerThanOrEqual(n), None), + GreaterThanOrEqual(n) => (LowerThan(n), None), + Equal(s) => (NotEqual(s), None), + NotEqual(s) => (Equal(s), None), + LowerThan(n) => (GreaterThanOrEqual(n), None), + LowerThanOrEqual(n) => (GreaterThan(n), None), + Between { from, to } => (LowerThan(from), Some(GreaterThan(to))), + } + } +} + +/// condition = value ("==" | ">" ...) 
value +pub fn parse_condition(input: Span) -> IResult { + let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); + let (input, (key, op, value)) = + tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?; + + let fid = key; + + // TODO + match *op.fragment() { + "=" => { + let k = FilterCondition::Condition { fid, op: Equal(value) }; + Ok((input, k)) + } + "!=" => { + let k = FilterCondition::Condition { fid, op: NotEqual(value) }; + Ok((input, k)) + } + ">" | "<" | "<=" | ">=" => { + let k = match *op.fragment() { + ">" => FilterCondition::Condition { fid, op: GreaterThan(value) }, + "<" => FilterCondition::Condition { fid, op: LowerThan(value) }, + "<=" => FilterCondition::Condition { fid, op: LowerThanOrEqual(value) }, + ">=" => FilterCondition::Condition { fid, op: GreaterThanOrEqual(value) }, + _ => unreachable!(), + }; + Ok((input, k)) + } + _ => unreachable!(), + } +} + +/// to = value value TO value +pub fn parse_to(input: Span) -> IResult { + let (input, (key, from, _, to)) = + tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))( + input, + )?; + + Ok(( + input, + FilterCondition::Condition { + fid: key.into(), + op: Between { from: from.into(), to: to.into() }, + }, + )) +} diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 6e6f5a1e6..096a9e26e 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -1,4 +1,3 @@ -#![allow(unused_imports)] //! BNF grammar: //! //! ```text @@ -7,8 +6,8 @@ //! and = not (~ "AND" not)* //! not = ("NOT" | "!") not | primary //! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius -//! to = value value TO value //! condition = value ("==" | ">" ...) value +//! to = value value TO value //! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* //! singleQuoted = "'" .* all but quotes "'" //! doubleQuoted = "\"" (word | spaces)* "\"" @@ -16,61 +15,24 @@ //! 
geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) //! ``` -#[derive(Debug, Clone, PartialEq, Eq)] -pub enum FilterCondition<'a> { - Operator { fid: Token<'a>, op: Operator<'a> }, - Or(Box, Box), - And(Box, Box), - GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, - GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, - Empty, -} - -impl<'a> FilterCondition<'a> { - pub fn negate(self) -> FilterCondition<'a> { - use FilterCondition::*; - - match self { - Operator { fid, op } => match op.negate() { - (op, None) => Operator { fid, op }, - (a, Some(b)) => { - Or(Operator { fid: fid.clone(), op: a }.into(), Operator { fid, op: b }.into()) - } - }, - Or(a, b) => And(a.negate().into(), b.negate().into()), - And(a, b) => Or(a.negate().into(), b.negate().into()), - Empty => Empty, - GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, - GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, - } - } - - pub fn parse(input: &'a str) -> IResult { - let span = Span::new(input); - parse_expression(span) - } -} - -use std::collections::HashSet; +mod condition; +mod value; use std::fmt::Debug; -use std::result::Result as StdResult; +pub use condition::{parse_condition, parse_to, Condition}; use nom::branch::alt; -use nom::bytes::complete::{tag, take_till, take_while1}; +use nom::bytes::complete::tag; use nom::character::complete::{char, multispace0}; use nom::combinator::map; -use nom::error::{ContextError, ErrorKind, ParseError, VerboseError}; +use nom::error::{ContextError, ParseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; -use nom::sequence::{delimited, preceded, tuple}; +use nom::sequence::{delimited, preceded}; use nom::IResult; use nom_locate::LocatedSpan; +pub(crate) use value::parse_value; -use self::Operator::*; - -pub enum FilterError { - AttributeNotFilterable(String), -} +type Span<'a> = LocatedSpan<&'a str>; #[derive(Debug, Clone, PartialEq, Eq)] pub struct 
Token<'a> { @@ -90,42 +52,49 @@ impl<'a> From> for Token<'a> { } } -type Span<'a> = LocatedSpan<&'a str>; - #[derive(Debug, Clone, PartialEq, Eq)] -pub enum Operator<'a> { - GreaterThan(Token<'a>), - GreaterThanOrEqual(Token<'a>), - Equal(Token<'a>), - NotEqual(Token<'a>), - LowerThan(Token<'a>), - LowerThanOrEqual(Token<'a>), - Between { from: Token<'a>, to: Token<'a> }, +pub enum FilterCondition<'a> { + Condition { fid: Token<'a>, op: Condition<'a> }, + Or(Box, Box), + And(Box, Box), + GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, + GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, + Empty, } -impl<'a> Operator<'a> { - /// This method can return two operations in case it must express - /// an OR operation for the between case (i.e. `TO`). - pub fn negate(self) -> (Self, Option) { +impl<'a> FilterCondition<'a> { + pub fn negate(self) -> FilterCondition<'a> { + use FilterCondition::*; + match self { - GreaterThan(n) => (LowerThanOrEqual(n), None), - GreaterThanOrEqual(n) => (LowerThan(n), None), - Equal(s) => (NotEqual(s), None), - NotEqual(s) => (Equal(s), None), - LowerThan(n) => (GreaterThanOrEqual(n), None), - LowerThanOrEqual(n) => (GreaterThan(n), None), - Between { from, to } => (LowerThan(from), Some(GreaterThan(to))), + Condition { fid, op } => match op.negate() { + (op, None) => Condition { fid, op }, + (a, Some(b)) => Or( + Condition { fid: fid.clone(), op: a }.into(), + Condition { fid, op: b }.into(), + ), + }, + Or(a, b) => And(a.negate().into(), b.negate().into()), + And(a, b) => Or(a.negate().into(), b.negate().into()), + Empty => Empty, + GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, + GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, } } + + pub fn parse(input: &'a str) -> IResult { + let span = Span::new(input); + parse_expression(span) + } } -pub trait FilterParserError<'a>: - nom::error::ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug -{ +// remove OPTIONAL 
whitespaces before AND after the provided parser +fn ws<'a, O>( + inner: impl FnMut(Span<'a>) -> IResult, +) -> impl FnMut(Span<'a>) -> IResult { + delimited(multispace0, inner, multispace0) } -impl<'a> FilterParserError<'a> for VerboseError<&'a str> {} - /// and = not (~ "AND" not)* fn parse_or(input: Span) -> IResult { let (input, lhs) = parse_and(input)?; @@ -153,60 +122,6 @@ fn parse_not(input: Span) -> IResult { }))(input) } -fn ws<'a, O>( - inner: impl FnMut(Span<'a>) -> IResult, -) -> impl FnMut(Span<'a>) -> IResult { - delimited(multispace0, inner, multispace0) -} - -/// condition = value ("==" | ">" ...) value -fn parse_condition(input: Span) -> IResult { - let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); - let (input, (key, op, value)) = - tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?; - - let fid = key.into(); - - // TODO - match *op.fragment() { - "=" => { - let k = FilterCondition::Operator { fid, op: Equal(value.into()) }; - Ok((input, k)) - } - "!=" => { - let k = FilterCondition::Operator { fid, op: NotEqual(value.into()) }; - Ok((input, k)) - } - ">" | "<" | "<=" | ">=" => { - let k = match *op.fragment() { - ">" => FilterCondition::Operator { fid, op: GreaterThan(value.into()) }, - "<" => FilterCondition::Operator { fid, op: LowerThan(value.into()) }, - "<=" => FilterCondition::Operator { fid, op: LowerThanOrEqual(value.into()) }, - ">=" => FilterCondition::Operator { fid, op: GreaterThanOrEqual(value.into()) }, - _ => unreachable!(), - }; - Ok((input, k)) - } - _ => unreachable!(), - } -} - -/// to = value value TO value -fn parse_to(input: Span) -> IResult { - let (input, (key, from, _, to)) = - tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))( - input, - )?; - - Ok(( - input, - FilterCondition::Operator { - fid: key.into(), - op: Between { from: from.into(), to: to.into() }, - }, - )) -} - /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ 
"," float) fn parse_geo_radius(input: Span) -> IResult { let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; @@ -262,40 +177,17 @@ fn parse_primary(input: Span) -> IResult { ))(input) } -/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -fn parse_value(input: Span) -> IResult { - // singleQuoted = "'" .* all but quotes "'" - let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); - // doubleQuoted = "\"" (word | spaces)* "\"" - let quoted_key = |input| take_till(|c: char| c == '"')(input); - // word = (alphanumeric | _ | - | .)+ - let word = |input| take_while1(is_key_component)(input); - - alt(( - ws(delimited(char('\''), simple_quoted_key, char('\''))), - ws(delimited(char('"'), quoted_key, char('"'))), - ws(word), - ))(input) -} - -fn is_key_component(c: char) -> bool { - c.is_alphanumeric() || ['_', '-', '.'].contains(&c) -} - /// expression = or pub fn parse_expression(input: Span) -> IResult { parse_or(input) } #[cfg(test)] -mod tests { - use big_s::S; - use maplit::hashset; - +pub mod tests { use super::*; /// Create a raw [Token]. 
You must specify the string that appear BEFORE your element followed by your element - fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> { + pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> { // if the string is empty we still need to return 1 for the line number let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count()); let offset = before.chars().count(); @@ -306,149 +198,148 @@ mod tests { fn parse() { use FilterCondition as Fc; - // new_from_raw_offset is unsafe let test_case = [ // simple test ( "channel = Ponce", - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "Ponce")), + op: Condition::Equal(rtok("channel = ", "Ponce")), }, ), ( "subscribers = 12", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::Equal(rtok("subscribers = ", "12")), + op: Condition::Equal(rtok("subscribers = ", "12")), }, ), // test all the quotes and simple quotes ( "channel = 'Mister Mv'", - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = '", "Mister Mv")), + op: Condition::Equal(rtok("channel = '", "Mister Mv")), }, ), ( "channel = \"Mister Mv\"", - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = \"", "Mister Mv")), + op: Condition::Equal(rtok("channel = \"", "Mister Mv")), }, ), ( "'dog race' = Borzoi", - Fc::Operator { + Fc::Condition { fid: rtok("'", "dog race"), - op: Operator::Equal(rtok("'dog race' = ", "Borzoi")), + op: Condition::Equal(rtok("'dog race' = ", "Borzoi")), }, ), ( "\"dog race\" = Chusky", - Fc::Operator { + Fc::Condition { fid: rtok("\"", "dog race"), - op: Operator::Equal(rtok("\"dog race\" = ", "Chusky")), + op: Condition::Equal(rtok("\"dog race\" = ", "Chusky")), }, ), ( "\"dog race\" = \"Bernese Mountain\"", - Fc::Operator { + Fc::Condition { fid: rtok("\"", "dog race"), - op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese 
Mountain")), + op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), }, ), ( "'dog race' = 'Bernese Mountain'", - Fc::Operator { + Fc::Condition { fid: rtok("'", "dog race"), - op: Operator::Equal(rtok("'dog race' = '", "Bernese Mountain")), + op: Condition::Equal(rtok("'dog race' = '", "Bernese Mountain")), }, ), ( "\"dog race\" = 'Bernese Mountain'", - Fc::Operator { + Fc::Condition { fid: rtok("\"", "dog race"), - op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), + op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), }, ), // test all the operators ( "channel != ponce", - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::NotEqual(rtok("channel != ", "ponce")), + op: Condition::NotEqual(rtok("channel != ", "ponce")), }, ), ( "NOT channel = ponce", - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "channel"), - op: Operator::NotEqual(rtok("NOT channel = ", "ponce")), + op: Condition::NotEqual(rtok("NOT channel = ", "ponce")), }, ), ( "subscribers < 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::LowerThan(rtok("subscribers < ", "1000")), + op: Condition::LowerThan(rtok("subscribers < ", "1000")), }, ), ( "subscribers > 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::GreaterThan(rtok("subscribers > ", "1000")), + op: Condition::GreaterThan(rtok("subscribers > ", "1000")), }, ), ( "subscribers <= 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::LowerThanOrEqual(rtok("subscribers <= ", "1000")), + op: Condition::LowerThanOrEqual(rtok("subscribers <= ", "1000")), }, ), ( "subscribers >= 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::GreaterThanOrEqual(rtok("subscribers >= ", "1000")), + op: Condition::GreaterThanOrEqual(rtok("subscribers >= ", "1000")), }, ), ( "NOT subscribers < 1000", - Fc::Operator { + Fc::Condition { fid: rtok("NOT 
", "subscribers"), - op: Operator::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")), + op: Condition::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")), }, ), ( "NOT subscribers > 1000", - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")), + op: Condition::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")), }, ), ( "NOT subscribers <= 1000", - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::GreaterThan(rtok("NOT subscribers <= ", "1000")), + op: Condition::GreaterThan(rtok("NOT subscribers <= ", "1000")), }, ), ( "NOT subscribers >= 1000", - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::LowerThan(rtok("NOT subscribers >= ", "1000")), + op: Condition::LowerThan(rtok("NOT subscribers >= ", "1000")), }, ), ( "subscribers 100 TO 1000", - Fc::Operator { + Fc::Condition { fid: rtok("", "subscribers"), - op: Operator::Between { + op: Condition::Between { from: rtok("subscribers ", "100"), to: rtok("subscribers 100 TO ", "1000"), }, @@ -457,14 +348,14 @@ mod tests { ( "NOT subscribers 100 TO 1000", Fc::Or( - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::LowerThan(rtok("NOT subscribers ", "100")), + op: Condition::LowerThan(rtok("NOT subscribers ", "100")), } .into(), - Fc::Operator { + Fc::Condition { fid: rtok("NOT ", "subscribers"), - op: Operator::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), + op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), } .into(), ), @@ -487,14 +378,14 @@ mod tests { ( "channel = ponce AND 'dog race' != 'bernese mountain'", Fc::And( - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "ponce")), + op: Condition::Equal(rtok("channel = ", "ponce")), } .into(), - Fc::Operator { + Fc::Condition { fid: rtok("channel = ponce AND '", "dog race"), - op: 
Operator::NotEqual(rtok( + op: Condition::NotEqual(rtok( "channel = ponce AND 'dog race' != '", "bernese mountain", )), @@ -505,14 +396,14 @@ mod tests { ( "channel = ponce OR 'dog race' != 'bernese mountain'", Fc::Or( - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "ponce")), + op: Condition::Equal(rtok("channel = ", "ponce")), } .into(), - Fc::Operator { + Fc::Condition { fid: rtok("channel = ponce OR '", "dog race"), - op: Operator::NotEqual(rtok( + op: Condition::NotEqual(rtok( "channel = ponce OR 'dog race' != '", "bernese mountain", )), @@ -524,14 +415,14 @@ mod tests { "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", Fc::Or( Fc::And( - Fc::Operator { + Fc::Condition { fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "ponce")), + op: Condition::Equal(rtok("channel = ", "ponce")), } .into(), - Fc::Operator { + Fc::Condition { fid: rtok("channel = ponce AND '", "dog race"), - op: Operator::NotEqual(rtok( + op: Condition::NotEqual(rtok( "channel = ponce AND 'dog race' != '", "bernese mountain", )), @@ -539,12 +430,12 @@ mod tests { .into(), ) .into(), - Fc::Operator { + Fc::Condition { fid: rtok( "channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers", ), - op: Operator::GreaterThan(rtok( + op: Condition::GreaterThan(rtok( "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000", )), @@ -556,10 +447,10 @@ mod tests { ( "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", Fc::And( - Fc::Operator { fid: rtok("", "channel"), op: Operator::Equal(rtok("channel = ", "ponce")) }.into(), + Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(), Fc::Or( - Fc::Operator { fid: rtok("channel = ponce AND ( '", "dog race"), op: Operator::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), - Fc::Operator { fid: rtok("channel = 
ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), + Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), + Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), ).into()), ), ( @@ -567,10 +458,10 @@ mod tests { Fc::And( Fc::Or( Fc::And( - Fc::Operator { fid: rtok("(", "channel"), op: Operator::Equal(rtok("(channel = ", "ponce")) }.into(), - Fc::Operator { fid: rtok("(channel = ponce AND '", "dog race"), op: Operator::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), + Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(), + Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), ).into(), - Fc::Operator { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), + Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), ).into(), Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese 
mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() ) @@ -590,34 +481,4 @@ mod tests { assert_eq!(filter, expected, "Filter `{}` failed.", input); } } - - #[test] - fn name() { - use FilterCondition as Fc; - - // new_from_raw_offset is unsafe - let test_case = [ - // simple test - ( - "channel=Ponce", - Fc::Operator { - fid: rtok("", "channel"), - op: Operator::Equal(rtok("channel = ", "Ponce")), - }, - ), - ]; - - for (input, expected) in test_case { - let result = Fc::parse(input); - - assert!( - result.is_ok(), - "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", - expected, - result.unwrap_err() - ); - let filter = result.unwrap().1; - assert_eq!(filter, expected, "Filter `{}` failed.", input); - } - } } diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs new file mode 100644 index 000000000..c36becf7e --- /dev/null +++ b/filter_parser/src/value.rs @@ -0,0 +1,71 @@ +use nom::branch::alt; +use nom::bytes::complete::{take_till, take_while1}; +use nom::character::complete::char; +use nom::sequence::delimited; +use nom::IResult; + +use crate::{ws, Span, Token}; + +/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* +pub fn parse_value(input: Span) -> IResult { + // singleQuoted = "'" .* all but quotes "'" + let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); + // doubleQuoted = "\"" (word | spaces)* "\"" + let quoted_key = |input| take_till(|c: char| c == '"')(input); + // word = (alphanumeric | _ | - | .)+ + let word = |input| take_while1(is_key_component)(input); + + alt(( + ws(delimited(char('\''), simple_quoted_key, char('\''))), + ws(delimited(char('"'), quoted_key, char('"'))), + ws(word), + ))(input) + .map(|(s, t)| (s, t.into())) +} + +fn is_key_component(c: char) -> bool { + c.is_alphanumeric() || ['_', '-', '.'].contains(&c) +} + +#[cfg(test)] +pub mod tests { + use super::*; + use crate::tests::rtok; + + #[test] + fn name() { + let test_case = [ + 
("channel", rtok("", "channel")), + (".private", rtok("", ".private")), + ("I-love-kebab", rtok("", "I-love-kebab")), + ("but_snakes_are_also_good", rtok("", "but_snakes_are_also_good")), + ("parens(", rtok("", "parens")), + ("parens)", rtok("", "parens")), + ("not!", rtok("", "not")), + (" channel", rtok(" ", "channel")), + ("channel ", rtok("", "channel")), + ("'channel'", rtok("'", "channel")), + ("\"channel\"", rtok("\"", "channel")), + ("'cha)nnel'", rtok("'", "cha)nnel")), + ("'cha\"nnel'", rtok("'", "cha\"nnel")), + ("\"cha'nnel\"", rtok("\"", "cha'nnel")), + ("\" some spaces \"", rtok("\"", " some spaces ")), + ("\"cha'nnel\"", rtok("'", "cha'nnel")), + ("\"cha'nnel\"", rtok("'", "cha'nnel")), + ]; + + for (input, expected) in test_case { + let input = Span::new(input); + let result = parse_value(input); + + assert!( + result.is_ok(), + "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`", + expected, + result.unwrap_err() + ); + let value = result.unwrap().1; + assert_eq!(value, expected, "Filter `{}` failed.", input); + } + } +} diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 594cc60e0..63fd0d984 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -40,6 +40,7 @@ uuid = { version = "0.8.2", features = ["v4"] } # facet filter parser nom = "7.0.0" +nom_locate = "4.0.0" # documents words self-join itertools = "0.10.0" diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 4fedeee69..5c57adb88 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -16,20 +16,20 @@ use crate::heed_codec::facet::{ }; use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; -#[derive(Debug, Clone, PartialEq)] -pub enum FilterCondition { - Operator(FieldId, Operator), +#[derive(Debug, Clone)] +pub enum FilterCondition<'a> { + Operator(FieldId, Operator<'a>), Or(Box, Box), And(Box, Box), Empty, } -impl 
FilterCondition { - pub fn from_array( +impl<'a> FilterCondition<'a> { + pub fn from_array( rtxn: &heed::RoTxn, index: &Index, array: I, - ) -> Result> + ) -> Result>> where I: IntoIterator>, J: IntoIterator, @@ -73,8 +73,8 @@ impl FilterCondition { pub fn from_str( rtxn: &heed::RoTxn, index: &Index, - expression: &str, - ) -> Result { + expression: &'a str, + ) -> Result> { let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_fields = index.filterable_fields(rtxn)?; let ctx = @@ -93,7 +93,7 @@ impl FilterCondition { } } } - pub fn negate(self) -> FilterCondition { + pub fn negate(self) -> FilterCondition<'a> { match self { Operator(fid, op) => match op.negate() { (op, None) => Operator(fid, op), @@ -106,7 +106,7 @@ impl FilterCondition { } } -impl FilterCondition { +impl<'a> FilterCondition<'a> { /// Aggregates the documents ids that are part of the specified range automatically /// going deeper through the levels. fn explore_facet_number_levels( @@ -221,7 +221,7 @@ impl FilterCondition { numbers_db: heed::Database, strings_db: heed::Database, field_id: FieldId, - operator: &Operator, + operator: &Operator<'a>, ) -> Result { // Make sure we always bound the ranges with the field id and the level, // as the facets values are all in the same database and prefixed by the diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 47189841a..c25d523aa 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -28,25 +28,38 @@ use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; use nom::sequence::{delimited, preceded, tuple}; use nom::IResult; +use nom_locate::LocatedSpan; use self::Operator::*; use super::FilterCondition; use crate::{FieldId, FieldsIdsMap}; -#[derive(Debug, Clone, PartialEq)] -pub enum Operator { - GreaterThan(f64), - GreaterThanOrEqual(f64), - Equal(Option, String), - NotEqual(Option, String), - LowerThan(f64), - 
LowerThanOrEqual(f64), - Between(f64, f64), - GeoLowerThan([f64; 2], f64), - GeoGreaterThan([f64; 2], f64), +pub enum FilterError { + AttributeNotFilterable(String), } -impl Operator { +#[derive(Debug, Clone, PartialEq, Eq)] +struct Token<'a> { + pub position: Span<'a>, + pub inner: &'a str, +} + +type Span<'a> = LocatedSpan<&'a str>; + +#[derive(Debug, Clone)] +pub enum Operator<'a> { + GreaterThan(Token<'a>), + GreaterThanOrEqual(Token<'a>), + Equal(Option>, Token<'a>), + NotEqual(Option>, Token<'a>), + LowerThan(Token<'a>), + LowerThanOrEqual(Token<'a>), + Between(Token<'a>, Token<'a>), + GeoLowerThan([Token<'a>; 2], Token<'a>), + GeoGreaterThan([Token<'a>; 2], Token<'a>), +} + +impl<'a> Operator<'a> { /// This method can return two operations in case it must express /// an OR operation for the between case (i.e. `TO`). pub fn negate(self) -> (Self, Option) { @@ -180,16 +193,13 @@ impl<'a> ParseContext<'a> { where E: FilterParserError<'a>, { - let error = match input.chars().nth(0) { - Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), - None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), - }; - if !self.filterable_fields.contains(key) { - return error; - } match self.fields_ids_map.id(key) { - Some(fid) => Ok(fid), - None => error, + Some(fid) if self.filterable_fields.contains(key) => Ok(fid), + _ => Err(nom::Err::Failure(E::add_context( + input, + "Attribute is not filterable", + E::from_char(input, 'T'), + ))), } } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 9b76ca851..8cd7f1a34 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -34,7 +34,8 @@ mod query_tree; pub struct Search<'a> { query: Option, - filter: Option, + // this should be linked to the String in the query + filter: Option>, offset: usize, limit: usize, sort_criteria: Option>,