update some names and move some parser out of the lib.rs

This commit is contained in:
Tamo 2021-10-22 01:59:38 +02:00
parent 7e5c5c4d27
commit 01dedde1c9
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
8 changed files with 318 additions and 285 deletions

View File

@ -8,8 +8,3 @@ edition = "2021"
[dependencies] [dependencies]
nom = "7.0.0" nom = "7.0.0"
nom_locate = "4.0.0" nom_locate = "4.0.0"
[dev-dependencies]
big_s = "1.0.2"
maplit = "1.0.2"
rand = "0.8.3"

View File

@ -0,0 +1,94 @@
//! BNF grammar:
//!
//! ```text
//! condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
//! to = value value TO value
//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
//! singleQuoted = "'" .* all but quotes "'"
//! doubleQuoted = "\"" (word | spaces)* "\""
//! word = (alphanumeric | _ | - | .)+
//! geoRadius = WS* ~ "_geoRadius(" ~ float ~ "," ~ float ~ "," ~ float ~ ")"
//! ```
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::sequence::tuple;
use nom::IResult;
use Condition::*;
use crate::{parse_value, ws, FilterCondition, Span, Token};
/// Comparison applied to a field inside a filter expression.
///
/// Every variant stores the [`Token`] (or tokens) of its right-hand
/// operand(s); the field being compared is stored by the enclosing
/// `FilterCondition::Condition`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Condition<'a> {
/// `field > value`
GreaterThan(Token<'a>),
/// `field >= value`
GreaterThanOrEqual(Token<'a>),
/// `field = value`
Equal(Token<'a>),
/// `field != value`
NotEqual(Token<'a>),
/// `field < value`
LowerThan(Token<'a>),
/// `field <= value`
LowerThanOrEqual(Token<'a>),
/// `field from TO to`. Negating it yields `< from` OR `> to`, i.e. both
/// bounds are treated as inclusive.
Between { from: Token<'a>, to: Token<'a> },
}
impl<'a> Condition<'a> {
/// This method can return two operations in case it must express
/// an OR operation for the between case (i.e. `TO`).
pub fn negate(self) -> (Self, Option<Self>) {
match self {
GreaterThan(n) => (LowerThanOrEqual(n), None),
GreaterThanOrEqual(n) => (LowerThan(n), None),
Equal(s) => (NotEqual(s), None),
NotEqual(s) => (Equal(s), None),
LowerThan(n) => (GreaterThanOrEqual(n), None),
LowerThanOrEqual(n) => (GreaterThan(n), None),
Between { from, to } => (LowerThan(from), Some(GreaterThan(to))),
}
}
}
/// condition = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
///
/// Parses `key operator value` and returns a `FilterCondition::Condition`
/// whose `fid` is the key token and whose `op` wraps the value token in the
/// [`Condition`] variant matching the operator.
pub fn parse_condition(input: Span) -> IResult<Span, FilterCondition> {
    // `alt` tries its parsers in order, so `<=` / `>=` must be listed before
    // `<` / `>` to avoid matching only their first character.
    let comparator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
    let (rest, (fid, symbol, operand)) =
        tuple((|i| parse_value(i), comparator, |i| parse_value(i)))(input)?;

    let op = match *symbol.fragment() {
        "=" => Equal(operand),
        "!=" => NotEqual(operand),
        ">" => GreaterThan(operand),
        "<" => LowerThan(operand),
        "<=" => LowerThanOrEqual(operand),
        ">=" => GreaterThanOrEqual(operand),
        // `comparator` can only succeed on one of the six tags above.
        _ => unreachable!(),
    };

    Ok((rest, FilterCondition::Condition { fid, op }))
}
/// to = value value TO value
///
/// Parses a range filter such as `subscribers 100 TO 1000`: a key, the lower
/// bound, the literal `TO`, then the upper bound. The result is a
/// `FilterCondition::Condition` holding a `Between { from, to }`.
pub fn parse_to(input: Span) -> IResult<Span, FilterCondition> {
    let (rest, (fid, from, _, to)) = tuple((
        ws(|i| parse_value(i)),
        ws(|i| parse_value(i)),
        tag("TO"),
        ws(|i| parse_value(i)),
    ))(input)?;

    // `parse_value` already produces tokens, so they are stored as-is.
    Ok((rest, FilterCondition::Condition { fid, op: Between { from, to } }))
}

View File

@ -1,4 +1,3 @@
#![allow(unused_imports)]
//! BNF grammar: //! BNF grammar:
//! //!
//! ```text //! ```text
@ -7,8 +6,8 @@
//! and = not (~ "AND" not)* //! and = not (~ "AND" not)*
//! not = ("NOT" | "!") not | primary //! not = ("NOT" | "!") not | primary
//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius //! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius
//! to = value value TO value
//! condition = value ("==" | ">" ...) value //! condition = value ("==" | ">" ...) value
//! to = value value TO value
//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* //! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
//! singleQuoted = "'" .* all but quotes "'" //! singleQuoted = "'" .* all but quotes "'"
//! doubleQuoted = "\"" (word | spaces)* "\"" //! doubleQuoted = "\"" (word | spaces)* "\""
@ -16,61 +15,24 @@
//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) //! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
//! ``` //! ```
#[derive(Debug, Clone, PartialEq, Eq)] mod condition;
pub enum FilterCondition<'a> { mod value;
Operator { fid: Token<'a>, op: Operator<'a> },
Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>),
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
Empty,
}
impl<'a> FilterCondition<'a> {
pub fn negate(self) -> FilterCondition<'a> {
use FilterCondition::*;
match self {
Operator { fid, op } => match op.negate() {
(op, None) => Operator { fid, op },
(a, Some(b)) => {
Or(Operator { fid: fid.clone(), op: a }.into(), Operator { fid, op: b }.into())
}
},
Or(a, b) => And(a.negate().into(), b.negate().into()),
And(a, b) => Or(a.negate().into(), b.negate().into()),
Empty => Empty,
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
}
}
pub fn parse(input: &'a str) -> IResult<Span, Self> {
let span = Span::new(input);
parse_expression(span)
}
}
use std::collections::HashSet;
use std::fmt::Debug; use std::fmt::Debug;
use std::result::Result as StdResult;
pub use condition::{parse_condition, parse_to, Condition};
use nom::branch::alt; use nom::branch::alt;
use nom::bytes::complete::{tag, take_till, take_while1}; use nom::bytes::complete::tag;
use nom::character::complete::{char, multispace0}; use nom::character::complete::{char, multispace0};
use nom::combinator::map; use nom::combinator::map;
use nom::error::{ContextError, ErrorKind, ParseError, VerboseError}; use nom::error::{ContextError, ParseError};
use nom::multi::{many0, separated_list1}; use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float; use nom::number::complete::recognize_float;
use nom::sequence::{delimited, preceded, tuple}; use nom::sequence::{delimited, preceded};
use nom::IResult; use nom::IResult;
use nom_locate::LocatedSpan; use nom_locate::LocatedSpan;
pub(crate) use value::parse_value;
use self::Operator::*; type Span<'a> = LocatedSpan<&'a str>;
pub enum FilterError {
AttributeNotFilterable(String),
}
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token<'a> { pub struct Token<'a> {
@ -90,41 +52,48 @@ impl<'a> From<Span<'a>> for Token<'a> {
} }
} }
type Span<'a> = LocatedSpan<&'a str>;
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum Operator<'a> { pub enum FilterCondition<'a> {
GreaterThan(Token<'a>), Condition { fid: Token<'a>, op: Condition<'a> },
GreaterThanOrEqual(Token<'a>), Or(Box<Self>, Box<Self>),
Equal(Token<'a>), And(Box<Self>, Box<Self>),
NotEqual(Token<'a>), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
LowerThan(Token<'a>), GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
LowerThanOrEqual(Token<'a>), Empty,
Between { from: Token<'a>, to: Token<'a> },
} }
impl<'a> Operator<'a> { impl<'a> FilterCondition<'a> {
/// This method can return two operations in case it must express pub fn negate(self) -> FilterCondition<'a> {
/// an OR operation for the between case (i.e. `TO`). use FilterCondition::*;
pub fn negate(self) -> (Self, Option<Self>) {
match self { match self {
GreaterThan(n) => (LowerThanOrEqual(n), None), Condition { fid, op } => match op.negate() {
GreaterThanOrEqual(n) => (LowerThan(n), None), (op, None) => Condition { fid, op },
Equal(s) => (NotEqual(s), None), (a, Some(b)) => Or(
NotEqual(s) => (Equal(s), None), Condition { fid: fid.clone(), op: a }.into(),
LowerThan(n) => (GreaterThanOrEqual(n), None), Condition { fid, op: b }.into(),
LowerThanOrEqual(n) => (GreaterThan(n), None), ),
Between { from, to } => (LowerThan(from), Some(GreaterThan(to))), },
} Or(a, b) => And(a.negate().into(), b.negate().into()),
And(a, b) => Or(a.negate().into(), b.negate().into()),
Empty => Empty,
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
} }
} }
pub trait FilterParserError<'a>: pub fn parse(input: &'a str) -> IResult<Span, Self> {
nom::error::ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug let span = Span::new(input);
{ parse_expression(span)
}
} }
impl<'a> FilterParserError<'a> for VerboseError<&'a str> {} // remove OPTIONAL whitespaces before AND after the the provided parser
fn ws<'a, O>(
inner: impl FnMut(Span<'a>) -> IResult<Span, O>,
) -> impl FnMut(Span<'a>) -> IResult<Span, O> {
delimited(multispace0, inner, multispace0)
}
/// and = not (~ "AND" not)* /// and = not (~ "AND" not)*
fn parse_or(input: Span) -> IResult<Span, FilterCondition> { fn parse_or(input: Span) -> IResult<Span, FilterCondition> {
@ -153,60 +122,6 @@ fn parse_not(input: Span) -> IResult<Span, FilterCondition> {
}))(input) }))(input)
} }
fn ws<'a, O>(
inner: impl FnMut(Span<'a>) -> IResult<Span, O>,
) -> impl FnMut(Span<'a>) -> IResult<Span, O> {
delimited(multispace0, inner, multispace0)
}
/// condition = value ("==" | ">" ...) value
fn parse_condition(input: Span) -> IResult<Span, FilterCondition> {
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
let (input, (key, op, value)) =
tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?;
let fid = key.into();
// TODO
match *op.fragment() {
"=" => {
let k = FilterCondition::Operator { fid, op: Equal(value.into()) };
Ok((input, k))
}
"!=" => {
let k = FilterCondition::Operator { fid, op: NotEqual(value.into()) };
Ok((input, k))
}
">" | "<" | "<=" | ">=" => {
let k = match *op.fragment() {
">" => FilterCondition::Operator { fid, op: GreaterThan(value.into()) },
"<" => FilterCondition::Operator { fid, op: LowerThan(value.into()) },
"<=" => FilterCondition::Operator { fid, op: LowerThanOrEqual(value.into()) },
">=" => FilterCondition::Operator { fid, op: GreaterThanOrEqual(value.into()) },
_ => unreachable!(),
};
Ok((input, k))
}
_ => unreachable!(),
}
}
/// to = value value TO value
fn parse_to(input: Span) -> IResult<Span, FilterCondition> {
let (input, (key, from, _, to)) =
tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))(
input,
)?;
Ok((
input,
FilterCondition::Operator {
fid: key.into(),
op: Between { from: from.into(), to: to.into() },
},
))
}
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
fn parse_geo_radius(input: Span) -> IResult<Span, FilterCondition> { fn parse_geo_radius(input: Span) -> IResult<Span, FilterCondition> {
let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`";
@ -262,40 +177,17 @@ fn parse_primary(input: Span) -> IResult<Span, FilterCondition> {
))(input) ))(input)
} }
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
fn parse_value(input: Span) -> IResult<Span, Span> {
// singleQuoted = "'" .* all but quotes "'"
let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input);
// doubleQuoted = "\"" (word | spaces)* "\""
let quoted_key = |input| take_till(|c: char| c == '"')(input);
// word = (alphanumeric | _ | - | .)+
let word = |input| take_while1(is_key_component)(input);
alt((
ws(delimited(char('\''), simple_quoted_key, char('\''))),
ws(delimited(char('"'), quoted_key, char('"'))),
ws(word),
))(input)
}
fn is_key_component(c: char) -> bool {
c.is_alphanumeric() || ['_', '-', '.'].contains(&c)
}
/// expression = or /// expression = or
pub fn parse_expression(input: Span) -> IResult<Span, FilterCondition> { pub fn parse_expression(input: Span) -> IResult<Span, FilterCondition> {
parse_or(input) parse_or(input)
} }
#[cfg(test)] #[cfg(test)]
mod tests { pub mod tests {
use big_s::S;
use maplit::hashset;
use super::*; use super::*;
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> { pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
// if the string is empty we still need to return 1 for the line number // if the string is empty we still need to return 1 for the line number
let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count()); let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count());
let offset = before.chars().count(); let offset = before.chars().count();
@ -306,149 +198,148 @@ mod tests {
fn parse() { fn parse() {
use FilterCondition as Fc; use FilterCondition as Fc;
// new_from_raw_offset is unsafe
let test_case = [ let test_case = [
// simple test // simple test
( (
"channel = Ponce", "channel = Ponce",
Fc::Operator { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "Ponce")), op: Condition::Equal(rtok("channel = ", "Ponce")),
}, },
), ),
( (
"subscribers = 12", "subscribers = 12",
Fc::Operator { Fc::Condition {
fid: rtok("", "subscribers"), fid: rtok("", "subscribers"),
op: Operator::Equal(rtok("subscribers = ", "12")), op: Condition::Equal(rtok("subscribers = ", "12")),
}, },
), ),
// test all the quotes and simple quotes // test all the quotes and simple quotes
( (
"channel = 'Mister Mv'", "channel = 'Mister Mv'",
Fc::Operator { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = '", "Mister Mv")), op: Condition::Equal(rtok("channel = '", "Mister Mv")),
}, },
), ),
( (
"channel = \"Mister Mv\"", "channel = \"Mister Mv\"",
Fc::Operator { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = \"", "Mister Mv")), op: Condition::Equal(rtok("channel = \"", "Mister Mv")),
}, },
), ),
( (
"'dog race' = Borzoi", "'dog race' = Borzoi",
Fc::Operator { Fc::Condition {
fid: rtok("'", "dog race"), fid: rtok("'", "dog race"),
op: Operator::Equal(rtok("'dog race' = ", "Borzoi")), op: Condition::Equal(rtok("'dog race' = ", "Borzoi")),
}, },
), ),
( (
"\"dog race\" = Chusky", "\"dog race\" = Chusky",
Fc::Operator { Fc::Condition {
fid: rtok("\"", "dog race"), fid: rtok("\"", "dog race"),
op: Operator::Equal(rtok("\"dog race\" = ", "Chusky")), op: Condition::Equal(rtok("\"dog race\" = ", "Chusky")),
}, },
), ),
( (
"\"dog race\" = \"Bernese Mountain\"", "\"dog race\" = \"Bernese Mountain\"",
Fc::Operator { Fc::Condition {
fid: rtok("\"", "dog race"), fid: rtok("\"", "dog race"),
op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
}, },
), ),
( (
"'dog race' = 'Bernese Mountain'", "'dog race' = 'Bernese Mountain'",
Fc::Operator { Fc::Condition {
fid: rtok("'", "dog race"), fid: rtok("'", "dog race"),
op: Operator::Equal(rtok("'dog race' = '", "Bernese Mountain")), op: Condition::Equal(rtok("'dog race' = '", "Bernese Mountain")),
}, },
), ),
( (
"\"dog race\" = 'Bernese Mountain'", "\"dog race\" = 'Bernese Mountain'",
Fc::Operator { Fc::Condition {
fid: rtok("\"", "dog race"), fid: rtok("\"", "dog race"),
op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")), op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
}, },
), ),
// test all the operators // test all the operators
( (
"channel != ponce", "channel != ponce",
Fc::Operator { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Operator::NotEqual(rtok("channel != ", "ponce")), op: Condition::NotEqual(rtok("channel != ", "ponce")),
}, },
), ),
( (
"NOT channel = ponce", "NOT channel = ponce",
Fc::Operator { Fc::Condition {
fid: rtok("NOT ", "channel"), fid: rtok("NOT ", "channel"),
op: Operator::NotEqual(rtok("NOT channel = ", "ponce")), op: Condition::NotEqual(rtok("NOT channel = ", "ponce")),
}, },
), ),
( (
"subscribers < 1000", "subscribers < 1000",
Fc::Operator { Fc::Condition {
fid: rtok("", "subscribers"), fid: rtok("", "subscribers"),
op: Operator::LowerThan(rtok("subscribers < ", "1000")), op: Condition::LowerThan(rtok("subscribers < ", "1000")),
}, },
), ),
( (
"subscribers > 1000", "subscribers > 1000",
Fc::Operator { Fc::Condition {
fid: rtok("", "subscribers"), fid: rtok("", "subscribers"),
op: Operator::GreaterThan(rtok("subscribers > ", "1000")), op: Condition::GreaterThan(rtok("subscribers > ", "1000")),
}, },
), ),
( (
"subscribers <= 1000", "subscribers <= 1000",
Fc::Operator { Fc::Condition {
fid: rtok("", "subscribers"), fid: rtok("", "subscribers"),
op: Operator::LowerThanOrEqual(rtok("subscribers <= ", "1000")), op: Condition::LowerThanOrEqual(rtok("subscribers <= ", "1000")),
}, },
), ),
( (
"subscribers >= 1000", "subscribers >= 1000",
Fc::Operator { Fc::Condition {
fid: rtok("", "subscribers"), fid: rtok("", "subscribers"),
op: Operator::GreaterThanOrEqual(rtok("subscribers >= ", "1000")), op: Condition::GreaterThanOrEqual(rtok("subscribers >= ", "1000")),
}, },
), ),
( (
"NOT subscribers < 1000", "NOT subscribers < 1000",
Fc::Operator { Fc::Condition {
fid: rtok("NOT ", "subscribers"), fid: rtok("NOT ", "subscribers"),
op: Operator::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")), op: Condition::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")),
}, },
), ),
( (
"NOT subscribers > 1000", "NOT subscribers > 1000",
Fc::Operator { Fc::Condition {
fid: rtok("NOT ", "subscribers"), fid: rtok("NOT ", "subscribers"),
op: Operator::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")), op: Condition::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")),
}, },
), ),
( (
"NOT subscribers <= 1000", "NOT subscribers <= 1000",
Fc::Operator { Fc::Condition {
fid: rtok("NOT ", "subscribers"), fid: rtok("NOT ", "subscribers"),
op: Operator::GreaterThan(rtok("NOT subscribers <= ", "1000")), op: Condition::GreaterThan(rtok("NOT subscribers <= ", "1000")),
}, },
), ),
( (
"NOT subscribers >= 1000", "NOT subscribers >= 1000",
Fc::Operator { Fc::Condition {
fid: rtok("NOT ", "subscribers"), fid: rtok("NOT ", "subscribers"),
op: Operator::LowerThan(rtok("NOT subscribers >= ", "1000")), op: Condition::LowerThan(rtok("NOT subscribers >= ", "1000")),
}, },
), ),
( (
"subscribers 100 TO 1000", "subscribers 100 TO 1000",
Fc::Operator { Fc::Condition {
fid: rtok("", "subscribers"), fid: rtok("", "subscribers"),
op: Operator::Between { op: Condition::Between {
from: rtok("subscribers ", "100"), from: rtok("subscribers ", "100"),
to: rtok("subscribers 100 TO ", "1000"), to: rtok("subscribers 100 TO ", "1000"),
}, },
@ -457,14 +348,14 @@ mod tests {
( (
"NOT subscribers 100 TO 1000", "NOT subscribers 100 TO 1000",
Fc::Or( Fc::Or(
Fc::Operator { Fc::Condition {
fid: rtok("NOT ", "subscribers"), fid: rtok("NOT ", "subscribers"),
op: Operator::LowerThan(rtok("NOT subscribers ", "100")), op: Condition::LowerThan(rtok("NOT subscribers ", "100")),
} }
.into(), .into(),
Fc::Operator { Fc::Condition {
fid: rtok("NOT ", "subscribers"), fid: rtok("NOT ", "subscribers"),
op: Operator::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")),
} }
.into(), .into(),
), ),
@ -487,14 +378,14 @@ mod tests {
( (
"channel = ponce AND 'dog race' != 'bernese mountain'", "channel = ponce AND 'dog race' != 'bernese mountain'",
Fc::And( Fc::And(
Fc::Operator { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "ponce")), op: Condition::Equal(rtok("channel = ", "ponce")),
} }
.into(), .into(),
Fc::Operator { Fc::Condition {
fid: rtok("channel = ponce AND '", "dog race"), fid: rtok("channel = ponce AND '", "dog race"),
op: Operator::NotEqual(rtok( op: Condition::NotEqual(rtok(
"channel = ponce AND 'dog race' != '", "channel = ponce AND 'dog race' != '",
"bernese mountain", "bernese mountain",
)), )),
@ -505,14 +396,14 @@ mod tests {
( (
"channel = ponce OR 'dog race' != 'bernese mountain'", "channel = ponce OR 'dog race' != 'bernese mountain'",
Fc::Or( Fc::Or(
Fc::Operator { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "ponce")), op: Condition::Equal(rtok("channel = ", "ponce")),
} }
.into(), .into(),
Fc::Operator { Fc::Condition {
fid: rtok("channel = ponce OR '", "dog race"), fid: rtok("channel = ponce OR '", "dog race"),
op: Operator::NotEqual(rtok( op: Condition::NotEqual(rtok(
"channel = ponce OR 'dog race' != '", "channel = ponce OR 'dog race' != '",
"bernese mountain", "bernese mountain",
)), )),
@ -524,14 +415,14 @@ mod tests {
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000",
Fc::Or( Fc::Or(
Fc::And( Fc::And(
Fc::Operator { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "ponce")), op: Condition::Equal(rtok("channel = ", "ponce")),
} }
.into(), .into(),
Fc::Operator { Fc::Condition {
fid: rtok("channel = ponce AND '", "dog race"), fid: rtok("channel = ponce AND '", "dog race"),
op: Operator::NotEqual(rtok( op: Condition::NotEqual(rtok(
"channel = ponce AND 'dog race' != '", "channel = ponce AND 'dog race' != '",
"bernese mountain", "bernese mountain",
)), )),
@ -539,12 +430,12 @@ mod tests {
.into(), .into(),
) )
.into(), .into(),
Fc::Operator { Fc::Condition {
fid: rtok( fid: rtok(
"channel = ponce AND 'dog race' != 'bernese mountain' OR ", "channel = ponce AND 'dog race' != 'bernese mountain' OR ",
"subscribers", "subscribers",
), ),
op: Operator::GreaterThan(rtok( op: Condition::GreaterThan(rtok(
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ",
"1000", "1000",
)), )),
@ -556,10 +447,10 @@ mod tests {
( (
"channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )",
Fc::And( Fc::And(
Fc::Operator { fid: rtok("", "channel"), op: Operator::Equal(rtok("channel = ", "ponce")) }.into(), Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(),
Fc::Or( Fc::Or(
Fc::Operator { fid: rtok("channel = ponce AND ( '", "dog race"), op: Operator::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(),
Fc::Operator { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
).into()), ).into()),
), ),
( (
@ -567,10 +458,10 @@ mod tests {
Fc::And( Fc::And(
Fc::Or( Fc::Or(
Fc::And( Fc::And(
Fc::Operator { fid: rtok("(", "channel"), op: Operator::Equal(rtok("(channel = ", "ponce")) }.into(), Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(),
Fc::Operator { fid: rtok("(channel = ponce AND '", "dog race"), op: Operator::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(),
).into(), ).into(),
Fc::Operator { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
).into(), ).into(),
Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into()
) )
@ -590,34 +481,4 @@ mod tests {
assert_eq!(filter, expected, "Filter `{}` failed.", input); assert_eq!(filter, expected, "Filter `{}` failed.", input);
} }
} }
#[test]
fn name() {
use FilterCondition as Fc;
// new_from_raw_offset is unsafe
let test_case = [
// simple test
(
"channel=Ponce",
Fc::Operator {
fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "Ponce")),
},
),
];
for (input, expected) in test_case {
let result = Fc::parse(input);
assert!(
result.is_ok(),
"Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
expected,
result.unwrap_err()
);
let filter = result.unwrap().1;
assert_eq!(filter, expected, "Filter `{}` failed.", input);
}
}
} }

View File

@ -0,0 +1,71 @@
use nom::branch::alt;
use nom::bytes::complete::{take_till, take_while1};
use nom::character::complete::char;
use nom::sequence::delimited;
use nom::IResult;
use crate::{ws, Span, Token};
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
///
/// Parses one value and returns it as a [`Token`]. Three forms are tried in
/// order: a single-quoted string, a double-quoted string, then a bare word.
/// Each is wrapped in `ws`, and the surrounding quotes are not part of the
/// returned token.
pub fn parse_value(input: Span) -> IResult<Span, Token> {
    // word = (alphanumeric | _ | - | .)+
    let bare_word = |i| take_while1(is_key_component)(i);
    // singleQuoted body: everything up to the next single quote
    let single_quoted_body = |i| take_till(|c: char| c == '\'')(i);
    // doubleQuoted body: everything up to the next double quote
    let double_quoted_body = |i| take_till(|c: char| c == '"')(i);

    let (rest, span) = alt((
        ws(delimited(char('\''), single_quoted_body, char('\''))),
        ws(delimited(char('"'), double_quoted_body, char('"'))),
        ws(bare_word),
    ))(input)?;

    Ok((rest, span.into()))
}
/// Tells whether `c` may appear in an unquoted word: any alphanumeric
/// character, or one of `_`, `-`, `.`.
fn is_key_component(c: char) -> bool {
    matches!(c, '_' | '-' | '.') || c.is_alphanumeric()
}
#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::tests::rtok;

    /// Checks that `parse_value` extracts the expected token from bare words,
    /// whitespace-padded words and quoted strings.
    #[test]
    fn name() {
        // Each case is `(input, expected token)`. The first argument of `rtok`
        // is the text that precedes the token inside the input.
        let test_case = [
            // bare words
            ("channel", rtok("", "channel")),
            (".private", rtok("", ".private")),
            ("I-love-kebab", rtok("", "I-love-kebab")),
            ("but_snakes_are_also_good", rtok("", "but_snakes_are_also_good")),
            // a bare word stops at the first character that is not part of a word
            ("parens(", rtok("", "parens")),
            ("parens)", rtok("", "parens")),
            ("not!", rtok("", "not")),
            // surrounding whitespace is skipped
            ("    channel", rtok("    ", "channel")),
            ("channel     ", rtok("", "channel")),
            // quoted values: the quotes are dropped, the content is kept verbatim
            ("'channel'", rtok("'", "channel")),
            ("\"channel\"", rtok("\"", "channel")),
            ("'cha)nnel'", rtok("'", "cha)nnel")),
            ("'cha\"nnel'", rtok("'", "cha\"nnel")),
            ("\"cha'nnel\"", rtok("\"", "cha'nnel")),
            ("\" some spaces \"", rtok("\"", " some spaces ")),
        ];

        for (input, expected) in test_case {
            let input = Span::new(input);
            let result = parse_value(input);

            assert!(
                result.is_ok(),
                "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
                expected,
                result.unwrap_err()
            );
            let value = result.unwrap().1;
            assert_eq!(value, expected, "Filter `{}` failed.", input);
        }
    }
}

View File

@ -40,6 +40,7 @@ uuid = { version = "0.8.2", features = ["v4"] }
# facet filter parser # facet filter parser
nom = "7.0.0" nom = "7.0.0"
nom_locate = "4.0.0"
# documents words self-join # documents words self-join
itertools = "0.10.0" itertools = "0.10.0"

View File

@ -16,20 +16,20 @@ use crate::heed_codec::facet::{
}; };
use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result};
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone)]
pub enum FilterCondition { pub enum FilterCondition<'a> {
Operator(FieldId, Operator), Operator(FieldId, Operator<'a>),
Or(Box<Self>, Box<Self>), Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>), And(Box<Self>, Box<Self>),
Empty, Empty,
} }
impl FilterCondition { impl<'a> FilterCondition<'a> {
pub fn from_array<I, J, A, B>( pub fn from_array<I, J, A: 'a, B: 'a>(
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
index: &Index, index: &Index,
array: I, array: I,
) -> Result<Option<FilterCondition>> ) -> Result<Option<FilterCondition<'a>>>
where where
I: IntoIterator<Item = Either<J, B>>, I: IntoIterator<Item = Either<J, B>>,
J: IntoIterator<Item = A>, J: IntoIterator<Item = A>,
@ -73,8 +73,8 @@ impl FilterCondition {
pub fn from_str( pub fn from_str(
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
index: &Index, index: &Index,
expression: &str, expression: &'a str,
) -> Result<FilterCondition> { ) -> Result<FilterCondition<'a>> {
let fields_ids_map = index.fields_ids_map(rtxn)?; let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_fields = index.filterable_fields(rtxn)?; let filterable_fields = index.filterable_fields(rtxn)?;
let ctx = let ctx =
@ -93,7 +93,7 @@ impl FilterCondition {
} }
} }
} }
pub fn negate(self) -> FilterCondition { pub fn negate(self) -> FilterCondition<'a> {
match self { match self {
Operator(fid, op) => match op.negate() { Operator(fid, op) => match op.negate() {
(op, None) => Operator(fid, op), (op, None) => Operator(fid, op),
@ -106,7 +106,7 @@ impl FilterCondition {
} }
} }
impl FilterCondition { impl<'a> FilterCondition<'a> {
/// Aggregates the documents ids that are part of the specified range automatically /// Aggregates the documents ids that are part of the specified range automatically
/// going deeper through the levels. /// going deeper through the levels.
fn explore_facet_number_levels( fn explore_facet_number_levels(
@ -221,7 +221,7 @@ impl FilterCondition {
numbers_db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>, numbers_db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
strings_db: heed::Database<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>, strings_db: heed::Database<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
field_id: FieldId, field_id: FieldId,
operator: &Operator, operator: &Operator<'a>,
) -> Result<RoaringBitmap> { ) -> Result<RoaringBitmap> {
// Make sure we always bound the ranges with the field id and the level, // Make sure we always bound the ranges with the field id and the level,
// as the facets values are all in the same database and prefixed by the // as the facets values are all in the same database and prefixed by the

View File

@ -28,25 +28,38 @@ use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float; use nom::number::complete::recognize_float;
use nom::sequence::{delimited, preceded, tuple}; use nom::sequence::{delimited, preceded, tuple};
use nom::IResult; use nom::IResult;
use nom_locate::LocatedSpan;
use self::Operator::*; use self::Operator::*;
use super::FilterCondition; use super::FilterCondition;
use crate::{FieldId, FieldsIdsMap}; use crate::{FieldId, FieldsIdsMap};
#[derive(Debug, Clone, PartialEq)] pub enum FilterError {
pub enum Operator { AttributeNotFilterable(String),
GreaterThan(f64),
GreaterThanOrEqual(f64),
Equal(Option<f64>, String),
NotEqual(Option<f64>, String),
LowerThan(f64),
LowerThanOrEqual(f64),
Between(f64, f64),
GeoLowerThan([f64; 2], f64),
GeoGreaterThan([f64; 2], f64),
} }
impl Operator { #[derive(Debug, Clone, PartialEq, Eq)]
struct Token<'a> {
pub position: Span<'a>,
pub inner: &'a str,
}
type Span<'a> = LocatedSpan<&'a str>;
#[derive(Debug, Clone)]
pub enum Operator<'a> {
GreaterThan(Token<'a>),
GreaterThanOrEqual(Token<'a>),
Equal(Option<Token<'a>>, Token<'a>),
NotEqual(Option<Token<'a>>, Token<'a>),
LowerThan(Token<'a>),
LowerThanOrEqual(Token<'a>),
Between(Token<'a>, Token<'a>),
GeoLowerThan([Token<'a>; 2], Token<'a>),
GeoGreaterThan([Token<'a>; 2], Token<'a>),
}
impl<'a> Operator<'a> {
/// This method can return two operations in case it must express /// This method can return two operations in case it must express
/// an OR operation for the between case (i.e. `TO`). /// an OR operation for the between case (i.e. `TO`).
pub fn negate(self) -> (Self, Option<Self>) { pub fn negate(self) -> (Self, Option<Self>) {
@ -180,16 +193,13 @@ impl<'a> ParseContext<'a> {
where where
E: FilterParserError<'a>, E: FilterParserError<'a>,
{ {
let error = match input.chars().nth(0) {
Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))),
None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))),
};
if !self.filterable_fields.contains(key) {
return error;
}
match self.fields_ids_map.id(key) { match self.fields_ids_map.id(key) {
Some(fid) => Ok(fid), Some(fid) if self.filterable_fields.contains(key) => Ok(fid),
None => error, _ => Err(nom::Err::Failure(E::add_context(
input,
"Attribute is not filterable",
E::from_char(input, 'T'),
))),
} }
} }

View File

@ -34,7 +34,8 @@ mod query_tree;
pub struct Search<'a> { pub struct Search<'a> {
query: Option<String>, query: Option<String>,
filter: Option<FilterCondition>, // this should be linked to the String in the query
filter: Option<FilterCondition<'a>>,
offset: usize, offset: usize,
limit: usize, limit: usize,
sort_criteria: Option<Vec<AscDesc>>, sort_criteria: Option<Vec<AscDesc>>,