update some names and move some parser out of the lib.rs

This commit is contained in:
Tamo 2021-10-22 01:59:38 +02:00
parent 7e5c5c4d27
commit 01dedde1c9
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
8 changed files with 318 additions and 285 deletions

View File

@ -8,8 +8,3 @@ edition = "2021"
[dependencies]
nom = "7.0.0"
nom_locate = "4.0.0"
[dev-dependencies]
big_s = "1.0.2"
maplit = "1.0.2"
rand = "0.8.3"

View File

@ -0,0 +1,94 @@
//! BNF grammar:
//!
//! ```text
//! condition      = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
//! to = value value TO value
//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
//! singleQuoted = "'" .* all but quotes "'"
//! doubleQuoted = "\"" (word | spaces)* "\""
//! word = (alphanumeric | _ | - | .)+
//! geoRadius      = WS* ~ "_geoRadius(" ~ float ~ "," ~ float ~ "," ~ float ~ ")"
//! ```
use nom::branch::alt;
use nom::bytes::complete::tag;
use nom::sequence::tuple;
use nom::IResult;
use Condition::*;
use crate::{parse_value, ws, FilterCondition, Span, Token};
/// The comparison applied to a field inside a `FilterCondition::Condition`.
///
/// Every variant keeps the raw [`Token`] it was parsed from; no numeric
/// conversion happens at this level.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Condition<'a> {
/// Built from the `>` operator.
GreaterThan(Token<'a>),
/// Built from the `>=` operator.
GreaterThanOrEqual(Token<'a>),
/// Built from the `=` operator.
Equal(Token<'a>),
/// Built from the `!=` operator.
NotEqual(Token<'a>),
/// Built from the `<` operator.
LowerThan(Token<'a>),
/// Built from the `<=` operator.
LowerThanOrEqual(Token<'a>),
/// Built from the `field from TO to` syntax.
Between { from: Token<'a>, to: Token<'a> },
}
impl<'a> Condition<'a> {
/// This method can return two operations in case it must express
/// an OR operation for the between case (i.e. `TO`).
pub fn negate(self) -> (Self, Option<Self>) {
match self {
GreaterThan(n) => (LowerThanOrEqual(n), None),
GreaterThanOrEqual(n) => (LowerThan(n), None),
Equal(s) => (NotEqual(s), None),
NotEqual(s) => (Equal(s), None),
LowerThan(n) => (GreaterThanOrEqual(n), None),
LowerThanOrEqual(n) => (GreaterThan(n), None),
Between { from, to } => (LowerThan(from), Some(GreaterThan(to))),
}
}
}
/// condition      = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
///
/// Parses a field name, a comparison operator and a value, and wraps them in
/// a `FilterCondition::Condition`.
pub fn parse_condition(input: Span) -> IResult<Span, FilterCondition> {
    // `alt` tries its branches in order, so the two-character operators must
    // come before `<`, `>` and `=` to be matched as a whole.
    let comparison = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));

    let (rest, (fid, symbol, value)) =
        tuple((|i| parse_value(i), comparison, |i| parse_value(i)))(input)?;

    let op = match *symbol.fragment() {
        "=" => Equal(value),
        "!=" => NotEqual(value),
        ">" => GreaterThan(value),
        "<" => LowerThan(value),
        "<=" => LowerThanOrEqual(value),
        ">=" => GreaterThanOrEqual(value),
        // `comparison` only ever yields one of the six tags listed above.
        _ => unreachable!(),
    };

    Ok((rest, FilterCondition::Condition { fid, op }))
}
/// to             = value value TO value
///
/// Parses the range syntax `field from TO to` and wraps it in a
/// `FilterCondition::Condition` holding a `Between` condition.
pub fn parse_to(input: Span) -> IResult<Span, FilterCondition> {
    let (input, (key, from, _, to)) =
        tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))(
            input,
        )?;

    // `parse_value` already yields `Token`s, so they are stored as-is; no
    // conversion is needed.
    Ok((input, FilterCondition::Condition { fid: key, op: Between { from, to } }))
}

View File

@ -1,4 +1,3 @@
#![allow(unused_imports)]
//! BNF grammar:
//!
//! ```text
@ -7,8 +6,8 @@
//! and = not (~ "AND" not)*
//! not = ("NOT" | "!") not | primary
//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius
//! to = value value TO value
//! condition      = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
//! to = value value TO value
//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
//! singleQuoted = "'" .* all but quotes "'"
//! doubleQuoted = "\"" (word | spaces)* "\""
@ -16,61 +15,24 @@
//! geoRadius      = WS* ~ "_geoRadius(" ~ float ~ "," ~ float ~ "," ~ float ~ ")"
//! ```
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterCondition<'a> {
Operator { fid: Token<'a>, op: Operator<'a> },
Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>),
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
Empty,
}
impl<'a> FilterCondition<'a> {
pub fn negate(self) -> FilterCondition<'a> {
use FilterCondition::*;
match self {
Operator { fid, op } => match op.negate() {
(op, None) => Operator { fid, op },
(a, Some(b)) => {
Or(Operator { fid: fid.clone(), op: a }.into(), Operator { fid, op: b }.into())
}
},
Or(a, b) => And(a.negate().into(), b.negate().into()),
And(a, b) => Or(a.negate().into(), b.negate().into()),
Empty => Empty,
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
}
}
pub fn parse(input: &'a str) -> IResult<Span, Self> {
let span = Span::new(input);
parse_expression(span)
}
}
use std::collections::HashSet;
mod condition;
mod value;
use std::fmt::Debug;
use std::result::Result as StdResult;
pub use condition::{parse_condition, parse_to, Condition};
use nom::branch::alt;
use nom::bytes::complete::{tag, take_till, take_while1};
use nom::bytes::complete::tag;
use nom::character::complete::{char, multispace0};
use nom::combinator::map;
use nom::error::{ContextError, ErrorKind, ParseError, VerboseError};
use nom::error::{ContextError, ParseError};
use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float;
use nom::sequence::{delimited, preceded, tuple};
use nom::sequence::{delimited, preceded};
use nom::IResult;
use nom_locate::LocatedSpan;
pub(crate) use value::parse_value;
use self::Operator::*;
pub enum FilterError {
AttributeNotFilterable(String),
}
type Span<'a> = LocatedSpan<&'a str>;
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Token<'a> {
@ -90,41 +52,48 @@ impl<'a> From<Span<'a>> for Token<'a> {
}
}
type Span<'a> = LocatedSpan<&'a str>;
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Operator<'a> {
GreaterThan(Token<'a>),
GreaterThanOrEqual(Token<'a>),
Equal(Token<'a>),
NotEqual(Token<'a>),
LowerThan(Token<'a>),
LowerThanOrEqual(Token<'a>),
Between { from: Token<'a>, to: Token<'a> },
pub enum FilterCondition<'a> {
Condition { fid: Token<'a>, op: Condition<'a> },
Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>),
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
Empty,
}
impl<'a> Operator<'a> {
/// This method can return two operations in case it must express
/// an OR operation for the between case (i.e. `TO`).
pub fn negate(self) -> (Self, Option<Self>) {
impl<'a> FilterCondition<'a> {
pub fn negate(self) -> FilterCondition<'a> {
use FilterCondition::*;
match self {
GreaterThan(n) => (LowerThanOrEqual(n), None),
GreaterThanOrEqual(n) => (LowerThan(n), None),
Equal(s) => (NotEqual(s), None),
NotEqual(s) => (Equal(s), None),
LowerThan(n) => (GreaterThanOrEqual(n), None),
LowerThanOrEqual(n) => (GreaterThan(n), None),
Between { from, to } => (LowerThan(from), Some(GreaterThan(to))),
}
Condition { fid, op } => match op.negate() {
(op, None) => Condition { fid, op },
(a, Some(b)) => Or(
Condition { fid: fid.clone(), op: a }.into(),
Condition { fid, op: b }.into(),
),
},
Or(a, b) => And(a.negate().into(), b.negate().into()),
And(a, b) => Or(a.negate().into(), b.negate().into()),
Empty => Empty,
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
}
}
pub trait FilterParserError<'a>:
nom::error::ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug
{
pub fn parse(input: &'a str) -> IResult<Span, Self> {
let span = Span::new(input);
parse_expression(span)
}
}
impl<'a> FilterParserError<'a> for VerboseError<&'a str> {}
// Removes OPTIONAL whitespace before AND after the provided parser.
//
// The returned parser runs `multispace0`, then `inner`, then `multispace0`
// again, and yields only the output of `inner`.
fn ws<'a, O>(
inner: impl FnMut(Span<'a>) -> IResult<Span, O>,
) -> impl FnMut(Span<'a>) -> IResult<Span, O> {
delimited(multispace0, inner, multispace0)
}
/// and = not (~ "AND" not)*
fn parse_or(input: Span) -> IResult<Span, FilterCondition> {
@ -153,60 +122,6 @@ fn parse_not(input: Span) -> IResult<Span, FilterCondition> {
}))(input)
}
fn ws<'a, O>(
inner: impl FnMut(Span<'a>) -> IResult<Span, O>,
) -> impl FnMut(Span<'a>) -> IResult<Span, O> {
delimited(multispace0, inner, multispace0)
}
/// condition = value ("==" | ">" ...) value
fn parse_condition(input: Span) -> IResult<Span, FilterCondition> {
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
let (input, (key, op, value)) =
tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?;
let fid = key.into();
// TODO
match *op.fragment() {
"=" => {
let k = FilterCondition::Operator { fid, op: Equal(value.into()) };
Ok((input, k))
}
"!=" => {
let k = FilterCondition::Operator { fid, op: NotEqual(value.into()) };
Ok((input, k))
}
">" | "<" | "<=" | ">=" => {
let k = match *op.fragment() {
">" => FilterCondition::Operator { fid, op: GreaterThan(value.into()) },
"<" => FilterCondition::Operator { fid, op: LowerThan(value.into()) },
"<=" => FilterCondition::Operator { fid, op: LowerThanOrEqual(value.into()) },
">=" => FilterCondition::Operator { fid, op: GreaterThanOrEqual(value.into()) },
_ => unreachable!(),
};
Ok((input, k))
}
_ => unreachable!(),
}
}
/// to = value value TO value
fn parse_to(input: Span) -> IResult<Span, FilterCondition> {
let (input, (key, from, _, to)) =
tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))(
input,
)?;
Ok((
input,
FilterCondition::Operator {
fid: key.into(),
op: Between { from: from.into(), to: to.into() },
},
))
}
/// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float)
fn parse_geo_radius(input: Span) -> IResult<Span, FilterCondition> {
let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`";
@ -262,40 +177,17 @@ fn parse_primary(input: Span) -> IResult<Span, FilterCondition> {
))(input)
}
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
fn parse_value(input: Span) -> IResult<Span, Span> {
// singleQuoted = "'" .* all but quotes "'"
let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input);
// doubleQuoted = "\"" (word | spaces)* "\""
let quoted_key = |input| take_till(|c: char| c == '"')(input);
// word = (alphanumeric | _ | - | .)+
let word = |input| take_while1(is_key_component)(input);
alt((
ws(delimited(char('\''), simple_quoted_key, char('\''))),
ws(delimited(char('"'), quoted_key, char('"'))),
ws(word),
))(input)
}
fn is_key_component(c: char) -> bool {
c.is_alphanumeric() || ['_', '-', '.'].contains(&c)
}
/// expression     = or
///
/// Top-level rule of the grammar: it simply delegates to `parse_or`.
pub fn parse_expression(input: Span) -> IResult<Span, FilterCondition> {
parse_or(input)
}
#[cfg(test)]
mod tests {
use big_s::S;
use maplit::hashset;
pub mod tests {
use super::*;
/// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element
fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
pub fn rtok<'a>(before: &'a str, value: &'a str) -> Token<'a> {
// if the string is empty we still need to return 1 for the line number
let lines = before.is_empty().then(|| 1).unwrap_or_else(|| before.lines().count());
let offset = before.chars().count();
@ -306,149 +198,148 @@ mod tests {
fn parse() {
use FilterCondition as Fc;
// new_from_raw_offset is unsafe
let test_case = [
// simple test
(
"channel = Ponce",
Fc::Operator {
Fc::Condition {
fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "Ponce")),
op: Condition::Equal(rtok("channel = ", "Ponce")),
},
),
(
"subscribers = 12",
Fc::Operator {
Fc::Condition {
fid: rtok("", "subscribers"),
op: Operator::Equal(rtok("subscribers = ", "12")),
op: Condition::Equal(rtok("subscribers = ", "12")),
},
),
// test all the quotes and simple quotes
(
"channel = 'Mister Mv'",
Fc::Operator {
Fc::Condition {
fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = '", "Mister Mv")),
op: Condition::Equal(rtok("channel = '", "Mister Mv")),
},
),
(
"channel = \"Mister Mv\"",
Fc::Operator {
Fc::Condition {
fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = \"", "Mister Mv")),
op: Condition::Equal(rtok("channel = \"", "Mister Mv")),
},
),
(
"'dog race' = Borzoi",
Fc::Operator {
Fc::Condition {
fid: rtok("'", "dog race"),
op: Operator::Equal(rtok("'dog race' = ", "Borzoi")),
op: Condition::Equal(rtok("'dog race' = ", "Borzoi")),
},
),
(
"\"dog race\" = Chusky",
Fc::Operator {
Fc::Condition {
fid: rtok("\"", "dog race"),
op: Operator::Equal(rtok("\"dog race\" = ", "Chusky")),
op: Condition::Equal(rtok("\"dog race\" = ", "Chusky")),
},
),
(
"\"dog race\" = \"Bernese Mountain\"",
Fc::Operator {
Fc::Condition {
fid: rtok("\"", "dog race"),
op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
},
),
(
"'dog race' = 'Bernese Mountain'",
Fc::Operator {
Fc::Condition {
fid: rtok("'", "dog race"),
op: Operator::Equal(rtok("'dog race' = '", "Bernese Mountain")),
op: Condition::Equal(rtok("'dog race' = '", "Bernese Mountain")),
},
),
(
"\"dog race\" = 'Bernese Mountain'",
Fc::Operator {
Fc::Condition {
fid: rtok("\"", "dog race"),
op: Operator::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
op: Condition::Equal(rtok("\"dog race\" = \"", "Bernese Mountain")),
},
),
// test all the operators
(
"channel != ponce",
Fc::Operator {
Fc::Condition {
fid: rtok("", "channel"),
op: Operator::NotEqual(rtok("channel != ", "ponce")),
op: Condition::NotEqual(rtok("channel != ", "ponce")),
},
),
(
"NOT channel = ponce",
Fc::Operator {
Fc::Condition {
fid: rtok("NOT ", "channel"),
op: Operator::NotEqual(rtok("NOT channel = ", "ponce")),
op: Condition::NotEqual(rtok("NOT channel = ", "ponce")),
},
),
(
"subscribers < 1000",
Fc::Operator {
Fc::Condition {
fid: rtok("", "subscribers"),
op: Operator::LowerThan(rtok("subscribers < ", "1000")),
op: Condition::LowerThan(rtok("subscribers < ", "1000")),
},
),
(
"subscribers > 1000",
Fc::Operator {
Fc::Condition {
fid: rtok("", "subscribers"),
op: Operator::GreaterThan(rtok("subscribers > ", "1000")),
op: Condition::GreaterThan(rtok("subscribers > ", "1000")),
},
),
(
"subscribers <= 1000",
Fc::Operator {
Fc::Condition {
fid: rtok("", "subscribers"),
op: Operator::LowerThanOrEqual(rtok("subscribers <= ", "1000")),
op: Condition::LowerThanOrEqual(rtok("subscribers <= ", "1000")),
},
),
(
"subscribers >= 1000",
Fc::Operator {
Fc::Condition {
fid: rtok("", "subscribers"),
op: Operator::GreaterThanOrEqual(rtok("subscribers >= ", "1000")),
op: Condition::GreaterThanOrEqual(rtok("subscribers >= ", "1000")),
},
),
(
"NOT subscribers < 1000",
Fc::Operator {
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Operator::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")),
op: Condition::GreaterThanOrEqual(rtok("NOT subscribers < ", "1000")),
},
),
(
"NOT subscribers > 1000",
Fc::Operator {
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Operator::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")),
op: Condition::LowerThanOrEqual(rtok("NOT subscribers > ", "1000")),
},
),
(
"NOT subscribers <= 1000",
Fc::Operator {
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Operator::GreaterThan(rtok("NOT subscribers <= ", "1000")),
op: Condition::GreaterThan(rtok("NOT subscribers <= ", "1000")),
},
),
(
"NOT subscribers >= 1000",
Fc::Operator {
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Operator::LowerThan(rtok("NOT subscribers >= ", "1000")),
op: Condition::LowerThan(rtok("NOT subscribers >= ", "1000")),
},
),
(
"subscribers 100 TO 1000",
Fc::Operator {
Fc::Condition {
fid: rtok("", "subscribers"),
op: Operator::Between {
op: Condition::Between {
from: rtok("subscribers ", "100"),
to: rtok("subscribers 100 TO ", "1000"),
},
@ -457,14 +348,14 @@ mod tests {
(
"NOT subscribers 100 TO 1000",
Fc::Or(
Fc::Operator {
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Operator::LowerThan(rtok("NOT subscribers ", "100")),
op: Condition::LowerThan(rtok("NOT subscribers ", "100")),
}
.into(),
Fc::Operator {
Fc::Condition {
fid: rtok("NOT ", "subscribers"),
op: Operator::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")),
op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")),
}
.into(),
),
@ -487,14 +378,14 @@ mod tests {
(
"channel = ponce AND 'dog race' != 'bernese mountain'",
Fc::And(
Fc::Operator {
Fc::Condition {
fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "ponce")),
op: Condition::Equal(rtok("channel = ", "ponce")),
}
.into(),
Fc::Operator {
Fc::Condition {
fid: rtok("channel = ponce AND '", "dog race"),
op: Operator::NotEqual(rtok(
op: Condition::NotEqual(rtok(
"channel = ponce AND 'dog race' != '",
"bernese mountain",
)),
@ -505,14 +396,14 @@ mod tests {
(
"channel = ponce OR 'dog race' != 'bernese mountain'",
Fc::Or(
Fc::Operator {
Fc::Condition {
fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "ponce")),
op: Condition::Equal(rtok("channel = ", "ponce")),
}
.into(),
Fc::Operator {
Fc::Condition {
fid: rtok("channel = ponce OR '", "dog race"),
op: Operator::NotEqual(rtok(
op: Condition::NotEqual(rtok(
"channel = ponce OR 'dog race' != '",
"bernese mountain",
)),
@ -524,14 +415,14 @@ mod tests {
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000",
Fc::Or(
Fc::And(
Fc::Operator {
Fc::Condition {
fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "ponce")),
op: Condition::Equal(rtok("channel = ", "ponce")),
}
.into(),
Fc::Operator {
Fc::Condition {
fid: rtok("channel = ponce AND '", "dog race"),
op: Operator::NotEqual(rtok(
op: Condition::NotEqual(rtok(
"channel = ponce AND 'dog race' != '",
"bernese mountain",
)),
@ -539,12 +430,12 @@ mod tests {
.into(),
)
.into(),
Fc::Operator {
Fc::Condition {
fid: rtok(
"channel = ponce AND 'dog race' != 'bernese mountain' OR ",
"subscribers",
),
op: Operator::GreaterThan(rtok(
op: Condition::GreaterThan(rtok(
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ",
"1000",
)),
@ -556,10 +447,10 @@ mod tests {
(
"channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )",
Fc::And(
Fc::Operator { fid: rtok("", "channel"), op: Operator::Equal(rtok("channel = ", "ponce")) }.into(),
Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(),
Fc::Or(
Fc::Operator { fid: rtok("channel = ponce AND ( '", "dog race"), op: Operator::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(),
Fc::Operator { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(),
Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
).into()),
),
(
@ -567,10 +458,10 @@ mod tests {
Fc::And(
Fc::Or(
Fc::And(
Fc::Operator { fid: rtok("(", "channel"), op: Operator::Equal(rtok("(channel = ", "ponce")) }.into(),
Fc::Operator { fid: rtok("(channel = ponce AND '", "dog race"), op: Operator::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(),
Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(),
Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(),
).into(),
Fc::Operator { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Operator::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
).into(),
Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into()
)
@ -590,34 +481,4 @@ mod tests {
assert_eq!(filter, expected, "Filter `{}` failed.", input);
}
}
#[test]
fn name() {
use FilterCondition as Fc;
// new_from_raw_offset is unsafe
let test_case = [
// simple test
(
"channel=Ponce",
Fc::Operator {
fid: rtok("", "channel"),
op: Operator::Equal(rtok("channel = ", "Ponce")),
},
),
];
for (input, expected) in test_case {
let result = Fc::parse(input);
assert!(
result.is_ok(),
"Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
expected,
result.unwrap_err()
);
let filter = result.unwrap().1;
assert_eq!(filter, expected, "Filter `{}` failed.", input);
}
}
}

View File

@ -0,0 +1,71 @@
use nom::branch::alt;
use nom::bytes::complete::{take_till, take_while1};
use nom::character::complete::char;
use nom::sequence::delimited;
use nom::IResult;
use crate::{ws, Span, Token};
/// value          = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
///
/// Parses a single value, either quoted or bare, and returns it as a
/// [`Token`]. Surrounding whitespace is skipped, and for the quoted forms the
/// quotes themselves are not part of the returned token.
pub fn parse_value(input: Span) -> IResult<Span, Token> {
    // singleQuoted   = "'" .* all but quotes "'"
    let single_quoted_body = |i| take_till(|c: char| c == '\'')(i);
    // doubleQuoted   = "\"" (word | spaces)* "\""
    let double_quoted_body = |i| take_till(|c: char| c == '"')(i);
    // word           = (alphanumeric | _ | - | .)+
    let bare_word = |i| take_while1(is_key_component)(i);

    let (rest, matched) = alt((
        ws(delimited(char('\''), single_quoted_body, char('\''))),
        ws(delimited(char('"'), double_quoted_body, char('"'))),
        ws(bare_word),
    ))(input)?;

    Ok((rest, matched.into()))
}
/// Tells whether `c` may appear in an unquoted word: any alphanumeric
/// character, or one of `_`, `-` and `.`.
fn is_key_component(c: char) -> bool {
    matches!(c, '_' | '-' | '.') || c.is_alphanumeric()
}
#[cfg(test)]
pub mod tests {
    use super::*;
    use crate::tests::rtok;

    /// Checks that `parse_value` extracts the expected token from bare words,
    /// words followed by characters that are not part of a word, padded
    /// words, and single- or double-quoted values.
    #[test]
    fn name() {
        // Each case is `(input, expected token)`; the first argument of
        // `rtok` is the text that precedes the token in the input.
        let test_case = [
            ("channel", rtok("", "channel")),
            (".private", rtok("", ".private")),
            ("I-love-kebab", rtok("", "I-love-kebab")),
            ("but_snakes_are_also_good", rtok("", "but_snakes_are_also_good")),
            ("parens(", rtok("", "parens")),
            ("parens)", rtok("", "parens")),
            ("not!", rtok("", "not")),
            ("    channel", rtok("    ", "channel")),
            ("channel     ", rtok("", "channel")),
            ("'channel'", rtok("'", "channel")),
            ("\"channel\"", rtok("\"", "channel")),
            ("'cha)nnel'", rtok("'", "cha)nnel")),
            ("'cha\"nnel'", rtok("'", "cha\"nnel")),
            ("\"cha'nnel\"", rtok("\"", "cha'nnel")),
            ("\" some spaces \"", rtok("\"", " some spaces ")),
        ];

        for (input, expected) in test_case {
            let input = Span::new(input);
            let result = parse_value(input);

            assert!(
                result.is_ok(),
                "Filter `{:?}` was supposed to be parsed but failed with the following error: `{}`",
                expected,
                result.unwrap_err()
            );
            let value = result.unwrap().1;
            assert_eq!(value, expected, "Filter `{}` failed.", input);
        }
    }
}

View File

@ -40,6 +40,7 @@ uuid = { version = "0.8.2", features = ["v4"] }
# facet filter parser
nom = "7.0.0"
nom_locate = "4.0.0"
# documents words self-join
itertools = "0.10.0"

View File

@ -16,20 +16,20 @@ use crate::heed_codec::facet::{
};
use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result};
#[derive(Debug, Clone, PartialEq)]
pub enum FilterCondition {
Operator(FieldId, Operator),
#[derive(Debug, Clone)]
pub enum FilterCondition<'a> {
Operator(FieldId, Operator<'a>),
Or(Box<Self>, Box<Self>),
And(Box<Self>, Box<Self>),
Empty,
}
impl FilterCondition {
pub fn from_array<I, J, A, B>(
impl<'a> FilterCondition<'a> {
pub fn from_array<I, J, A: 'a, B: 'a>(
rtxn: &heed::RoTxn,
index: &Index,
array: I,
) -> Result<Option<FilterCondition>>
) -> Result<Option<FilterCondition<'a>>>
where
I: IntoIterator<Item = Either<J, B>>,
J: IntoIterator<Item = A>,
@ -73,8 +73,8 @@ impl FilterCondition {
pub fn from_str(
rtxn: &heed::RoTxn,
index: &Index,
expression: &str,
) -> Result<FilterCondition> {
expression: &'a str,
) -> Result<FilterCondition<'a>> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let filterable_fields = index.filterable_fields(rtxn)?;
let ctx =
@ -93,7 +93,7 @@ impl FilterCondition {
}
}
}
pub fn negate(self) -> FilterCondition {
pub fn negate(self) -> FilterCondition<'a> {
match self {
Operator(fid, op) => match op.negate() {
(op, None) => Operator(fid, op),
@ -106,7 +106,7 @@ impl FilterCondition {
}
}
impl FilterCondition {
impl<'a> FilterCondition<'a> {
/// Aggregates the documents ids that are part of the specified range automatically
/// going deeper through the levels.
fn explore_facet_number_levels(
@ -221,7 +221,7 @@ impl FilterCondition {
numbers_db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
strings_db: heed::Database<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
field_id: FieldId,
operator: &Operator,
operator: &Operator<'a>,
) -> Result<RoaringBitmap> {
// Make sure we always bound the ranges with the field id and the level,
// as the facets values are all in the same database and prefixed by the

View File

@ -28,25 +28,38 @@ use nom::multi::{many0, separated_list1};
use nom::number::complete::recognize_float;
use nom::sequence::{delimited, preceded, tuple};
use nom::IResult;
use nom_locate::LocatedSpan;
use self::Operator::*;
use super::FilterCondition;
use crate::{FieldId, FieldsIdsMap};
#[derive(Debug, Clone, PartialEq)]
pub enum Operator {
GreaterThan(f64),
GreaterThanOrEqual(f64),
Equal(Option<f64>, String),
NotEqual(Option<f64>, String),
LowerThan(f64),
LowerThanOrEqual(f64),
Between(f64, f64),
GeoLowerThan([f64; 2], f64),
GeoGreaterThan([f64; 2], f64),
pub enum FilterError {
AttributeNotFilterable(String),
}
impl Operator {
#[derive(Debug, Clone, PartialEq, Eq)]
struct Token<'a> {
pub position: Span<'a>,
pub inner: &'a str,
}
type Span<'a> = LocatedSpan<&'a str>;
#[derive(Debug, Clone)]
pub enum Operator<'a> {
GreaterThan(Token<'a>),
GreaterThanOrEqual(Token<'a>),
Equal(Option<Token<'a>>, Token<'a>),
NotEqual(Option<Token<'a>>, Token<'a>),
LowerThan(Token<'a>),
LowerThanOrEqual(Token<'a>),
Between(Token<'a>, Token<'a>),
GeoLowerThan([Token<'a>; 2], Token<'a>),
GeoGreaterThan([Token<'a>; 2], Token<'a>),
}
impl<'a> Operator<'a> {
/// This method can return two operations in case it must express
/// an OR operation for the between case (i.e. `TO`).
pub fn negate(self) -> (Self, Option<Self>) {
@ -180,16 +193,13 @@ impl<'a> ParseContext<'a> {
where
E: FilterParserError<'a>,
{
let error = match input.chars().nth(0) {
Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))),
None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))),
};
if !self.filterable_fields.contains(key) {
return error;
}
match self.fields_ids_map.id(key) {
Some(fid) => Ok(fid),
None => error,
Some(fid) if self.filterable_fields.contains(key) => Ok(fid),
_ => Err(nom::Err::Failure(E::add_context(
input,
"Attribute is not filterable",
E::from_char(input, 'T'),
))),
}
}

View File

@ -34,7 +34,8 @@ mod query_tree;
pub struct Search<'a> {
query: Option<String>,
filter: Option<FilterCondition>,
// this should be linked to the String in the query
filter: Option<FilterCondition<'a>>,
offset: usize,
limit: usize,
sort_criteria: Option<Vec<AscDesc>>,