mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-25 06:00:08 +01:00
rewrite the parser logic
This commit is contained in:
parent
e1d81342cf
commit
6c15f50899
@ -1,9 +1,26 @@
|
|||||||
|
//! BNF grammar:
|
||||||
|
//!
|
||||||
|
//! ```text
|
||||||
|
//! expression = or
|
||||||
|
//! or = and (~ "OR" ~ and)*
|
||||||
|
//! and = not (~ "AND" not)*
|
||||||
|
//! not = ("NOT" | "!") not | primary
|
||||||
|
//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius
|
||||||
|
//! to = value value "TO" value
|
||||||
|
//! condition = value ("<=" | ">=" | "!=" | "<" | ">" | "=") value
|
||||||
|
//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
||||||
|
//! singleQuoted = "'" (any character except "'")* "'"
|
||||||
|
//! doubleQuoted = "\"" (any character except "\"")* "\""
|
||||||
|
//! word = (alphanumeric | _ | - | .)+
|
||||||
|
//! geoRadius = WS* ~ "_geoRadius(" ~ float ~ "," ~ float ~ "," ~ float ~ ")"
|
||||||
|
//! ```
|
||||||
|
|
||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fmt::Debug;
|
use std::fmt::Debug;
|
||||||
use std::result::Result as StdResult;
|
use std::result::Result as StdResult;
|
||||||
|
|
||||||
use nom::branch::alt;
|
use nom::branch::alt;
|
||||||
use nom::bytes::complete::{tag, take_till, take_till1, take_while1};
|
use nom::bytes::complete::{tag, take_till, take_while1};
|
||||||
use nom::character::complete::{char, multispace0};
|
use nom::character::complete::{char, multispace0};
|
||||||
use nom::combinator::map;
|
use nom::combinator::map;
|
||||||
use nom::error::{ContextError, ErrorKind, VerboseError};
|
use nom::error::{ContextError, ErrorKind, VerboseError};
|
||||||
@ -60,12 +77,14 @@ pub struct ParseContext<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ParseContext<'a> {
|
impl<'a> ParseContext<'a> {
|
||||||
|
/// or = and (~ "OR" ~ and)*
|
||||||
fn parse_or<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
fn parse_or<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
||||||
where
|
where
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
{
|
{
|
||||||
let (input, lhs) = self.parse_and(input)?;
|
let (input, lhs) = self.parse_and(input)?;
|
||||||
let (input, ors) = many0(preceded(self.ws(tag("OR")), |c| Self::parse_or(self, c)))(input)?;
|
let (input, ors) =
|
||||||
|
many0(preceded(self.ws(tag("OR")), |c| Self::parse_and(self, c)))(input)?;
|
||||||
|
|
||||||
let expr = ors
|
let expr = ors
|
||||||
.into_iter()
|
.into_iter()
|
||||||
@ -78,49 +97,40 @@ impl<'a> ParseContext<'a> {
|
|||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
{
|
{
|
||||||
let (input, lhs) = self.parse_not(input)?;
|
let (input, lhs) = self.parse_not(input)?;
|
||||||
let (input, ors) =
|
let (input, ors) = many0(preceded(self.ws(tag("AND")), |c| self.parse_not(c)))(input)?;
|
||||||
many0(preceded(self.ws(tag("AND")), |c| Self::parse_and(self, c)))(input)?;
|
|
||||||
let expr = ors
|
let expr = ors
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch)));
|
.fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch)));
|
||||||
Ok((input, expr))
|
Ok((input, expr))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// not = ("NOT" | "!") not | primary
|
||||||
fn parse_not<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
fn parse_not<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
||||||
where
|
where
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
{
|
{
|
||||||
alt((
|
alt((
|
||||||
map(
|
map(preceded(alt((tag("!"), tag("NOT"))), |c| self.parse_not(c)), |e| e.negate()),
|
||||||
preceded(alt((self.ws(tag("!")), self.ws(tag("NOT")))), |c| {
|
|c| self.parse_primary(c),
|
||||||
Self::parse_condition_expression(self, c)
|
|
||||||
}),
|
|
||||||
|e| e.negate(),
|
|
||||||
),
|
|
||||||
|c| Self::parse_condition_expression(self, c),
|
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn ws<F, O, E>(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
|
fn ws<F, O, E>(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E>
|
||||||
where
|
where
|
||||||
F: Fn(&'a str) -> IResult<&'a str, O, E>,
|
F: FnMut(&'a str) -> IResult<&'a str, O, E>,
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
{
|
{
|
||||||
delimited(multispace0, inner, multispace0)
|
delimited(multispace0, inner, multispace0)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_simple_condition<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
/// condition = value ("<=" | ">=" | "!=" | "<" | ">" | "=") value
|
||||||
|
fn parse_condition<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
||||||
where
|
where
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
{
|
{
|
||||||
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
|
let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("=")));
|
||||||
let k = tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_value(c))))(
|
let (input, (key, op, value)) =
|
||||||
input,
|
tuple((|c| self.parse_value(c), operator, |c| self.parse_value(c)))(input)?;
|
||||||
);
|
|
||||||
let (input, (key, op, value)) = match k {
|
|
||||||
Ok(o) => o,
|
|
||||||
Err(e) => return Err(e),
|
|
||||||
};
|
|
||||||
|
|
||||||
let fid = self.parse_fid(input, key)?;
|
let fid = self.parse_fid(input, key)?;
|
||||||
let r: StdResult<f64, nom::Err<VerboseError<&str>>> = self.parse_numeric(value);
|
let r: StdResult<f64, nom::Err<VerboseError<&str>>> = self.parse_numeric(value);
|
||||||
@ -137,7 +147,17 @@ impl<'a> ParseContext<'a> {
|
|||||||
);
|
);
|
||||||
Ok((input, k))
|
Ok((input, k))
|
||||||
}
|
}
|
||||||
">" | "<" | "<=" | ">=" => self.parse_numeric_unary_condition(op, fid, value),
|
">" | "<" | "<=" | ">=" => {
|
||||||
|
let numeric: f64 = self.parse_numeric(value)?;
|
||||||
|
let k = match op {
|
||||||
|
">" => FilterCondition::Operator(fid, GreaterThan(numeric)),
|
||||||
|
"<" => FilterCondition::Operator(fid, LowerThan(numeric)),
|
||||||
|
"<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)),
|
||||||
|
">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)),
|
||||||
|
_ => unreachable!(),
|
||||||
|
};
|
||||||
|
Ok((input, k))
|
||||||
|
}
|
||||||
_ => unreachable!(),
|
_ => unreachable!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -156,26 +176,6 @@ impl<'a> ParseContext<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_numeric_unary_condition<E>(
|
|
||||||
&'a self,
|
|
||||||
input: &'a str,
|
|
||||||
fid: u16,
|
|
||||||
value: &'a str,
|
|
||||||
) -> IResult<&'a str, FilterCondition, E>
|
|
||||||
where
|
|
||||||
E: FilterParserError<'a>,
|
|
||||||
{
|
|
||||||
let numeric: f64 = self.parse_numeric(value)?;
|
|
||||||
let k = match input {
|
|
||||||
">" => FilterCondition::Operator(fid, GreaterThan(numeric)),
|
|
||||||
"<" => FilterCondition::Operator(fid, LowerThan(numeric)),
|
|
||||||
"<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)),
|
|
||||||
">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)),
|
|
||||||
_ => unreachable!(),
|
|
||||||
};
|
|
||||||
Ok((input, k))
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_fid<E>(&'a self, input: &'a str, key: &'a str) -> StdResult<FieldId, nom::Err<E>>
|
fn parse_fid<E>(&'a self, input: &'a str, key: &'a str) -> StdResult<FieldId, nom::Err<E>>
|
||||||
where
|
where
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
@ -193,12 +193,13 @@ impl<'a> ParseContext<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_range_condition<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
/// to = value value "TO" value
|
||||||
|
fn parse_to<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
||||||
where
|
where
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
{
|
{
|
||||||
let (input, (key, from, _, to)) = tuple((
|
let (input, (key, from, _, to)) = tuple((
|
||||||
self.ws(|c| self.parse_key(c)),
|
self.ws(|c| self.parse_value(c)),
|
||||||
self.ws(|c| self.parse_value(c)),
|
self.ws(|c| self.parse_value(c)),
|
||||||
tag("TO"),
|
tag("TO"),
|
||||||
self.ws(|c| self.parse_value(c)),
|
self.ws(|c| self.parse_value(c)),
|
||||||
@ -212,6 +213,7 @@ impl<'a> ParseContext<'a> {
|
|||||||
Ok((input, res))
|
Ok((input, res))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// geoRadius = WS* ~ "_geoRadius(" ~ float ~ "," ~ float ~ "," ~ float ~ ")"
|
||||||
fn parse_geo_radius<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
fn parse_geo_radius<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
||||||
where
|
where
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
@ -224,7 +226,8 @@ impl<'a> ParseContext<'a> {
|
|||||||
"_geoRadius. Longitude must be contained between -180 and 180 degrees.";
|
"_geoRadius. Longitude must be contained between -180 and 180 degrees.";
|
||||||
|
|
||||||
let parsed = preceded::<_, _, _, E, _, _>(
|
let parsed = preceded::<_, _, _, E, _, _>(
|
||||||
tag("_geoRadius"),
|
// TODO: forbid spaces between _geoRadius and parenthesis
|
||||||
|
self.ws(tag("_geoRadius")),
|
||||||
delimited(
|
delimited(
|
||||||
char('('),
|
char('('),
|
||||||
separated_list1(tag(","), self.ws(|c| recognize_float(c))),
|
separated_list1(tag(","), self.ws(|c| recognize_float(c))),
|
||||||
@ -275,54 +278,35 @@ impl<'a> ParseContext<'a> {
|
|||||||
Ok((input, res))
|
Ok((input, res))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn parse_condition<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
/// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius
|
||||||
where
|
fn parse_primary<E>(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E>
|
||||||
E: FilterParserError<'a>,
|
|
||||||
{
|
|
||||||
let l1 = |c| self.parse_simple_condition(c);
|
|
||||||
let l2 = |c| self.parse_range_condition(c);
|
|
||||||
let l3 = |c| self.parse_geo_radius(c);
|
|
||||||
alt((l1, l2, l3))(input)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn parse_condition_expression<E>(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E>
|
|
||||||
where
|
where
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
{
|
{
|
||||||
alt((
|
alt((
|
||||||
delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))),
|
delimited(self.ws(char('(')), |c| self.parse_expression(c), self.ws(char(')'))),
|
||||||
|c| Self::parse_condition(self, c),
|
|c| self.parse_condition(c),
|
||||||
))(input)
|
|c| self.parse_to(c),
|
||||||
}
|
|c| self.parse_geo_radius(c),
|
||||||
|
|
||||||
fn parse_key<E>(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E>
|
|
||||||
where
|
|
||||||
E: FilterParserError<'a>,
|
|
||||||
{
|
|
||||||
let key = |input| take_while1(Self::is_key_component)(input);
|
|
||||||
let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input);
|
|
||||||
let quoted_key = |input| take_till(|c: char| c == '"')(input);
|
|
||||||
|
|
||||||
alt((
|
|
||||||
delimited(char('\''), simple_quoted_key, char('\'')),
|
|
||||||
delimited(char('"'), quoted_key, char('"')),
|
|
||||||
key,
|
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS*
|
||||||
fn parse_value<E>(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E>
|
fn parse_value<E>(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E>
|
||||||
where
|
where
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
{
|
{
|
||||||
let key =
|
// singleQuoted = "'" (any character except "'")* "'"
|
||||||
|input| take_till1(|c: char| c.is_ascii_whitespace() || c == '(' || c == ')')(input);
|
|
||||||
let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input);
|
let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input);
|
||||||
|
// doubleQuoted = "\"" (any character except "\"")* "\""
|
||||||
let quoted_key = |input| take_till(|c: char| c == '"')(input);
|
let quoted_key = |input| take_till(|c: char| c == '"')(input);
|
||||||
|
// word = (alphanumeric | _ | - | .)+
|
||||||
|
let word = |input| take_while1(Self::is_key_component)(input);
|
||||||
|
|
||||||
alt((
|
alt((
|
||||||
delimited(char('\''), simple_quoted_key, char('\'')),
|
self.ws(delimited(char('\''), simple_quoted_key, char('\''))),
|
||||||
delimited(char('"'), quoted_key, char('"')),
|
self.ws(delimited(char('"'), quoted_key, char('"'))),
|
||||||
key,
|
self.ws(word),
|
||||||
))(input)
|
))(input)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -330,11 +314,12 @@ impl<'a> ParseContext<'a> {
|
|||||||
c.is_alphanumeric() || ['_', '-', '.'].contains(&c)
|
c.is_alphanumeric() || ['_', '-', '.'].contains(&c)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// expression = or
|
||||||
pub fn parse_expression<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
pub fn parse_expression<E>(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E>
|
||||||
where
|
where
|
||||||
E: FilterParserError<'a>,
|
E: FilterParserError<'a>,
|
||||||
{
|
{
|
||||||
alt((|input| self.parse_or(input), |input| self.parse_and(input)))(input)
|
self.parse_or(input)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -499,7 +484,19 @@ mod tests {
|
|||||||
),
|
),
|
||||||
),
|
),
|
||||||
// test parenthesis
|
// test parenthesis
|
||||||
/*
|
(
|
||||||
|
Fc::from_str(
|
||||||
|
&rtxn,
|
||||||
|
&index,
|
||||||
|
"channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )",
|
||||||
|
),
|
||||||
|
Fc::And(
|
||||||
|
Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))),
|
||||||
|
Box::new(Fc::Or(
|
||||||
|
Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))),
|
||||||
|
Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))),
|
||||||
|
))),
|
||||||
|
),
|
||||||
(
|
(
|
||||||
Fc::from_str(
|
Fc::from_str(
|
||||||
&rtxn,
|
&rtxn,
|
||||||
@ -516,7 +513,6 @@ mod tests {
|
|||||||
)),
|
)),
|
||||||
Box::new(Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.))))
|
Box::new(Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.))))
|
||||||
),
|
),
|
||||||
*/
|
|
||||||
];
|
];
|
||||||
|
|
||||||
for (result, expected) in test_case {
|
for (result, expected) in test_case {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user