From 8748df2ca456db11bfe34215821df8c6e04de2d3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Wed, 15 Sep 2021 18:57:18 +0800 Subject: [PATCH 01/15] draft without error handling --- milli/Cargo.toml | 1 + milli/src/error.rs | 11 + milli/src/search/facet/filter_condition.rs | 254 ++++++++++++++++++++- 3 files changed, 265 insertions(+), 1 deletion(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 7d93f5995..50ada6bfb 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -24,6 +24,7 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } linked-hash-map = "0.5.4" meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" +nom = "7" obkv = "0.2.0" once_cell = "1.5.2" ordered-float = "2.1.1" diff --git a/milli/src/error.rs b/milli/src/error.rs index 1f1cc5264..be3fbfdef 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -62,6 +62,9 @@ pub enum UserError { InvalidFilter(pest::error::Error), InvalidFilterAttribute(pest::error::Error), InvalidGeoField { document_id: Value, object: Value }, + InvalidFilterAttributeNom, + InvalidFilterValue, + InvalidSortName { name: String }, InvalidSortableAttribute { field: String, valid_fields: HashSet }, SortRankingRuleMissing, InvalidStoreFile, @@ -82,6 +85,11 @@ impl From for Error { Error::IoError(error) } } +impl From for UserError { + fn from(_: std::num::ParseFloatError) -> UserError { + UserError::InvalidFilterValue + } +} impl From for Error { fn from(error: fst::Error) -> Error { @@ -208,6 +216,9 @@ impl StdError for InternalError {} impl fmt::Display for UserError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { + //TODO + Self::InvalidFilterAttributeNom => f.write_str("parser error "), + Self::InvalidFilterValue => f.write_str("parser error "), Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), Self::CriterionError(error) => write!(f, "{}", error), Self::DocumentLimitReached => f.write_str("maximum number of documents reached"), diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index f1055b2f8..b14c3648f 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -4,6 +4,7 @@ use std::ops::Bound::{self, Excluded, Included}; use std::result::Result as StdResult; use std::str::FromStr; +use crate::error::UserError as IError; use either::Either; use heed::types::DecodeIgnore; use itertools::Itertools; @@ -13,6 +14,17 @@ use pest::iterators::{Pair, Pairs}; use pest::Parser; use roaring::RoaringBitmap; +use nom::{ + branch::alt, + bytes::complete::{tag, take_while1}, + character::complete::{char, multispace0}, + combinator::map, + error::ParseError, + multi::many0, + sequence::{delimited, preceded, tuple}, + IResult, +}; + use self::FilterCondition::*; use self::Operator::*; use super::parser::{FilterParser, Rule, PREC_CLIMBER}; @@ -64,6 +76,156 @@ pub enum FilterCondition { Empty, } +struct ParseContext<'a> { + fields_ids_map: &'a FieldsIdsMap, + filterable_fields: &'a HashSet, +} +// impl From + +impl<'a> ParseContext<'a> { + fn parse_or_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { + let (input, lhs) = self.parse_and_nom(input)?; + let (input, ors) = many0(preceded(tag("OR"), |c| Self::parse_or_nom(self, c)))(input)?; + let expr = ors + .into_iter() + .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); + Ok((input, expr)) + } + fn parse_and_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { + let (input, lhs) = self.parse_not_nom(input)?; + let (input, ors) = many0(preceded(tag("AND"), |c| Self::parse_and_nom(self, c)))(input)?; + let expr = ors + .into_iter() + .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); + Ok((input, expr)) + } + + fn parse_not_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { + let r = alt(( + map( + preceded(alt((Self::ws(tag("!")), Self::ws(tag("NOT")))), |c| { + Self::parse_condition_expression(self, c) + }), + |e| e.negate(), + ), + |c| Self::parse_condition_expression(self, c), + ))(input); + return r; + } + + fn ws<'b, F: 'b, O, E: ParseError<&'b str>>( + inner: F, + ) -> impl FnMut(&'b str) -> IResult<&'b str, O, E> + where + F: Fn(&'b str) -> IResult<&'b str, O, E>, + { + delimited(multispace0, inner, multispace0) + } + + fn parse_simple_condition( + &self, + input: &'a str, + ) -> StdResult<(&'a str, FilterCondition), UserError> { + let operator = alt((tag(">"), tag(">="), tag("="), tag("<"), tag("!="), tag("<="))); + let (input, (key, op, value)) = + match tuple((Self::ws(Self::parse_key), operator, Self::ws(Self::parse_key)))(input) { + Ok((input, (key, op, value))) => (input, (key, op, value)), + Err(_) => return Err(UserError::InvalidFilterAttributeNom), + }; + + let fid = match field_id_by_key(self.fields_ids_map, self.filterable_fields, key)? { + Some(fid) => fid, + None => return Err(UserError::InvalidFilterAttributeNom), + }; + let r = nom_parse::(value); + let k = match op { + ">" => Operator(fid, GreaterThan(value.parse::()?)), + "<" => Operator(fid, LowerThan(value.parse::()?)), + "<=" => Operator(fid, LowerThanOrEqual(value.parse::()?)), + ">=" => Operator(fid, GreaterThanOrEqual(value.parse::()?)), + "=" => Operator(fid, Equal(r.0.ok(), value.to_string().to_lowercase())), + "!=" => Operator(fid, NotEqual(r.0.ok(), value.to_string().to_lowercase())), + _ => unreachable!(), + }; + Ok((input, k)) + } + + fn parse_range_condition( + &'a self, + input: &'a str, + ) -> StdResult<(&str, FilterCondition), UserError> { + let (input, (key, from, _, to)) = match tuple(( + Self::ws(Self::parse_key), + Self::ws(Self::parse_key), + tag("TO"), + Self::ws(Self::parse_key), + ))(input) + { + Ok((input, (key, from, tag, to))) => (input, (key, from, tag, to)), + Err(_) => return Err(UserError::InvalidFilterAttributeNom), + }; + let fid = match field_id_by_key(self.fields_ids_map, self.filterable_fields, key)? { + Some(fid) => fid, + None => return Err(UserError::InvalidFilterAttributeNom), + }; + let res = Operator(fid, Between(from.parse::()?, to.parse::()?)); + Ok((input, res)) + } + + fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { + let l1 = |c| self.wrap(|c| self.parse_simple_condition(c), c); + let l2 = |c| self.wrap(|c| self.parse_range_condition(c), c); + let (input, condition) = match alt((l1, l2))(input) { + Ok((i, c)) => (i, c), + Err(_) => { + return Err(nom::Err::Error(nom::error::Error::from_error_kind( + "foo", + nom::error::ErrorKind::Fail, + ))) + } + }; + Ok((input, condition)) + } + fn wrap(&'a self, inner: F, input: &'a str) -> IResult<&'a str, FilterCondition> + where + F: Fn(&'a str) -> StdResult<(&'a str, FilterCondition), E>, + { + match inner(input) { + Ok(e) => Ok(e), + Err(_) => { + return Err(nom::Err::Error(nom::error::Error::from_error_kind( + "foo", + nom::error::ErrorKind::Fail, + ))) + } + } + } + + fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition> { + return alt(( + delimited( + Self::ws(char('(')), + |c| Self::parse_expression(self, c), + Self::ws(char(')')), + ), + |c| Self::parse_condition(self, c), + ))(input); + } + + fn parse_key(input: &str) -> IResult<&str, &str> { + let key = |input| take_while1(Self::is_key_component)(input); + alt((key, delimited(char('"'), key, char('"'))))(input) + } + fn is_key_component(c: char) -> bool { + c.is_alphanumeric() || ['_', '-', '.'].contains(&c) + } + + pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { + self.parse_or_nom(input) + } +} + +//for nom impl FilterCondition { pub fn from_array( rtxn: &heed::RoTxn, @@ -109,11 +271,75 @@ impl FilterCondition { Ok(ands) } - pub fn from_str( rtxn: &heed::RoTxn, index: &Index, expression: &str, + ) -> Result { + let fields_ids_map = index.fields_ids_map(rtxn)?; + let filterable_fields = index.filterable_fields(rtxn)?; + let ctx = + ParseContext { fields_ids_map: &fields_ids_map, filterable_fields: &filterable_fields }; + match ctx.parse_expression(expression) { + Ok((_, fc)) => Ok(fc), + Err(e) => { + println!("{:?}", e); + unreachable!() + } + } + } +} + +impl FilterCondition { + pub fn from_array_pest( + rtxn: &heed::RoTxn, + index: &Index, + array: I, + ) -> Result> + where + I: IntoIterator>, + J: IntoIterator, + A: AsRef, + B: AsRef, + { + let mut ands = None; + + for either in array { + match either { + Either::Left(array) => { + let mut ors = None; + for rule in array { + let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; + ors = match ors.take() { + Some(ors) => Some(Or(Box::new(ors), Box::new(condition))), + None => Some(condition), + }; + } + + if let Some(rule) = ors { + ands = match ands.take() { + Some(ands) => Some(And(Box::new(ands), Box::new(rule))), + None => Some(rule), + }; + } + } + Either::Right(rule) => { + let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; + ands = match ands.take() { + Some(ands) => Some(And(Box::new(ands), Box::new(condition))), + None => Some(condition), + }; + } + } + } + + Ok(ands) + } + + pub fn from_str_pest( + rtxn: &heed::RoTxn, + index: &Index, + expression: &str, ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_fields = index.filterable_fields(rtxn)?; @@ -586,6 +812,19 @@ impl FilterCondition { } } +fn field_id_by_key( + fields_ids_map: &FieldsIdsMap, + filterable_fields: &HashSet, + key: &str, +) -> StdResult, IError> { + // lexing ensures that we at least have a key + if !filterable_fields.contains(key) { + return StdResult::Err(UserError::InvalidFilterAttributeNom); + } + + Ok(fields_ids_map.id(key)) +} + /// Retrieve the field id base on the pest value. /// /// Returns an error if the given value is not filterable. @@ -640,6 +879,19 @@ fn field_id( Ok(fields_ids_map.id(key.as_str())) } +fn nom_parse(input: &str) -> (StdResult, String) +where + T: FromStr, + T::Err: ToString, +{ + let result = match input.parse::() { + Ok(value) => Ok(value), + Err(e) => Err(UserError::InvalidFilterValue), + }; + + (result, input.to_string()) +} + /// Tries to parse the pest pair into the type `T` specified, always returns /// the original string that we tried to parse. /// From 50ad750ec119cc157f383d6717b4c7f010a1b245 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Thu, 16 Sep 2021 17:56:18 +0800 Subject: [PATCH 02/15] enhance error handling --- milli/src/error.rs | 9 +- milli/src/search/facet/filter_condition.rs | 241 +++++++++++---------- 2 files changed, 127 insertions(+), 123 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index be3fbfdef..a798539cd 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -64,6 +64,7 @@ pub enum UserError { InvalidGeoField { document_id: Value, object: Value }, InvalidFilterAttributeNom, InvalidFilterValue, + InvalidFilterNom { input: String }, InvalidSortName { name: String }, InvalidSortableAttribute { field: String, valid_fields: HashSet }, SortRankingRuleMissing, @@ -85,11 +86,6 @@ impl From for Error { Error::IoError(error) } } -impl From for UserError { - fn from(_: std::num::ParseFloatError) -> UserError { - UserError::InvalidFilterValue - } -} impl From for Error { fn from(error: fst::Error) -> Error { @@ -217,8 +213,7 @@ impl fmt::Display for UserError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { //TODO - Self::InvalidFilterAttributeNom => f.write_str("parser error "), - Self::InvalidFilterValue => f.write_str("parser error "), + Self::InvalidFilterNom { input } => write!(f, "parser error {}", input), Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), Self::CriterionError(error) => write!(f, "{}", error), Self::DocumentLimitReached => f.write_str("maximum number of documents reached"), diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index b14c3648f..e6ed79230 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -4,7 +4,6 @@ use std::ops::Bound::{self, Excluded, Included}; use std::result::Result as StdResult; use std::str::FromStr; -use crate::error::UserError as IError; use either::Either; use heed::types::DecodeIgnore; use itertools::Itertools; @@ -19,7 +18,9 @@ use nom::{ bytes::complete::{tag, take_while1}, character::complete::{char, multispace0}, combinator::map, + error::ErrorKind, error::ParseError, + error::VerboseError, multi::many0, sequence::{delimited, preceded, tuple}, IResult, @@ -29,7 +30,7 @@ use self::FilterCondition::*; use self::Operator::*; use super::parser::{FilterParser, Rule, PREC_CLIMBER}; use super::FacetNumberRange; -use crate::error::UserError; +use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, }; @@ -83,7 +84,10 @@ struct ParseContext<'a> { // impl From impl<'a> ParseContext<'a> { - fn parse_or_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { + fn parse_or_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { let (input, lhs) = self.parse_and_nom(input)?; let (input, ors) = many0(preceded(tag("OR"), |c| Self::parse_or_nom(self, c)))(input)?; let expr = ors @@ -91,7 +95,11 @@ impl<'a> ParseContext<'a> { .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); Ok((input, expr)) } - fn parse_and_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { + + fn parse_and_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { let (input, lhs) = self.parse_not_nom(input)?; let (input, ors) = many0(preceded(tag("AND"), |c| Self::parse_and_nom(self, c)))(input)?; let expr = ors @@ -100,119 +108,146 @@ impl<'a> ParseContext<'a> { Ok((input, expr)) } - fn parse_not_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { - let r = alt(( + fn parse_not_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + alt(( map( - preceded(alt((Self::ws(tag("!")), Self::ws(tag("NOT")))), |c| { + preceded(alt((self.ws(tag("!")), self.ws(tag("NOT")))), |c| { Self::parse_condition_expression(self, c) }), |e| e.negate(), ), |c| Self::parse_condition_expression(self, c), - ))(input); - return r; + ))(input) } - fn ws<'b, F: 'b, O, E: ParseError<&'b str>>( - inner: F, - ) -> impl FnMut(&'b str) -> IResult<&'b str, O, E> + fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> where - F: Fn(&'b str) -> IResult<&'b str, O, E>, + F: Fn(&'a str) -> IResult<&'a str, O, E>, + E: ParseError<&'a str>, { delimited(multispace0, inner, multispace0) } - fn parse_simple_condition( - &self, - input: &'a str, - ) -> StdResult<(&'a str, FilterCondition), UserError> { + fn parse_simple_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { let operator = alt((tag(">"), tag(">="), tag("="), tag("<"), tag("!="), tag("<="))); let (input, (key, op, value)) = - match tuple((Self::ws(Self::parse_key), operator, Self::ws(Self::parse_key)))(input) { - Ok((input, (key, op, value))) => (input, (key, op, value)), - Err(_) => return Err(UserError::InvalidFilterAttributeNom), - }; - - let fid = match field_id_by_key(self.fields_ids_map, self.filterable_fields, key)? { - Some(fid) => fid, - None => return Err(UserError::InvalidFilterAttributeNom), - }; - let r = nom_parse::(value); + tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_key(c))))( + input, + )?; + let fid = self.parse_fid(input, key)?; + let r: StdResult>> = self.parse_numeric(value); let k = match op { - ">" => Operator(fid, GreaterThan(value.parse::()?)), - "<" => Operator(fid, LowerThan(value.parse::()?)), - "<=" => Operator(fid, LowerThanOrEqual(value.parse::()?)), - ">=" => Operator(fid, GreaterThanOrEqual(value.parse::()?)), - "=" => Operator(fid, Equal(r.0.ok(), value.to_string().to_lowercase())), - "!=" => Operator(fid, NotEqual(r.0.ok(), value.to_string().to_lowercase())), + "=" => Operator(fid, Equal(r.ok(), value.to_string().to_lowercase())), + "!=" => Operator(fid, NotEqual(r.ok(), value.to_string().to_lowercase())), + ">" | "<" | "<=" | ">=" => { + return self.parse_numeric_unary_condition(input, fid, value) + } _ => unreachable!(), }; Ok((input, k)) } - fn parse_range_condition( - &'a self, - input: &'a str, - ) -> StdResult<(&str, FilterCondition), UserError> { - let (input, (key, from, _, to)) = match tuple(( - Self::ws(Self::parse_key), - Self::ws(Self::parse_key), - tag("TO"), - Self::ws(Self::parse_key), - ))(input) - { - Ok((input, (key, from, tag, to))) => (input, (key, from, tag, to)), - Err(_) => return Err(UserError::InvalidFilterAttributeNom), - }; - let fid = match field_id_by_key(self.fields_ids_map, self.filterable_fields, key)? { - Some(fid) => fid, - None => return Err(UserError::InvalidFilterAttributeNom), - }; - let res = Operator(fid, Between(from.parse::()?, to.parse::()?)); - Ok((input, res)) - } - - fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { - let l1 = |c| self.wrap(|c| self.parse_simple_condition(c), c); - let l2 = |c| self.wrap(|c| self.parse_range_condition(c), c); - let (input, condition) = match alt((l1, l2))(input) { - Ok((i, c)) => (i, c), - Err(_) => { - return Err(nom::Err::Error(nom::error::Error::from_error_kind( - "foo", - nom::error::ErrorKind::Fail, - ))) - } - }; - Ok((input, condition)) - } - fn wrap(&'a self, inner: F, input: &'a str) -> IResult<&'a str, FilterCondition> + fn parse_numeric(&'a self, input: &'a str) -> StdResult> where - F: Fn(&'a str) -> StdResult<(&'a str, FilterCondition), E>, + E: ParseError<&'a str>, + T: std::str::FromStr, { - match inner(input) { - Ok(e) => Ok(e), + match input.parse::() { + Ok(n) => Ok(n), Err(_) => { - return Err(nom::Err::Error(nom::error::Error::from_error_kind( - "foo", - nom::error::ErrorKind::Fail, - ))) + return match input.chars().nth(0) { + Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), + None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), + }; } } } - fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition> { + fn parse_numeric_unary_condition( + &'a self, + input: &'a str, + fid: u16, + value: &'a str, + ) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let numeric: f64 = self.parse_numeric(value)?; + let k = match input { + ">" => Operator(fid, GreaterThan(numeric)), + "<" => Operator(fid, LowerThan(numeric)), + "<=" => Operator(fid, LowerThanOrEqual(numeric)), + ">=" => Operator(fid, GreaterThanOrEqual(numeric)), + _ => unreachable!(), + }; + Ok((input, k)) + } + + fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> + where + E: ParseError<&'a str>, + { + let error = match input.chars().nth(0) { + Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), + None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), + }; + if !self.filterable_fields.contains(key) { + return error; + } + match self.fields_ids_map.id(key) { + Some(fid) => Ok(fid), + None => error, + } + } + + fn parse_range_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let (input, (key, from, _, to)) = tuple(( + self.ws(|c| self.parse_key(c)), + self.ws(|c| self.parse_key(c)), + tag("TO"), + self.ws(|c| self.parse_key(c)), + ))(input)?; + + let fid = self.parse_fid(input, key)?; + let numeric_from: f64 = self.parse_numeric(from)?; + let numeric_to: f64 = self.parse_numeric(to)?; + let res = Operator(fid, Between(numeric_from, numeric_to)); + Ok((input, res)) + } + + fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let l1 = |c| self.parse_simple_condition(c); + let l2 = |c| self.parse_range_condition(c); + let (input, condition) = alt((l1, l2))(input)?; + Ok((input, condition)) + } + + fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> + where + E: ParseError<&'a str>, + { return alt(( - delimited( - Self::ws(char('(')), - |c| Self::parse_expression(self, c), - Self::ws(char(')')), - ), + delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))), |c| Self::parse_condition(self, c), ))(input); } - fn parse_key(input: &str) -> IResult<&str, &str> { + fn parse_key(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> + where + E: ParseError<&'a str>, + { let key = |input| take_while1(Self::is_key_component)(input); alt((key, delimited(char('"'), key, char('"'))))(input) } @@ -220,7 +255,10 @@ impl<'a> ParseContext<'a> { c.is_alphanumeric() || ['_', '-', '.'].contains(&c) } - pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition> { + pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { self.parse_or_nom(input) } } @@ -280,12 +318,9 @@ impl FilterCondition { let filterable_fields = index.filterable_fields(rtxn)?; let ctx = ParseContext { fields_ids_map: &fields_ids_map, filterable_fields: &filterable_fields }; - match ctx.parse_expression(expression) { + match ctx.parse_expression::>(expression) { Ok((_, fc)) => Ok(fc), - Err(e) => { - println!("{:?}", e); - unreachable!() - } + Err(e) => Err(Error::UserError(UserError::InvalidFilterNom { input: e.to_string() })), } } } @@ -812,19 +847,6 @@ impl FilterCondition { } } -fn field_id_by_key( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - key: &str, -) -> StdResult, IError> { - // lexing ensures that we at least have a key - if !filterable_fields.contains(key) { - return StdResult::Err(UserError::InvalidFilterAttributeNom); - } - - Ok(fields_ids_map.id(key)) -} - /// Retrieve the field id base on the pest value. /// /// Returns an error if the given value is not filterable. @@ -879,19 +901,6 @@ fn field_id( Ok(fields_ids_map.id(key.as_str())) } -fn nom_parse(input: &str) -> (StdResult, String) -where - T: FromStr, - T::Err: ToString, -{ - let result = match input.parse::() { - Ok(value) => Ok(value), - Err(e) => Err(UserError::InvalidFilterValue), - }; - - (result, input.to_string()) -} - /// Tries to parse the pest pair into the type `T` specified, always returns /// the original string that we tried to parse. /// From ac1df9d9d7fc15e223aed66e9f53c2184ea442af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Tue, 28 Sep 2021 17:17:52 +0800 Subject: [PATCH 03/15] fix typo and remove pest --- milli/src/search/facet/filter_condition.rs | 307 ++------------------- 1 file changed, 24 insertions(+), 283 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index e6ed79230..d51ae27cd 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -2,15 +2,10 @@ use std::collections::HashSet; use std::fmt::Debug; use std::ops::Bound::{self, Excluded, Included}; use std::result::Result as StdResult; -use std::str::FromStr; use either::Either; use heed::types::DecodeIgnore; -use itertools::Itertools; use log::debug; -use pest::error::{Error as PestError, ErrorVariant}; -use pest::iterators::{Pair, Pairs}; -use pest::Parser; use roaring::RoaringBitmap; use nom::{ @@ -28,7 +23,8 @@ use nom::{ use self::FilterCondition::*; use self::Operator::*; -use super::parser::{FilterParser, Rule, PREC_CLIMBER}; +use super::parser::FilterParser; +use super::parser::{Rule, PREC_CLIMBER}; use super::FacetNumberRange; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ @@ -145,9 +141,7 @@ impl<'a> ParseContext<'a> { let k = match op { "=" => Operator(fid, Equal(r.ok(), value.to_string().to_lowercase())), "!=" => Operator(fid, NotEqual(r.ok(), value.to_string().to_lowercase())), - ">" | "<" | "<=" | ">=" => { - return self.parse_numeric_unary_condition(input, fid, value) - } + ">" | "<" | "<=" | ">=" => return self.parse_numeric_unary_condition(op, fid, value), _ => unreachable!(), }; Ok((input, k)) @@ -326,92 +320,6 @@ impl FilterCondition { } impl FilterCondition { - pub fn from_array_pest( - rtxn: &heed::RoTxn, - index: &Index, - array: I, - ) -> Result> - where - I: IntoIterator>, - J: IntoIterator, - A: AsRef, - B: AsRef, - { - let mut ands = None; - - for either in array { - match either { - Either::Left(array) => { - let mut ors = None; - for rule in array { - let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; - ors = match ors.take() { - Some(ors) => Some(Or(Box::new(ors), Box::new(condition))), - None => Some(condition), - }; - } - - if let Some(rule) = ors { - ands = match ands.take() { - Some(ands) => Some(And(Box::new(ands), Box::new(rule))), - None => Some(rule), - }; - } - } - Either::Right(rule) => { - let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; - ands = match ands.take() { - Some(ands) => Some(And(Box::new(ands), Box::new(condition))), - None => Some(condition), - }; - } - } - } - - Ok(ands) - } - - pub fn from_str_pest( - rtxn: &heed::RoTxn, - index: &Index, - expression: &str, - ) -> Result { - let fields_ids_map = index.fields_ids_map(rtxn)?; - let filterable_fields = index.filterable_fields(rtxn)?; - let lexed = - FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?; - FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) - } - - fn from_pairs( - fim: &FieldsIdsMap, - ff: &HashSet, - expression: Pairs, - ) -> Result { - PREC_CLIMBER.climb( - expression, - |pair: Pair| match pair.as_rule() { - Rule::greater => Ok(Self::greater_than(fim, ff, pair)?), - Rule::geq => Ok(Self::greater_than_or_equal(fim, ff, pair)?), - Rule::eq => Ok(Self::equal(fim, ff, pair)?), - Rule::neq => Ok(Self::equal(fim, ff, pair)?.negate()), - Rule::leq => Ok(Self::lower_than_or_equal(fim, ff, pair)?), - Rule::less => Ok(Self::lower_than(fim, ff, pair)?), - Rule::between => Ok(Self::between(fim, ff, pair)?), - Rule::geo_radius => Ok(Self::geo_radius(fim, ff, pair)?), - Rule::not => Ok(Self::from_pairs(fim, ff, pair.into_inner())?.negate()), - Rule::prgm => Self::from_pairs(fim, ff, pair.into_inner()), - Rule::term => Self::from_pairs(fim, ff, pair.into_inner()), - _ => unreachable!(), - }, - |lhs: Result, op: Pair, rhs: Result| match op.as_rule() { - Rule::or => Ok(Or(Box::new(lhs?), Box::new(rhs?))), - Rule::and => Ok(And(Box::new(lhs?), Box::new(rhs?))), - _ => unreachable!(), - }, - ) - } - fn negate(self) -> FilterCondition { match self { Operator(fid, op) => match op.negate() { @@ -484,128 +392,6 @@ impl FilterCondition { } Ok(Operator(fid, GeoLowerThan([lat.0, lng.0], distance))) } - - fn between( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - item: Pair, - ) -> Result { - let mut items = item.into_inner(); - let fid = match field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)? - { - Some(fid) => fid, - None => return Ok(Empty), - }; - - let (lresult, _) = pest_parse(items.next().unwrap()); - let (rresult, _) = pest_parse(items.next().unwrap()); - - let lvalue = lresult.map_err(UserError::InvalidFilter)?; - let rvalue = rresult.map_err(UserError::InvalidFilter)?; - - Ok(Operator(fid, Between(lvalue, rvalue))) - } - - fn equal( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - item: Pair, - ) -> Result { - let mut items = item.into_inner(); - let fid = match field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)? - { - Some(fid) => fid, - None => return Ok(Empty), - }; - - let value = items.next().unwrap(); - let (result, svalue) = pest_parse(value); - - let svalue = svalue.to_lowercase(); - Ok(Operator(fid, Equal(result.ok(), svalue))) - } - - fn greater_than( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - item: Pair, - ) -> Result { - let mut items = item.into_inner(); - let fid = match field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)? - { - Some(fid) => fid, - None => return Ok(Empty), - }; - - let value = items.next().unwrap(); - let (result, _svalue) = pest_parse(value); - let value = result.map_err(UserError::InvalidFilter)?; - - Ok(Operator(fid, GreaterThan(value))) - } - - fn greater_than_or_equal( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - item: Pair, - ) -> Result { - let mut items = item.into_inner(); - let fid = match field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)? - { - Some(fid) => fid, - None => return Ok(Empty), - }; - - let value = items.next().unwrap(); - let (result, _svalue) = pest_parse(value); - let value = result.map_err(UserError::InvalidFilter)?; - - Ok(Operator(fid, GreaterThanOrEqual(value))) - } - - fn lower_than( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - item: Pair, - ) -> Result { - let mut items = item.into_inner(); - let fid = match field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)? - { - Some(fid) => fid, - None => return Ok(Empty), - }; - - let value = items.next().unwrap(); - let (result, _svalue) = pest_parse(value); - let value = result.map_err(UserError::InvalidFilter)?; - - Ok(Operator(fid, LowerThan(value))) - } - - fn lower_than_or_equal( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - item: Pair, - ) -> Result { - let mut items = item.into_inner(); - let fid = match field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)? - { - Some(fid) => fid, - None => return Ok(Empty), - }; - - let value = items.next().unwrap(); - let (result, _svalue) = pest_parse(value); - let value = result.map_err(UserError::InvalidFilter)?; - - Ok(Operator(fid, LowerThanOrEqual(value))) - } } impl FilterCondition { @@ -855,72 +641,6 @@ impl FilterCondition { /// /// The pest pair is simply a string associated with a span, a location to highlight in /// the error message. -fn field_id( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - items: &mut Pairs, -) -> StdResult, PestError> { - // lexing ensures that we at least have a key - let key = items.next().unwrap(); - if key.as_rule() == Rule::reserved { - let message = match key.as_str() { - key if key.starts_with("_geoPoint") => { - format!( - "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. \ - Use the `_geoRadius(latitude, longitude, distance)` built-in rule to filter on `_geo` field coordinates.", - ) - } - key @ "_geo" => { - format!( - "`{}` is a reserved keyword and thus can't be used as a filter expression. \ - Use the `_geoRadius(latitude, longitude, distance)` built-in rule to filter on `_geo` field coordinates.", - key - ) - } - key => format!( - "`{}` is a reserved keyword and thus can't be used as a filter expression.", - key - ), - }; - return Err(PestError::new_from_span(ErrorVariant::CustomError { message }, key.as_span())); - } - - if !filterable_fields.contains(key.as_str()) { - return Err(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!( - "attribute `{}` is not filterable, available filterable attributes are: {}.", - key.as_str(), - filterable_fields.iter().join(", "), - ), - }, - key.as_span(), - )); - } - - Ok(fields_ids_map.id(key.as_str())) -} - -/// Tries to parse the pest pair into the type `T` specified, always returns -/// the original string that we tried to parse. -/// -/// Returns the parsing error associated with the span if the conversion fails. -fn pest_parse(pair: Pair) -> (StdResult>, String) -where - T: FromStr, - T::Err: ToString, -{ - let result = match pair.as_str().parse::() { - Ok(value) => Ok(value), - Err(e) => Err(PestError::::new_from_span( - ErrorVariant::CustomError { message: e.to_string() }, - pair.as_span(), - )), - }; - - (result, pair.as_str().to_string()) -} - #[cfg(test)] mod tests { use big_s::S; @@ -991,6 +711,27 @@ mod tests { assert_eq!(condition, expected); } + #[test] + fn compare() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "channel < 20").unwrap(); + let expected = Operator(0, LowerThan(20.0)); + + assert_eq!(condition, expected); + } + #[test] fn parentheses() { let path = tempfile::tempdir().unwrap(); From f7796edc7e6a814680b658701ac6331aee85cf42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Tue, 28 Sep 2021 17:23:49 +0800 Subject: [PATCH 04/15] remove everything about pest --- milli/Cargo.toml | 4 +--- milli/src/error.rs | 12 +++++++----- milli/src/lib.rs | 3 --- milli/src/search/facet/filter_condition.rs | 3 +-- milli/src/search/facet/mod.rs | 2 -- milli/src/search/facet/parser.rs | 12 ------------ milli/src/search/mod.rs | 1 - 7 files changed, 9 insertions(+), 28 deletions(-) delete mode 100644 milli/src/search/facet/parser.rs diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 50ada6bfb..007d9d415 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -24,7 +24,6 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } linked-hash-map = "0.5.4" meilisearch-tokenizer = { git = "https://github.com/meilisearch/tokenizer.git", tag = "v0.2.5" } memmap = "0.7.0" -nom = "7" obkv = "0.2.0" once_cell = "1.5.2" ordered-float = "2.1.1" @@ -40,8 +39,7 @@ tempfile = "3.2.0" uuid = { version = "0.8.2", features = ["v4"] } # facet filter parser -pest = { git = "https://github.com/pest-parser/pest.git", rev = "51fd1d49f1041f7839975664ef71fe15c7dcaf67" } -pest_derive = "2.1.0" +nom = "7" # documents words self-join itertools = "0.10.0" diff --git a/milli/src/error.rs b/milli/src/error.rs index a798539cd..b80238468 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -7,7 +7,6 @@ use heed::{Error as HeedError, MdbError}; use rayon::ThreadPoolBuildError; use serde_json::{Map, Value}; -use crate::search::ParserRule; use crate::{CriterionError, DocumentId, FieldId, SortError}; pub type Object = Map; @@ -59,8 +58,6 @@ pub enum UserError { DocumentLimitReached, InvalidDocumentId { document_id: Value }, InvalidFacetsDistribution { invalid_facets_name: HashSet }, - InvalidFilter(pest::error::Error), - InvalidFilterAttribute(pest::error::Error), InvalidGeoField { document_id: Value, object: Value }, InvalidFilterAttributeNom, InvalidFilterValue, @@ -226,12 +223,15 @@ impl fmt::Display for UserError { name_list ) } - Self::InvalidFilter(error) => error.fmt(f), Self::InvalidGeoField { document_id, object } => write!( f, "the document with the id: {} contains an invalid _geo field: {}", document_id, object ), + Self::InvalidAscDescSyntax { name } => { + write!(f, "invalid asc/desc syntax for {}", name) + } + Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name), Self::InvalidDocumentId { document_id } => { let json = serde_json::to_string(document_id).unwrap(); write!( @@ -242,7 +242,9 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco json ) } - Self::InvalidFilterAttribute(error) => error.fmt(f), + Self::InvalidSortName { name } => { + write!(f, "Invalid syntax for the sort parameter: {}", name) + } Self::InvalidSortableAttribute { field, valid_fields } => { let valid_names = valid_fields.iter().map(AsRef::as_ref).collect::>().join(", "); diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 838817d98..33b3d9c5c 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -1,6 +1,3 @@ -#[macro_use] -extern crate pest_derive; - #[macro_use] pub mod documents; diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index d51ae27cd..e39687117 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -23,8 +23,7 @@ use nom::{ use self::FilterCondition::*; use self::Operator::*; -use super::parser::FilterParser; -use super::parser::{Rule, PREC_CLIMBER}; + use super::FacetNumberRange; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index ddf710e32..a5c041dd5 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -2,10 +2,8 @@ pub use self::facet_distribution::FacetDistribution; pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::facet_string::FacetStringIter; pub use self::filter_condition::{FilterCondition, Operator}; -pub(crate) use self::parser::Rule as ParserRule; mod facet_distribution; mod facet_number; mod facet_string; mod filter_condition; -mod parser; diff --git a/milli/src/search/facet/parser.rs b/milli/src/search/facet/parser.rs deleted file mode 100644 index 1bff27cfb..000000000 --- a/milli/src/search/facet/parser.rs +++ /dev/null @@ -1,12 +0,0 @@ -use once_cell::sync::Lazy; -use pest::prec_climber::{Assoc, Operator, PrecClimber}; - -pub static PREC_CLIMBER: Lazy> = Lazy::new(|| { - use Assoc::*; - use Rule::*; - pest::prec_climber::PrecClimber::new(vec![Operator::new(or, Left), Operator::new(and, Left)]) -}); - -#[derive(Parser)] -#[grammar = "search/facet/grammar.pest"] -pub struct FilterParser; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index bec059d46..85d5dc8a7 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -14,7 +14,6 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub(crate) use self::facet::ParserRule; pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition, Operator}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; From 7a90a101ee792af7df908cb2ef5e514a253096f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Tue, 28 Sep 2021 17:50:15 +0800 Subject: [PATCH 05/15] reorganize parser logic --- milli/src/search/facet/filter_condition.rs | 617 +-------------------- milli/src/search/facet/filter_parser.rs | 500 +++++++++++++++++ milli/src/search/facet/mod.rs | 3 +- milli/src/search/mod.rs | 2 +- 4 files changed, 518 insertions(+), 604 deletions(-) create mode 100644 milli/src/search/facet/filter_parser.rs diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index e39687117..c728e0acd 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -1,29 +1,16 @@ use std::collections::HashSet; use std::fmt::Debug; use std::ops::Bound::{self, Excluded, Included}; -use std::result::Result as StdResult; use either::Either; use heed::types::DecodeIgnore; use log::debug; +use nom::error::VerboseError; use roaring::RoaringBitmap; -use nom::{ - branch::alt, - bytes::complete::{tag, take_while1}, - character::complete::{char, multispace0}, - combinator::map, - error::ErrorKind, - error::ParseError, - error::VerboseError, - multi::many0, - sequence::{delimited, preceded, tuple}, - IResult, -}; - use self::FilterCondition::*; -use self::Operator::*; +use super::filter_parser::{Operator, ParseContext}; use super::FacetNumberRange; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ @@ -33,37 +20,6 @@ use crate::{ distance_between_two_points, CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result, }; -#[derive(Debug, Clone, PartialEq)] -pub enum Operator { - GreaterThan(f64), - GreaterThanOrEqual(f64), - Equal(Option, String), - NotEqual(Option, String), - LowerThan(f64), - LowerThanOrEqual(f64), - Between(f64, f64), - GeoLowerThan([f64; 2], f64), - GeoGreaterThan([f64; 2], f64), -} - -impl Operator { - /// This method can return two operations in case it must express - /// an OR operation for the between case (i.e. `TO`). - fn negate(self) -> (Self, Option) { - match self { - GreaterThan(n) => (LowerThanOrEqual(n), None), - GreaterThanOrEqual(n) => (LowerThan(n), None), - Equal(n, s) => (NotEqual(n, s), None), - NotEqual(n, s) => (Equal(n, s), None), - LowerThan(n) => (GreaterThanOrEqual(n), None), - LowerThanOrEqual(n) => (GreaterThan(n), None), - Between(n, m) => (LowerThan(n), Some(GreaterThan(m))), - GeoLowerThan(point, distance) => (GeoGreaterThan(point, distance), None), - GeoGreaterThan(point, distance) => (GeoLowerThan(point, distance), None), - } - } -} - #[derive(Debug, Clone, PartialEq)] pub enum FilterCondition { Operator(FieldId, Operator), @@ -72,190 +28,8 @@ pub enum FilterCondition { Empty, } -struct ParseContext<'a> { - fields_ids_map: &'a FieldsIdsMap, - filterable_fields: &'a HashSet, -} // impl From -impl<'a> ParseContext<'a> { - fn parse_or_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: ParseError<&'a str>, - { - let (input, lhs) = self.parse_and_nom(input)?; - let (input, ors) = many0(preceded(tag("OR"), |c| Self::parse_or_nom(self, c)))(input)?; - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); - Ok((input, expr)) - } - - fn parse_and_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: ParseError<&'a str>, - { - let (input, lhs) = self.parse_not_nom(input)?; - let (input, ors) = many0(preceded(tag("AND"), |c| Self::parse_and_nom(self, c)))(input)?; - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); - Ok((input, expr)) - } - - fn parse_not_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: ParseError<&'a str>, - { - alt(( - map( - preceded(alt((self.ws(tag("!")), self.ws(tag("NOT")))), |c| { - Self::parse_condition_expression(self, c) - }), - |e| e.negate(), - ), - |c| Self::parse_condition_expression(self, c), - ))(input) - } - - fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> - where - F: Fn(&'a str) -> IResult<&'a str, O, E>, - E: ParseError<&'a str>, - { - delimited(multispace0, inner, multispace0) - } - - fn parse_simple_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: ParseError<&'a str>, - { - let operator = alt((tag(">"), tag(">="), tag("="), tag("<"), tag("!="), tag("<="))); - let (input, (key, op, value)) = - tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_key(c))))( - input, - )?; - let fid = self.parse_fid(input, key)?; - let r: StdResult>> = self.parse_numeric(value); - let k = match op { - "=" => Operator(fid, Equal(r.ok(), value.to_string().to_lowercase())), - "!=" => Operator(fid, NotEqual(r.ok(), value.to_string().to_lowercase())), - ">" | "<" | "<=" | ">=" => return self.parse_numeric_unary_condition(op, fid, value), - _ => unreachable!(), - }; - Ok((input, k)) - } - - fn parse_numeric(&'a self, input: &'a str) -> StdResult> - where - E: ParseError<&'a str>, - T: std::str::FromStr, - { - match input.parse::() { - Ok(n) => Ok(n), - Err(_) => { - return match input.chars().nth(0) { - Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), - None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), - }; - } - } - } - - fn parse_numeric_unary_condition( - &'a self, - input: &'a str, - fid: u16, - value: &'a str, - ) -> IResult<&'a str, FilterCondition, E> - where - E: ParseError<&'a str>, - { - let numeric: f64 = self.parse_numeric(value)?; - let k = match input { - ">" => Operator(fid, GreaterThan(numeric)), - "<" => Operator(fid, LowerThan(numeric)), - "<=" => Operator(fid, LowerThanOrEqual(numeric)), - ">=" => Operator(fid, GreaterThanOrEqual(numeric)), - _ => unreachable!(), - }; - Ok((input, k)) - } - - fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> - where - E: ParseError<&'a str>, - { - let error = match input.chars().nth(0) { - Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), - None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), - }; - if !self.filterable_fields.contains(key) { - return error; - } - match self.fields_ids_map.id(key) { - Some(fid) => Ok(fid), - None => error, - } - } - - fn parse_range_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: ParseError<&'a str>, - { - let (input, (key, from, _, to)) = tuple(( - self.ws(|c| self.parse_key(c)), - self.ws(|c| self.parse_key(c)), - tag("TO"), - self.ws(|c| self.parse_key(c)), - ))(input)?; - - let fid = self.parse_fid(input, key)?; - let numeric_from: f64 = self.parse_numeric(from)?; - let numeric_to: f64 = self.parse_numeric(to)?; - let res = Operator(fid, Between(numeric_from, numeric_to)); - Ok((input, res)) - } - - fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: ParseError<&'a str>, - { - let l1 = |c| self.parse_simple_condition(c); - let l2 = |c| self.parse_range_condition(c); - let (input, condition) = alt((l1, l2))(input)?; - Ok((input, condition)) - } - - fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> - where - E: ParseError<&'a str>, - { - return alt(( - delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))), - |c| Self::parse_condition(self, c), - ))(input); - } - - fn parse_key(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> - where - E: ParseError<&'a str>, - { - let key = |input| take_while1(Self::is_key_component)(input); - alt((key, delimited(char('"'), key, char('"'))))(input) - } - fn is_key_component(c: char) -> bool { - c.is_alphanumeric() || ['_', '-', '.'].contains(&c) - } - - pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: ParseError<&'a str>, - { - self.parse_or_nom(input) - } -} - //for nom impl FilterCondition { pub fn from_array( @@ -269,7 +43,7 @@ impl FilterCondition { A: AsRef, B: AsRef, { - let mut ands = None; + let mut ands: Option = None; for either in array { match either { @@ -316,10 +90,7 @@ impl FilterCondition { Err(e) => Err(Error::UserError(UserError::InvalidFilterNom { input: e.to_string() })), } } -} - -impl FilterCondition { - fn negate(self) -> FilterCondition { + pub fn negate(self) -> FilterCondition { match self { Operator(fid, op) => match op.negate() { (op, None) => Operator(fid, op), @@ -389,7 +160,7 @@ impl FilterCondition { lng.1.clone(), )))?; } - Ok(Operator(fid, GeoLowerThan([lat.0, lng.0], distance))) + Ok(Operator(fid, Operator::GeoLowerThan([lat.0, lng.0], distance))) } } @@ -514,9 +285,9 @@ impl FilterCondition { // as the facets values are all in the same database and prefixed by the // field id and the level. let (left, right) = match operator { - GreaterThan(val) => (Excluded(*val), Included(f64::MAX)), - GreaterThanOrEqual(val) => (Included(*val), Included(f64::MAX)), - Equal(number, string) => { + Operator::GreaterThan(val) => (Excluded(*val), Included(f64::MAX)), + Operator::GreaterThanOrEqual(val) => (Included(*val), Included(f64::MAX)), + Operator::Equal(number, string) => { let (_original_value, string_docids) = strings_db.get(rtxn, &(field_id, &string))?.unwrap_or_default(); let number_docids = match number { @@ -538,23 +309,23 @@ impl FilterCondition { }; return Ok(string_docids | number_docids); } - NotEqual(number, string) => { + Operator::NotEqual(number, string) => { let all_numbers_ids = if number.is_some() { index.number_faceted_documents_ids(rtxn, field_id)? } else { RoaringBitmap::new() }; let all_strings_ids = index.string_faceted_documents_ids(rtxn, field_id)?; - let operator = Equal(*number, string.clone()); + let operator = Operator::Equal(*number, string.clone()); let docids = Self::evaluate_operator( rtxn, index, numbers_db, strings_db, field_id, &operator, )?; return Ok((all_numbers_ids | all_strings_ids) - docids); } - LowerThan(val) => (Included(f64::MIN), Excluded(*val)), - LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)), - Between(left, right) => (Included(*left), Included(*right)), - GeoLowerThan(base_point, distance) => { + Operator::LowerThan(val) => (Included(f64::MIN), Excluded(*val)), + Operator::LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)), + Operator::Between(left, right) => (Included(*left), Included(*right)), + Operator::GeoLowerThan(base_point, distance) => { let rtree = match index.geo_rtree(rtxn)? { Some(rtree) => rtree, None => return Ok(RoaringBitmap::new()), @@ -570,7 +341,7 @@ impl FilterCondition { return Ok(result); } - GeoGreaterThan(point, distance) => { + Operator::GeoGreaterThan(point, distance) => { let result = Self::evaluate_operator( rtxn, index, @@ -631,361 +402,3 @@ impl FilterCondition { } } } - -/// Retrieve the field id base on the pest value. -/// -/// Returns an error if the given value is not filterable. -/// -/// Returns Ok(None) if the given value is filterable, but is not yet ascociated to a field_id. -/// -/// The pest pair is simply a string associated with a span, a location to highlight in -/// the error message. -#[cfg(test)] -mod tests { - use big_s::S; - use heed::EnvOpenOptions; - use maplit::hashset; - - use super::*; - use crate::update::Settings; - - #[test] - fn string() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut map = index.fields_ids_map(&wtxn).unwrap(); - map.insert("channel"); - index.put_fields_ids_map(&mut wtxn, &map).unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields(hashset! { S("channel") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap(); - let expected = Operator(0, Operator::Equal(None, S("ponce"))); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str(&rtxn, &index, "channel != ponce").unwrap(); - let expected = Operator(0, Operator::NotEqual(None, S("ponce"))); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap(); - let expected = Operator(0, Operator::NotEqual(None, S("ponce"))); - assert_eq!(condition, expected); - } - - #[test] - fn number() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut map = index.fields_ids_map(&wtxn).unwrap(); - map.insert("timestamp"); - index.put_fields_ids_map(&mut wtxn, &map).unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields(hashset! { "timestamp".into() }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); - let expected = Operator(0, Between(22.0, 44.0)); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); - let expected = - Or(Box::new(Operator(0, LowerThan(22.0))), Box::new(Operator(0, GreaterThan(44.0)))); - assert_eq!(condition, expected); - } - - #[test] - fn compare() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "channel < 20").unwrap(); - let expected = Operator(0, LowerThan(20.0)); - - assert_eq!(condition, expected); - } - - #[test] - fn parentheses() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)", - ) - .unwrap(); - let expected = Or( - Box::new(Operator(0, Operator::Equal(None, S("gotaga")))), - Box::new(And( - Box::new(Operator(1, Between(22.0, 44.0))), - Box::new(Operator(0, Operator::NotEqual(None, S("ponce")))), - )), - ); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)", - ) - .unwrap(); - let expected = Or( - Box::new(Operator(0, Operator::Equal(None, S("gotaga")))), - Box::new(Or( - Box::new(Or( - Box::new(Operator(1, LowerThan(22.0))), - Box::new(Operator(1, GreaterThan(44.0))), - )), - Box::new(Operator(0, Operator::Equal(None, S("ponce")))), - )), - ); - assert_eq!(condition, expected); - } - - #[test] - fn reserved_field_names() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - let rtxn = index.read_txn().unwrap(); - - let error = FilterCondition::from_str(&rtxn, &index, "_geo = 12").unwrap_err(); - assert!(error - .to_string() - .contains("`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` built-in rule to filter on `_geo` field coordinates."), - "{}", - error.to_string() - ); - - let error = - FilterCondition::from_str(&rtxn, &index, r#"_geoDistance <= 1000"#).unwrap_err(); - assert!(error - .to_string() - .contains("`_geoDistance` is a reserved keyword and thus can't be used as a filter expression."), - "{}", - error.to_string() - ); - - let error = FilterCondition::from_str(&rtxn, &index, r#"_geoPoint > 5"#).unwrap_err(); - assert!(error - .to_string() - .contains("`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` built-in rule to filter on `_geo` field coordinates."), - "{}", - error.to_string() - ); - - let error = - FilterCondition::from_str(&rtxn, &index, r#"_geoPoint(12, 16) > 5"#).unwrap_err(); - assert!(error - .to_string() - .contains("`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` built-in rule to filter on `_geo` field coordinates."), - "{}", - error.to_string() - ); - } - - #[test] - fn geo_radius() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - // _geo is not filterable - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 12, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("attribute `_geo` is not filterable, available filterable attributes are:"),); - - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - // basic test - let condition = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap(); - let expected = Operator(0, GeoLowerThan([12., 13.0005], 2000.)); - assert_eq!(condition, expected); - - // basic test with latitude and longitude at the max angle - let condition = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(90, 180, 2000)").unwrap(); - let expected = Operator(0, GeoLowerThan([90., 180.], 2000.)); - assert_eq!(condition, expected); - - // basic test with latitude and longitude at the min angle - let condition = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(-90, -180, 2000)").unwrap(); - let expected = Operator(0, GeoLowerThan([-90., -180.], 2000.)); - assert_eq!(condition, expected); - - // test the negation of the GeoLowerThan - let condition = - FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap(); - let expected = Operator(0, GeoGreaterThan([50., 18.], 2000.500)); - assert_eq!(condition, expected); - - // composition of multiple operations - let condition = FilterCondition::from_str( - &rtxn, - &index, - "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", - ) - .unwrap(); - let expected = Or( - Box::new(And( - Box::new(Operator(0, GeoGreaterThan([1., 2.], 300.))), - Box::new(Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))), - )), - Box::new(Operator(1, LowerThanOrEqual(10.))), - ); - assert_eq!(condition, expected); - - // georadius don't have any parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius don't have any parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius()"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius don't have enough parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius have too many parameters - let result = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius have a bad latitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-100, 150, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Latitude must be contained between -90 and 90 degrees.")); - - // georadius have a bad latitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-90.0000001, 150, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Latitude must be contained between -90 and 90 degrees.")); - - // georadius have a bad longitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 250, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Longitude must be contained between -180 and 180 degrees.")); - - // georadius have a bad longitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 180.000001, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Longitude must be contained between -180 and 180 degrees.")); - } - - #[test] - fn from_array() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array( - &rtxn, - &index, - vec![ - Either::Right("channel = gotaga"), - Either::Left(vec!["timestamp = 44", "channel != ponce"]), - ], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga AND (timestamp = 44 OR channel != ponce)", - ) - .unwrap(); - assert_eq!(condition, expected); - } -} diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs new file mode 100644 index 000000000..53a51ca49 --- /dev/null +++ b/milli/src/search/facet/filter_parser.rs @@ -0,0 +1,500 @@ +use std::collections::HashSet; +use std::fmt::Debug; +use std::result::Result as StdResult; + +use super::FilterCondition; +use crate::{FieldId, FieldsIdsMap}; +use nom::{ + branch::alt, + bytes::complete::{tag, take_while1}, + character::complete::{char, multispace0}, + combinator::map, + error::ErrorKind, + error::ParseError, + error::VerboseError, + multi::many0, + sequence::{delimited, preceded, tuple}, + IResult, +}; + +use self::Operator::*; +#[derive(Debug, Clone, PartialEq)] +pub enum Operator { + GreaterThan(f64), + GreaterThanOrEqual(f64), + Equal(Option, String), + NotEqual(Option, String), + LowerThan(f64), + LowerThanOrEqual(f64), + Between(f64, f64), + GeoLowerThan([f64; 2], f64), + GeoGreaterThan([f64; 2], f64), +} + +impl Operator { + /// This method can return two operations in case it must express + /// an OR operation for the between case (i.e. `TO`). + pub fn negate(self) -> (Self, Option) { + match self { + GreaterThan(n) => (LowerThanOrEqual(n), None), + GreaterThanOrEqual(n) => (LowerThan(n), None), + Equal(n, s) => (NotEqual(n, s), None), + NotEqual(n, s) => (Equal(n, s), None), + LowerThan(n) => (GreaterThanOrEqual(n), None), + LowerThanOrEqual(n) => (GreaterThan(n), None), + Between(n, m) => (LowerThan(n), Some(GreaterThan(m))), + } + } +} + +pub struct ParseContext<'a> { + pub fields_ids_map: &'a FieldsIdsMap, + pub filterable_fields: &'a HashSet, +} + +impl<'a> ParseContext<'a> { + fn parse_or_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let (input, lhs) = self.parse_and_nom(input)?; + let (input, ors) = many0(preceded(tag("OR"), |c| Self::parse_or_nom(self, c)))(input)?; + let expr = ors + .into_iter() + .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); + Ok((input, expr)) + } + + fn parse_and_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let (input, lhs) = self.parse_not_nom(input)?; + let (input, ors) = many0(preceded(tag("AND"), |c| Self::parse_and_nom(self, c)))(input)?; + let expr = ors + .into_iter() + .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); + Ok((input, expr)) + } + + fn parse_not_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + alt(( + map( + preceded(alt((self.ws(tag("!")), self.ws(tag("NOT")))), |c| { + Self::parse_condition_expression(self, c) + }), + |e| e.negate(), + ), + |c| Self::parse_condition_expression(self, c), + ))(input) + } + + fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> + where + F: Fn(&'a str) -> IResult<&'a str, O, E>, + E: ParseError<&'a str>, + { + delimited(multispace0, inner, multispace0) + } + + fn parse_simple_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let operator = alt((tag(">"), tag(">="), tag("="), tag("<"), tag("!="), tag("<="))); + let (input, (key, op, value)) = + tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_key(c))))( + input, + )?; + let fid = self.parse_fid(input, key)?; + let r: StdResult>> = self.parse_numeric(value); + let k = match op { + "=" => FilterCondition::Operator(fid, Equal(r.ok(), value.to_string().to_lowercase())), + "!=" => { + FilterCondition::Operator(fid, NotEqual(r.ok(), value.to_string().to_lowercase())) + } + ">" | "<" | "<=" | ">=" => return self.parse_numeric_unary_condition(op, fid, value), + _ => unreachable!(), + }; + Ok((input, k)) + } + + fn parse_numeric(&'a self, input: &'a str) -> StdResult> + where + E: ParseError<&'a str>, + T: std::str::FromStr, + { + match input.parse::() { + Ok(n) => Ok(n), + Err(_) => { + return match input.chars().nth(0) { + Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), + None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), + }; + } + } + } + + fn parse_numeric_unary_condition( + &'a self, + input: &'a str, + fid: u16, + value: &'a str, + ) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let numeric: f64 = self.parse_numeric(value)?; + let k = match input { + ">" => FilterCondition::Operator(fid, GreaterThan(numeric)), + "<" => FilterCondition::Operator(fid, LowerThan(numeric)), + "<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)), + ">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)), + _ => unreachable!(), + }; + Ok((input, k)) + } + + fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> + where + E: ParseError<&'a str>, + { + let error = match input.chars().nth(0) { + Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), + None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), + }; + if !self.filterable_fields.contains(key) { + return error; + } + match self.fields_ids_map.id(key) { + Some(fid) => Ok(fid), + None => error, + } + } + + fn parse_range_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let (input, (key, from, _, to)) = tuple(( + self.ws(|c| self.parse_key(c)), + self.ws(|c| self.parse_key(c)), + tag("TO"), + self.ws(|c| self.parse_key(c)), + ))(input)?; + + let fid = self.parse_fid(input, key)?; + let numeric_from: f64 = self.parse_numeric(from)?; + let numeric_to: f64 = self.parse_numeric(to)?; + let res = FilterCondition::Operator(fid, Between(numeric_from, numeric_to)); + Ok((input, res)) + } + + fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let l1 = |c| self.parse_simple_condition(c); + let l2 = |c| self.parse_range_condition(c); + let (input, condition) = alt((l1, l2))(input)?; + Ok((input, condition)) + } + + fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + return alt(( + delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))), + |c| Self::parse_condition(self, c), + ))(input); + } + + fn parse_key(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> + where + E: ParseError<&'a str>, + { + let key = |input| take_while1(Self::is_key_component)(input); + alt((key, delimited(char('"'), key, char('"'))))(input) + } + fn is_key_component(c: char) -> bool { + c.is_alphanumeric() || ['_', '-', '.'].contains(&c) + } + + pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + self.parse_or_nom(input) + } +} + +#[cfg(test)] +mod tests { + use big_s::S; + use either::Either; + use heed::EnvOpenOptions; + use maplit::hashset; + + use super::*; + use crate::{update::Settings, Index}; + + #[test] + fn string() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut map = index.fields_ids_map(&wtxn).unwrap(); + map.insert("channel"); + index.put_fields_ids_map(&mut wtxn, &map).unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_filterable_fields(hashset! { S("channel") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "channel = Ponce").unwrap(); + let expected = FilterCondition::Operator(0, Operator::Equal(None, S("ponce"))); + assert_eq!(condition, expected); + + let condition = FilterCondition::from_str(&rtxn, &index, "channel != ponce").unwrap(); + let expected = FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce"))); + assert_eq!(condition, expected); + + let condition = FilterCondition::from_str(&rtxn, &index, "NOT channel = ponce").unwrap(); + let expected = FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce"))); + assert_eq!(condition, expected); + } + + #[test] + fn number() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut map = index.fields_ids_map(&wtxn).unwrap(); + map.insert("timestamp"); + index.put_fields_ids_map(&mut wtxn, &map).unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_filterable_fields(hashset! { "timestamp".into() }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); + let expected = FilterCondition::Operator(0, Between(22.0, 44.0)); + assert_eq!(condition, expected); + + let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::Operator(0, LowerThan(22.0))), + Box::new(FilterCondition::Operator(0, GreaterThan(44.0))), + ); + assert_eq!(condition, expected); + } + + #[test] + fn compare() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "channel < 20").unwrap(); + let expected = FilterCondition::Operator(0, LowerThan(20.0)); + + assert_eq!(condition, expected); + } + + #[test] + fn parentheses() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str( + &rtxn, + &index, + "channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)", + ) + .unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), + Box::new(FilterCondition::And( + Box::new(FilterCondition::Operator(1, Between(22.0, 44.0))), + Box::new(FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce")))), + )), + ); + assert_eq!(condition, expected); + + let condition = FilterCondition::from_str( + &rtxn, + &index, + "channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)", + ) + .unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), + Box::new(FilterCondition::Or( + Box::new(FilterCondition::Or( + Box::new(FilterCondition::Operator(1, LowerThan(22.0))), + Box::new(FilterCondition::Operator(1, GreaterThan(44.0))), + )), + Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("ponce")))), + )), + ); + assert_eq!(condition, expected); + } + + #[test] + fn from_array() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array( + &rtxn, + &index, + vec![ + Either::Right("channel = gotaga"), + Either::Left(vec!["timestamp = 44", "channel != ponce"]), + ], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str( + &rtxn, + &index, + "channel = gotaga AND (timestamp = 44 OR channel != ponce)", + ) + .unwrap(); + assert_eq!(condition, expected); + } + #[test] + fn geo_radius() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + // basic test + let condition = + FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap(); + let expected = Operator(0, GeoLowerThan([12., 13.0005], 2000.)); + assert_eq!(condition, expected); + + // test the negation of the GeoLowerThan + let condition = + FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap(); + let expected = Operator(0, GeoGreaterThan([50., 18.], 2000.500)); + assert_eq!(condition, expected); + + // composition of multiple operations + let condition = FilterCondition::from_str( + &rtxn, + &index, + "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", + ) + .unwrap(); + let expected = Or( + Box::new(And( + Box::new(Operator(0, GeoGreaterThan([1., 2.], 300.))), + Box::new(Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))), + )), + Box::new(Operator(1, LowerThanOrEqual(10.))), + ); + assert_eq!(condition, expected); + + // georadius don't have any parameters + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius don't have any parameters + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius()"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius don't have enough parameters + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius have too many parameters + let result = + FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius have a bad latitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-200, 150, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Latitude and longitude must be contained between -180 to 180 degrees.")); + + // georadius have a bad longitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 181, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Latitude and longitude must be contained between -180 to 180 degrees.")); + } +} diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index a5c041dd5..3efa0262f 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,9 +1,10 @@ pub use self::facet_distribution::FacetDistribution; pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::facet_string::FacetStringIter; -pub use self::filter_condition::{FilterCondition, Operator}; +pub use self::filter_condition::FilterCondition; mod facet_distribution; mod facet_number; mod facet_string; mod filter_condition; +mod filter_parser; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 85d5dc8a7..9b76ca851 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -14,7 +14,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition, Operator}; +pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; use crate::error::UserError; From 469d92c569a41e0f98a81e9597ccfec02466282c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Sun, 10 Oct 2021 14:50:59 +0800 Subject: [PATCH 06/15] tweak error handling --- milli/src/error.rs | 6 +- milli/src/search/facet/filter_condition.rs | 75 ++++------------------ milli/src/search/facet/filter_parser.rs | 71 +++++++++++++++++--- 3 files changed, 74 insertions(+), 78 deletions(-) diff --git a/milli/src/error.rs b/milli/src/error.rs index b80238468..3ae18165f 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -210,6 +210,8 @@ impl fmt::Display for UserError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { //TODO + Self::InvalidFilterAttributeNom => write!(f, "parser error "), + Self::InvalidFilterValue => write!(f, "parser error "), Self::InvalidFilterNom { input } => write!(f, "parser error {}", input), Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), Self::CriterionError(error) => write!(f, "{}", error), @@ -228,10 +230,6 @@ impl fmt::Display for UserError { "the document with the id: {} contains an invalid _geo field: {}", document_id, object ), - Self::InvalidAscDescSyntax { name } => { - write!(f, "invalid asc/desc syntax for {}", name) - } - Self::InvalidCriterionName { name } => write!(f, "invalid criterion {}", name), Self::InvalidDocumentId { document_id } => { let json = serde_json::to_string(document_id).unwrap(); write!( diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index c728e0acd..c4fa8e561 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -5,7 +5,7 @@ use std::ops::Bound::{self, Excluded, Included}; use either::Either; use heed::types::DecodeIgnore; use log::debug; -use nom::error::VerboseError; +use nom::error::{convert_error, VerboseError}; use roaring::RoaringBitmap; use self::FilterCondition::*; @@ -87,7 +87,15 @@ impl FilterCondition { ParseContext { fields_ids_map: &fields_ids_map, filterable_fields: &filterable_fields }; match ctx.parse_expression::>(expression) { Ok((_, fc)) => Ok(fc), - Err(e) => Err(Error::UserError(UserError::InvalidFilterNom { input: e.to_string() })), + Err(e) => { + match e { + nom::Err::Error(x) => { + println!("verbose err:\n{}", convert_error(expression, x)) + } + _ => unreachable!(), + } + Err(Error::UserError(UserError::InvalidFilterNom { input: "whatever".to_string() })) + } } } pub fn negate(self) -> FilterCondition { @@ -101,67 +109,6 @@ impl FilterCondition { Empty => Empty, } } - - fn geo_radius( - fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, - item: Pair, - ) -> Result { - if !filterable_fields.contains("_geo") { - return Err(UserError::InvalidFilterAttribute(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!( - "attribute `_geo` is not filterable, available filterable attributes are: {}", - filterable_fields.iter().join(", "), - ), - }, - item.as_span(), - )))?; - } - let mut items = item.into_inner(); - let fid = match fields_ids_map.id("_geo") { - Some(fid) => fid, - None => return Ok(Empty), - }; - let parameters_item = items.next().unwrap(); - // We don't need more than 3 parameters, but to handle errors correctly we are still going - // to extract the first 4 parameters - let param_span = parameters_item.as_span(); - let parameters = parameters_item - .into_inner() - .take(4) - .map(|param| (param.clone(), param.as_span())) - .map(|(param, span)| pest_parse(param).0.map(|arg| (arg, span))) - .collect::, _>>() - .map_err(UserError::InvalidFilter)?; - if parameters.len() != 3 { - return Err(UserError::InvalidFilter(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"), - }, - // we want to point to the last parameters and if there was no parameters we - // point to the parenthesis - parameters.last().map(|param| param.1.clone()).unwrap_or(param_span), - )))?; - } - let (lat, lng, distance) = (¶meters[0], ¶meters[1], parameters[2].0); - if !(-90.0..=90.0).contains(&lat.0) { - return Err(UserError::InvalidFilter(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!("Latitude must be contained between -90 and 90 degrees."), - }, - lat.1.clone(), - )))?; - } else if !(-180.0..=180.0).contains(&lng.0) { - return Err(UserError::InvalidFilter(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!("Longitude must be contained between -180 and 180 degrees."), - }, - lng.1.clone(), - )))?; - } - Ok(Operator(fid, Operator::GeoLowerThan([lat.0, lng.0], distance))) - } } impl FilterCondition { @@ -348,7 +295,7 @@ impl FilterCondition { numbers_db, strings_db, field_id, - &GeoLowerThan(point.clone(), *distance), + &Operator::GeoLowerThan(point.clone(), *distance), )?; let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; return Ok(geo_faceted_doc_ids - result); diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 53a51ca49..419c148d7 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -9,10 +9,11 @@ use nom::{ bytes::complete::{tag, take_while1}, character::complete::{char, multispace0}, combinator::map, - error::ErrorKind, error::ParseError, error::VerboseError, + error::{ContextError, ErrorKind}, multi::many0, + multi::separated_list1, sequence::{delimited, preceded, tuple}, IResult, }; @@ -43,6 +44,8 @@ impl Operator { LowerThan(n) => (GreaterThanOrEqual(n), None), LowerThanOrEqual(n) => (GreaterThan(n), None), Between(n, m) => (LowerThan(n), Some(GreaterThan(m))), + GeoLowerThan(point, distance) => (GeoGreaterThan(point, distance), None), + GeoGreaterThan(point, distance) => (GeoLowerThan(point, distance), None), } } } @@ -193,13 +196,52 @@ impl<'a> ParseContext<'a> { Ok((input, res)) } + fn parse_geo_radius(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + where + E: ParseError<&'a str>, + { + let (input, args) = preceded( + tag("_geoRadius"), + delimited( + tag("("), + separated_list1(tag(","), self.ws(|c| self.parse_value(c))), + tag(")"), + ), + )(input)?; + + if args.len() != 3 { + let e = E::from_char(input, '('); + return Err(nom::Err::Failure(e)); + } + let lat = self.parse_numeric(args[0])?; + let lng = self.parse_numeric(args[1])?; + let dis = self.parse_numeric(args[2])?; + + let fid = match self.fields_ids_map.id("_geo") { + Some(fid) => fid, + None => return Ok((input, FilterCondition::Empty)), + }; + + if let Some(span) = (!(-181.0..181.).contains(&lat)) + .then(|| &lat) + .or((!(-181.0..181.).contains(&lng)).then(|| &lng)) + { + let e = E::from_char(input, '('); + return Err(nom::Err::Failure(e)); + } + + let res = FilterCondition::Operator(fid, GeoLowerThan([lat, lng], dis)); + Ok((input, res)) + } + fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: ParseError<&'a str>, { + let l0 = |c| self.parse_geo_radius(c); let l1 = |c| self.parse_simple_condition(c); let l2 = |c| self.parse_range_condition(c); - let (input, condition) = alt((l1, l2))(input)?; + let (input, condition) = alt((l0, l1, l2))(input)?; Ok((input, condition)) } @@ -220,6 +262,15 @@ impl<'a> ParseContext<'a> { let key = |input| take_while1(Self::is_key_component)(input); alt((key, delimited(char('"'), key, char('"'))))(input) } + + fn parse_value(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> + where + E: ParseError<&'a str>, + { + let key = |input| take_while1(Self::is_key_component)(input); + alt((key, delimited(char('"'), key, char('"'))))(input) + } + fn is_key_component(c: char) -> bool { c.is_alphanumeric() || ['_', '-', '.'].contains(&c) } @@ -431,13 +482,13 @@ mod tests { // basic test let condition = FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap(); - let expected = Operator(0, GeoLowerThan([12., 13.0005], 2000.)); + let expected = FilterCondition::Operator(0, GeoLowerThan([12., 13.0005], 2000.)); assert_eq!(condition, expected); // test the negation of the GeoLowerThan let condition = FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap(); - let expected = Operator(0, GeoGreaterThan([50., 18.], 2000.500)); + let expected = FilterCondition::Operator(0, GeoGreaterThan([50., 18.], 2000.500)); assert_eq!(condition, expected); // composition of multiple operations @@ -446,13 +497,13 @@ mod tests { &index, "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", ) - .unwrap(); - let expected = Or( - Box::new(And( - Box::new(Operator(0, GeoGreaterThan([1., 2.], 300.))), - Box::new(Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))), + .unwrap_or_else(|e| FilterCondition::Empty); + let expected = FilterCondition::Or( + Box::new(FilterCondition::And( + Box::new(FilterCondition::Operator(0, GeoGreaterThan([1., 2.], 300.))), + Box::new(FilterCondition::Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))), )), - Box::new(Operator(1, LowerThanOrEqual(10.))), + Box::new(FilterCondition::Operator(1, LowerThanOrEqual(10.))), ); assert_eq!(condition, expected); From 28f9be8d7c3cdc449d5b9a125bd600bc685640a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Tue, 12 Oct 2021 08:20:01 +0800 Subject: [PATCH 07/15] support syntax --- milli/src/search/facet/filter_parser.rs | 86 +++++++++++++++++-------- 1 file changed, 60 insertions(+), 26 deletions(-) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 419c148d7..7f7ec83c2 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -58,10 +58,12 @@ pub struct ParseContext<'a> { impl<'a> ParseContext<'a> { fn parse_or_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { let (input, lhs) = self.parse_and_nom(input)?; - let (input, ors) = many0(preceded(tag("OR"), |c| Self::parse_or_nom(self, c)))(input)?; + let (input, ors) = + many0(preceded(self.ws(tag("OR")), |c| Self::parse_or_nom(self, c)))(input)?; + let expr = ors .into_iter() .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); @@ -70,10 +72,16 @@ impl<'a> ParseContext<'a> { fn parse_and_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { let (input, lhs) = self.parse_not_nom(input)?; - let (input, ors) = many0(preceded(tag("AND"), |c| Self::parse_and_nom(self, c)))(input)?; + // let (input, lhs) = alt(( + // delimited(self.ws(char('(')), |c| Self::parse_not_nom(self, c), self.ws(char(')'))), + // |c| self.parse_not_nom(c), + // ))(input)?; + + let (input, ors) = + many0(preceded(self.ws(tag("AND")), |c| Self::parse_and_nom(self, c)))(input)?; let expr = ors .into_iter() .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); @@ -82,7 +90,7 @@ impl<'a> ParseContext<'a> { fn parse_not_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { alt(( map( @@ -98,20 +106,26 @@ impl<'a> ParseContext<'a> { fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> where F: Fn(&'a str) -> IResult<&'a str, O, E>, - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { delimited(multispace0, inner, multispace0) } fn parse_simple_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + std::fmt::Debug, { - let operator = alt((tag(">"), tag(">="), tag("="), tag("<"), tag("!="), tag("<="))); - let (input, (key, op, value)) = - tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_key(c))))( - input, - )?; + let operator = alt((tag("<="), tag(">="), tag(">"), tag("="), tag("<"), tag("!="))); + let k = tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_key(c))))( + input, + ); + let (input, (key, op, value)) = match k { + Ok(o) => o, + Err(e) => { + return Err(e); + } + }; + let fid = self.parse_fid(input, key)?; let r: StdResult>> = self.parse_numeric(value); let k = match op { @@ -127,7 +141,7 @@ impl<'a> ParseContext<'a> { fn parse_numeric(&'a self, input: &'a str) -> StdResult> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, T: std::str::FromStr, { match input.parse::() { @@ -148,7 +162,7 @@ impl<'a> ParseContext<'a> { value: &'a str, ) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { let numeric: f64 = self.parse_numeric(value)?; let k = match input { @@ -163,7 +177,7 @@ impl<'a> ParseContext<'a> { fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { let error = match input.chars().nth(0) { Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), @@ -180,7 +194,7 @@ impl<'a> ParseContext<'a> { fn parse_range_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { let (input, (key, from, _, to)) = tuple(( self.ws(|c| self.parse_key(c)), @@ -193,19 +207,20 @@ impl<'a> ParseContext<'a> { let numeric_from: f64 = self.parse_numeric(from)?; let numeric_to: f64 = self.parse_numeric(to)?; let res = FilterCondition::Operator(fid, Between(numeric_from, numeric_to)); + Ok((input, res)) } fn parse_geo_radius(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { let (input, args) = preceded( tag("_geoRadius"), delimited( - tag("("), + char('('), separated_list1(tag(","), self.ws(|c| self.parse_value(c))), - tag(")"), + char(')'), ), )(input)?; @@ -236,7 +251,7 @@ impl<'a> ParseContext<'a> { fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { let l0 = |c| self.parse_geo_radius(c); let l1 = |c| self.parse_simple_condition(c); @@ -247,12 +262,12 @@ impl<'a> ParseContext<'a> { fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { - return alt(( + alt(( delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))), |c| Self::parse_condition(self, c), - ))(input); + ))(input) } fn parse_key(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> @@ -265,7 +280,7 @@ impl<'a> ParseContext<'a> { fn parse_value(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { let key = |input| take_while1(Self::is_key_component)(input); alt((key, delimited(char('"'), key, char('"'))))(input) @@ -277,9 +292,10 @@ impl<'a> ParseContext<'a> { pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + Debug, { - self.parse_or_nom(input) + let a = self.parse_or_nom(input); + a } } @@ -506,6 +522,24 @@ mod tests { Box::new(FilterCondition::Operator(1, LowerThanOrEqual(10.))), ); assert_eq!(condition, expected); + } + + #[test] + fn geo_radius_error() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); // georadius don't have any parameters let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius"); From 70f576d5d39bfffc289f2d506a8cdd6a0c060fe6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Tue, 12 Oct 2021 10:05:10 +0800 Subject: [PATCH 08/15] error handling --- milli/src/search/facet/filter_condition.rs | 17 +++--- milli/src/search/facet/filter_parser.rs | 63 +++++++++++++--------- 2 files changed, 47 insertions(+), 33 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index c4fa8e561..96c8867ec 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; + use std::fmt::Debug; use std::ops::Bound::{self, Excluded, Included}; @@ -17,7 +17,7 @@ use crate::heed_codec::facet::{ FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, }; use crate::{ - distance_between_two_points, CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result, + distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result, }; #[derive(Debug, Clone, PartialEq)] @@ -88,13 +88,14 @@ impl FilterCondition { match ctx.parse_expression::>(expression) { Ok((_, fc)) => Ok(fc), Err(e) => { - match e { - nom::Err::Error(x) => { - println!("verbose err:\n{}", convert_error(expression, x)) - } + let ve = match e { + nom::Err::Error(x) => x, + nom::Err::Failure(x) => x, _ => unreachable!(), - } - Err(Error::UserError(UserError::InvalidFilterNom { input: "whatever".to_string() })) + }; + Err(Error::UserError(UserError::InvalidFilterNom { + input: convert_error(expression, ve).to_string(), + })) } } } diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 7f7ec83c2..72acaecfb 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -58,7 +58,7 @@ pub struct ParseContext<'a> { impl<'a> ParseContext<'a> { fn parse_or_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { let (input, lhs) = self.parse_and_nom(input)?; let (input, ors) = @@ -72,7 +72,7 @@ impl<'a> ParseContext<'a> { fn parse_and_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { let (input, lhs) = self.parse_not_nom(input)?; // let (input, lhs) = alt(( @@ -90,7 +90,7 @@ impl<'a> ParseContext<'a> { fn parse_not_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { alt(( map( @@ -106,14 +106,14 @@ impl<'a> ParseContext<'a> { fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> where F: Fn(&'a str) -> IResult<&'a str, O, E>, - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { delimited(multispace0, inner, multispace0) } fn parse_simple_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + std::fmt::Debug, + E: ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug, { let operator = alt((tag("<="), tag(">="), tag(">"), tag("="), tag("<"), tag("!="))); let k = tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_key(c))))( @@ -141,7 +141,7 @@ impl<'a> ParseContext<'a> { fn parse_numeric(&'a self, input: &'a str) -> StdResult> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, T: std::str::FromStr, { match input.parse::() { @@ -162,7 +162,7 @@ impl<'a> ParseContext<'a> { value: &'a str, ) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { let numeric: f64 = self.parse_numeric(value)?; let k = match input { @@ -177,7 +177,7 @@ impl<'a> ParseContext<'a> { fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { let error = match input.chars().nth(0) { Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), @@ -194,7 +194,7 @@ impl<'a> ParseContext<'a> { fn parse_range_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { let (input, (key, from, _, to)) = tuple(( self.ws(|c| self.parse_key(c)), @@ -213,20 +213,34 @@ impl<'a> ParseContext<'a> { fn parse_geo_radius(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { - let (input, args) = preceded( + let err_msg_args_incomplete:&'static str = + "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; + let err_msg_args_invalid: &'static str = + "_geoRadius. Latitude and longitude must be contained between -180 to 180 degrees."; + let (input, args): (&str, Vec<&str>) = match preceded( tag("_geoRadius"), delimited( char('('), - separated_list1(tag(","), self.ws(|c| self.parse_value(c))), + separated_list1(tag(","), self.ws(|c| self.parse_value::(c))), char(')'), ), - )(input)?; + )(input) + { + Ok(e) => e, + Err(_e) => { + return Err(nom::Err::Failure(E::add_context( + input, + err_msg_args_incomplete, + E::from_char(input, '('), + ))); + } + }; if args.len() != 3 { let e = E::from_char(input, '('); - return Err(nom::Err::Failure(e)); + return Err(nom::Err::Failure(E::add_context(input, err_msg_args_incomplete, e))); } let lat = self.parse_numeric(args[0])?; let lng = self.parse_numeric(args[1])?; @@ -237,12 +251,12 @@ impl<'a> ParseContext<'a> { None => return Ok((input, FilterCondition::Empty)), }; - if let Some(span) = (!(-181.0..181.).contains(&lat)) + if let Some(_span) = (!(-181.0..181.).contains(&lat)) .then(|| &lat) .or((!(-181.0..181.).contains(&lng)).then(|| &lng)) { let e = E::from_char(input, '('); - return Err(nom::Err::Failure(e)); + return Err(nom::Err::Failure(E::add_context(input,err_msg_args_invalid, e))); } let res = FilterCondition::Operator(fid, GeoLowerThan([lat, lng], dis)); @@ -251,18 +265,18 @@ impl<'a> ParseContext<'a> { fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { - let l0 = |c| self.parse_geo_radius(c); let l1 = |c| self.parse_simple_condition(c); let l2 = |c| self.parse_range_condition(c); - let (input, condition) = alt((l0, l1, l2))(input)?; + let l3 = |c| self.parse_geo_radius(c); + let (input, condition) = alt((l1, l2,l3))(input)?; Ok((input, condition)) } fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { alt(( delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))), @@ -272,7 +286,7 @@ impl<'a> ParseContext<'a> { fn parse_key(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> where - E: ParseError<&'a str>, + E: ParseError<&'a str> + ContextError<&'a str>, { let key = |input| take_while1(Self::is_key_component)(input); alt((key, delimited(char('"'), key, char('"'))))(input) @@ -280,7 +294,7 @@ impl<'a> ParseContext<'a> { fn parse_value(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { let key = |input| take_while1(Self::is_key_component)(input); alt((key, delimited(char('"'), key, char('"'))))(input) @@ -292,7 +306,7 @@ impl<'a> ParseContext<'a> { pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + Debug, + E: ParseError<&'a str> + ContextError<&'a str> + Debug, { let a = self.parse_or_nom(input); a @@ -512,8 +526,7 @@ mod tests { &rtxn, &index, "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", - ) - .unwrap_or_else(|e| FilterCondition::Empty); + ).unwrap(); let expected = FilterCondition::Or( Box::new(FilterCondition::And( Box::new(FilterCondition::Operator(0, GeoGreaterThan([1., 2.], 300.))), From d323e35001f0d83619c1aec1c6900ee20e08a0d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Tue, 12 Oct 2021 13:22:32 +0800 Subject: [PATCH 09/15] add a test case --- milli/src/search/facet/filter_parser.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 72acaecfb..8d00d086c 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -395,15 +395,19 @@ mod tests { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); + builder.set_searchable_fields(vec![S("channel"), S("timestamp"),S("id")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") ,S("id")}); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); let rtxn = index.read_txn().unwrap(); let condition = FilterCondition::from_str(&rtxn, &index, "channel < 20").unwrap(); let expected = FilterCondition::Operator(0, LowerThan(20.0)); + assert_eq!(condition, expected); + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "id < 200").unwrap(); + let expected = FilterCondition::Operator(2, LowerThan(200.0)); assert_eq!(condition, expected); } From 2c65781d91e23c4ea3ca702472f4bdcd998d884c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Tue, 12 Oct 2021 22:19:28 +0800 Subject: [PATCH 10/15] format --- milli/src/search/facet/filter_condition.rs | 5 +---- milli/src/search/facet/filter_parser.rs | 12 ++++++------ 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 96c8867ec..2686e4a4b 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -1,4 +1,3 @@ - use std::fmt::Debug; use std::ops::Bound::{self, Excluded, Included}; @@ -16,9 +15,7 @@ use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, }; -use crate::{ - distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result, -}; +use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; #[derive(Debug, Clone, PartialEq)] pub enum FilterCondition { diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 8d00d086c..01e944a98 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -215,8 +215,7 @@ impl<'a> ParseContext<'a> { where E: ParseError<&'a str> + ContextError<&'a str> + Debug, { - let err_msg_args_incomplete:&'static str = - "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; + let err_msg_args_incomplete:&'static str = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; let err_msg_args_invalid: &'static str = "_geoRadius. Latitude and longitude must be contained between -180 to 180 degrees."; let (input, args): (&str, Vec<&str>) = match preceded( @@ -256,7 +255,7 @@ impl<'a> ParseContext<'a> { .or((!(-181.0..181.).contains(&lng)).then(|| &lng)) { let e = E::from_char(input, '('); - return Err(nom::Err::Failure(E::add_context(input,err_msg_args_invalid, e))); + return Err(nom::Err::Failure(E::add_context(input, err_msg_args_invalid, e))); } let res = FilterCondition::Operator(fid, GeoLowerThan([lat, lng], dis)); @@ -270,7 +269,7 @@ impl<'a> ParseContext<'a> { let l1 = |c| self.parse_simple_condition(c); let l2 = |c| self.parse_range_condition(c); let l3 = |c| self.parse_geo_radius(c); - let (input, condition) = alt((l1, l2,l3))(input)?; + let (input, condition) = alt((l1, l2, l3))(input)?; Ok((input, condition)) } @@ -395,7 +394,7 @@ mod tests { let mut wtxn = index.write_txn().unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp"),S("id")]); // to keep the fields order + builder.set_searchable_fields(vec![S("channel"), S("timestamp"), S("id")]); // to keep the fields order builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") ,S("id")}); builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); @@ -530,7 +529,8 @@ mod tests { &rtxn, &index, "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", - ).unwrap(); + ) + .unwrap(); let expected = FilterCondition::Or( Box::new(FilterCondition::And( Box::new(FilterCondition::Operator(0, GeoGreaterThan([1., 2.], 300.))), From 5de5dd80a33fe6d7bd0a05c3f9a06841f8844562 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Wed, 13 Oct 2021 11:06:15 +0800 Subject: [PATCH 11/15] WIP: remove '_nom' suffix/redundant error enum/... --- milli/src/error.rs | 9 ++---- milli/src/search/facet/filter_condition.rs | 2 +- milli/src/search/facet/filter_parser.rs | 22 ++++++--------- milli/src/search/facet/grammar.pest | 33 ---------------------- 4 files changed, 11 insertions(+), 55 deletions(-) delete mode 100644 milli/src/search/facet/grammar.pest diff --git a/milli/src/error.rs b/milli/src/error.rs index 3ae18165f..c0ce101c8 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -59,9 +59,7 @@ pub enum UserError { InvalidDocumentId { document_id: Value }, InvalidFacetsDistribution { invalid_facets_name: HashSet }, InvalidGeoField { document_id: Value, object: Value }, - InvalidFilterAttributeNom, - InvalidFilterValue, - InvalidFilterNom { input: String }, + InvalidFilter { input: String }, InvalidSortName { name: String }, InvalidSortableAttribute { field: String, valid_fields: HashSet }, SortRankingRuleMissing, @@ -209,10 +207,7 @@ impl StdError for InternalError {} impl fmt::Display for UserError { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - //TODO - Self::InvalidFilterAttributeNom => write!(f, "parser error "), - Self::InvalidFilterValue => write!(f, "parser error "), - Self::InvalidFilterNom { input } => write!(f, "parser error {}", input), + Self::InvalidFilter { input } => write!(f, "parser error {}", input), Self::AttributeLimitReached => f.write_str("maximum number of attributes reached"), Self::CriterionError(error) => write!(f, "{}", error), Self::DocumentLimitReached => f.write_str("maximum number of documents reached"), diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 2686e4a4b..0e98edd2c 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -90,7 +90,7 @@ impl FilterCondition { nom::Err::Failure(x) => x, _ => unreachable!(), }; - Err(Error::UserError(UserError::InvalidFilterNom { + Err(Error::UserError(UserError::InvalidFilter { input: convert_error(expression, ve).to_string(), })) } diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 01e944a98..493c53920 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -56,13 +56,12 @@ pub struct ParseContext<'a> { } impl<'a> ParseContext<'a> { - fn parse_or_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + fn parse_or(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: ParseError<&'a str> + ContextError<&'a str> + Debug, { - let (input, lhs) = self.parse_and_nom(input)?; - let (input, ors) = - many0(preceded(self.ws(tag("OR")), |c| Self::parse_or_nom(self, c)))(input)?; + let (input, lhs) = self.parse_and(input)?; + let (input, ors) = many0(preceded(self.ws(tag("OR")), |c| Self::parse_or(self, c)))(input)?; let expr = ors .into_iter() @@ -70,25 +69,20 @@ impl<'a> ParseContext<'a> { Ok((input, expr)) } - fn parse_and_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + fn parse_and(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: ParseError<&'a str> + ContextError<&'a str> + Debug, { - let (input, lhs) = self.parse_not_nom(input)?; - // let (input, lhs) = alt(( - // delimited(self.ws(char('(')), |c| Self::parse_not_nom(self, c), self.ws(char(')'))), - // |c| self.parse_not_nom(c), - // ))(input)?; - + let (input, lhs) = self.parse_not(input)?; let (input, ors) = - many0(preceded(self.ws(tag("AND")), |c| Self::parse_and_nom(self, c)))(input)?; + many0(preceded(self.ws(tag("AND")), |c| Self::parse_and(self, c)))(input)?; let expr = ors .into_iter() .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); Ok((input, expr)) } - fn parse_not_nom(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> + fn parse_not(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where E: ParseError<&'a str> + ContextError<&'a str> + Debug, { @@ -307,7 +301,7 @@ impl<'a> ParseContext<'a> { where E: ParseError<&'a str> + ContextError<&'a str> + Debug, { - let a = self.parse_or_nom(input); + let a = self.parse_or(input); a } } diff --git a/milli/src/search/facet/grammar.pest b/milli/src/search/facet/grammar.pest deleted file mode 100644 index 8bfdeb667..000000000 --- a/milli/src/search/facet/grammar.pest +++ /dev/null @@ -1,33 +0,0 @@ -key = _{reserved | quoted | word } -value = _{quoted | word } -quoted = _{ (PUSH("'") | PUSH("\"")) ~ string ~ POP } -string = {char*} -word = ${(LETTER | NUMBER | "_" | "-" | ".")+} - -char = _{ !(PEEK | "\\") ~ ANY - | "\\" ~ (PEEK | "\\" | "/" | "b" | "f" | "n" | "r" | "t") - | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4})} - -reserved = { "_geoDistance" | ("_geoPoint" ~ parameters) | "_geo" } -// we deliberately choose to allow empty parameters to generate more specific error message later -parameters = {("(" ~ (value ~ ",")* ~ value? ~ ")") | ""} -condition = _{between | eq | greater | less | geq | leq | neq} -between = {key ~ value ~ "TO" ~ value} -geq = {key ~ ">=" ~ value} -leq = {key ~ "<=" ~ value} -neq = {key ~ "!=" ~ value} -eq = {key ~ "=" ~ value} -greater = {key ~ ">" ~ value} -less = {key ~ "<" ~ value} -geo_radius = {"_geoRadius" ~ parameters } - -prgm = {SOI ~ expr ~ EOI} -expr = _{ ( term ~ (operation ~ term)* ) } -term = { ("(" ~ expr ~ ")") | condition | not | geo_radius } -operation = _{ and | or } -and = {"AND"} -or = {"OR"} - -not = {"NOT" ~ term} - -WHITESPACE = _{ " " } From cd359cd96ece0b80110813194865050f2012c792 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Wed, 13 Oct 2021 18:04:15 +0800 Subject: [PATCH 12/15] WIP: extract the error trait bound to new trait. --- milli/src/search/facet/filter_parser.rs | 37 ++++++++++++++----------- 1 file changed, 21 insertions(+), 16 deletions(-) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 493c53920..c635ac9dd 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -9,7 +9,6 @@ use nom::{ bytes::complete::{tag, take_while1}, character::complete::{char, multispace0}, combinator::map, - error::ParseError, error::VerboseError, error::{ContextError, ErrorKind}, multi::many0, @@ -50,6 +49,12 @@ impl Operator { } } +pub trait FilterParserError<'a>: + nom::error::ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug +{ +} +impl<'a> FilterParserError<'a> for VerboseError<&'a str> {} + pub struct ParseContext<'a> { pub fields_ids_map: &'a FieldsIdsMap, pub filterable_fields: &'a HashSet, @@ -58,7 +63,7 @@ pub struct ParseContext<'a> { impl<'a> ParseContext<'a> { fn parse_or(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { let (input, lhs) = self.parse_and(input)?; let (input, ors) = many0(preceded(self.ws(tag("OR")), |c| Self::parse_or(self, c)))(input)?; @@ -71,7 +76,7 @@ impl<'a> ParseContext<'a> { fn parse_and(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { let (input, lhs) = self.parse_not(input)?; let (input, ors) = @@ -84,7 +89,7 @@ impl<'a> ParseContext<'a> { fn parse_not(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { alt(( map( @@ -100,14 +105,14 @@ impl<'a> ParseContext<'a> { fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> where F: Fn(&'a str) -> IResult<&'a str, O, E>, - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { delimited(multispace0, inner, multispace0) } fn parse_simple_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug, + E: FilterParserError<'a>, { let operator = alt((tag("<="), tag(">="), tag(">"), tag("="), tag("<"), tag("!="))); let k = tuple((self.ws(|c| self.parse_key(c)), operator, self.ws(|c| self.parse_key(c))))( @@ -135,7 +140,7 @@ impl<'a> ParseContext<'a> { fn parse_numeric(&'a self, input: &'a str) -> StdResult> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, T: std::str::FromStr, { match input.parse::() { @@ -156,7 +161,7 @@ impl<'a> ParseContext<'a> { value: &'a str, ) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { let numeric: f64 = self.parse_numeric(value)?; let k = match input { @@ -171,7 +176,7 @@ impl<'a> ParseContext<'a> { fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { let error = match input.chars().nth(0) { Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), @@ -188,7 +193,7 @@ impl<'a> ParseContext<'a> { fn parse_range_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { let (input, (key, from, _, to)) = tuple(( self.ws(|c| self.parse_key(c)), @@ -207,7 +212,7 @@ impl<'a> ParseContext<'a> { fn parse_geo_radius(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { let err_msg_args_incomplete:&'static str = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; let err_msg_args_invalid: &'static str = @@ -258,7 +263,7 @@ impl<'a> ParseContext<'a> { fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { let l1 = |c| self.parse_simple_condition(c); let l2 = |c| self.parse_range_condition(c); @@ -269,7 +274,7 @@ impl<'a> ParseContext<'a> { fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { alt(( delimited(self.ws(char('(')), |c| Self::parse_expression(self, c), self.ws(char(')'))), @@ -279,7 +284,7 @@ impl<'a> ParseContext<'a> { fn parse_key(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> where - E: ParseError<&'a str> + ContextError<&'a str>, + E: FilterParserError<'a>, { let key = |input| take_while1(Self::is_key_component)(input); alt((key, delimited(char('"'), key, char('"'))))(input) @@ -287,7 +292,7 @@ impl<'a> ParseContext<'a> { fn parse_value(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { let key = |input| take_while1(Self::is_key_component)(input); alt((key, delimited(char('"'), key, char('"'))))(input) @@ -299,7 +304,7 @@ impl<'a> ParseContext<'a> { pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> where - E: ParseError<&'a str> + ContextError<&'a str> + Debug, + E: FilterParserError<'a>, { let a = self.parse_or(input); a From e750465e15aea5f9ca6fabc03b92a8b09f044486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Thu, 14 Oct 2021 16:12:00 +0800 Subject: [PATCH 13/15] check logic for geolocation. --- milli/src/search/facet/filter_parser.rs | 55 ++++++++++++++++++------- 1 file changed, 41 insertions(+), 14 deletions(-) diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index c635ac9dd..123a80c35 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -214,9 +214,14 @@ impl<'a> ParseContext<'a> { where E: FilterParserError<'a>, { - let err_msg_args_incomplete:&'static str = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; - let err_msg_args_invalid: &'static str = - "_geoRadius. Latitude and longitude must be contained between -180 to 180 degrees."; + let err_msg_args_incomplete= "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; + + let err_msg_latitude_invalid = + "_geoRadius. Latitude must be contained between -90 and 90 degrees."; + + let err_msg_longitude_invalid = + "_geoRadius. Longitude must be contained between -180 and 180 degrees."; + let (input, args): (&str, Vec<&str>) = match preceded( tag("_geoRadius"), delimited( @@ -249,12 +254,18 @@ impl<'a> ParseContext<'a> { None => return Ok((input, FilterCondition::Empty)), }; - if let Some(_span) = (!(-181.0..181.).contains(&lat)) - .then(|| &lat) - .or((!(-181.0..181.).contains(&lng)).then(|| &lng)) - { - let e = E::from_char(input, '('); - return Err(nom::Err::Failure(E::add_context(input, err_msg_args_invalid, e))); + if !(-90.0..=90.0).contains(&lat) { + return Err(nom::Err::Failure(E::add_context( + input, + err_msg_latitude_invalid, + E::from_char(input, '('), + ))); + } else if !(-180.0..=180.0).contains(&lng) { + return Err(nom::Err::Failure(E::add_context( + input, + err_msg_longitude_invalid, + E::from_char(input, '('), + ))); } let res = FilterCondition::Operator(fid, GeoLowerThan([lat, lng], dis)); @@ -582,20 +593,36 @@ mod tests { let error = result.unwrap_err(); assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - // georadius have a bad latitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-200, 150, 10)"); + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-100, 150, 10)"); assert!(result.is_err()); let error = result.unwrap_err(); assert!(error .to_string() - .contains("Latitude and longitude must be contained between -180 to 180 degrees.")); + .contains("Latitude must be contained between -90 and 90 degrees.")); + + // georadius have a bad latitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-90.0000001, 150, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Latitude must be contained between -90 and 90 degrees.")); // georadius have a bad longitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 181, 10)"); + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 250, 10)"); assert!(result.is_err()); let error = result.unwrap_err(); assert!(error .to_string() - .contains("Latitude and longitude must be contained between -180 to 180 degrees.")); + .contains("Longitude must be contained between -180 and 180 degrees.")); + + // georadius have a bad longitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 180.000001, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Longitude must be contained between -180 and 180 degrees.")); + } } From 2ea2f7570c440022398a07b846945f591ed67401 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Thu, 14 Oct 2021 16:46:13 +0800 Subject: [PATCH 14/15] use nightly cargo to format the code --- milli/src/search/facet/filter_condition.rs | 1 - milli/src/search/facet/filter_parser.rs | 28 ++++++++++------------ 2 files changed, 12 insertions(+), 17 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 0e98edd2c..c76bb9388 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -8,7 +8,6 @@ use nom::error::{convert_error, VerboseError}; use roaring::RoaringBitmap; use self::FilterCondition::*; - use super::filter_parser::{Operator, ParseContext}; use super::FacetNumberRange; use crate::error::{Error, UserError}; diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index 123a80c35..ee8249069 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -2,22 +2,18 @@ use std::collections::HashSet; use std::fmt::Debug; use std::result::Result as StdResult; -use super::FilterCondition; -use crate::{FieldId, FieldsIdsMap}; -use nom::{ - branch::alt, - bytes::complete::{tag, take_while1}, - character::complete::{char, multispace0}, - combinator::map, - error::VerboseError, - error::{ContextError, ErrorKind}, - multi::many0, - multi::separated_list1, - sequence::{delimited, preceded, tuple}, - IResult, -}; +use nom::branch::alt; +use nom::bytes::complete::{tag, take_while1}; +use nom::character::complete::{char, multispace0}; +use nom::combinator::map; +use nom::error::{ContextError, ErrorKind, VerboseError}; +use nom::multi::{many0, separated_list1}; +use nom::sequence::{delimited, preceded, tuple}; +use nom::IResult; use self::Operator::*; +use super::FilterCondition; +use crate::{FieldId, FieldsIdsMap}; #[derive(Debug, Clone, PartialEq)] pub enum Operator { GreaterThan(f64), @@ -330,7 +326,8 @@ mod tests { use maplit::hashset; use super::*; - use crate::{update::Settings, Index}; + use crate::update::Settings; + use crate::Index; #[test] fn string() { @@ -623,6 +620,5 @@ mod tests { assert!(error .to_string() .contains("Longitude must be contained between -180 and 180 degrees.")); - } } From 7666e4f34a6c6c73fd231c504c6ea1ef2d12e307 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=88=98=E7=80=9A=E9=AA=8B?= Date: Thu, 14 Oct 2021 21:37:59 +0800 Subject: [PATCH 15/15] follow the suggestions --- milli/Cargo.toml | 2 +- milli/src/search/facet/filter_condition.rs | 4 +--- milli/src/search/facet/filter_parser.rs | 8 +++----- 3 files changed, 5 insertions(+), 9 deletions(-) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 007d9d415..af8370309 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -39,7 +39,7 @@ tempfile = "3.2.0" uuid = { version = "0.8.2", features = ["v4"] } # facet filter parser -nom = "7" +nom = "7.0.0" # documents words self-join itertools = "0.10.0" diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index c76bb9388..4fedeee69 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -24,9 +24,6 @@ pub enum FilterCondition { Empty, } -// impl From - -//for nom impl FilterCondition { pub fn from_array( rtxn: &heed::RoTxn, @@ -72,6 +69,7 @@ impl FilterCondition { Ok(ands) } + pub fn from_str( rtxn: &heed::RoTxn, index: &Index, diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs index ee8249069..4d8a54987 100644 --- a/milli/src/search/facet/filter_parser.rs +++ b/milli/src/search/facet/filter_parser.rs @@ -49,6 +49,7 @@ pub trait FilterParserError<'a>: nom::error::ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug { } + impl<'a> FilterParserError<'a> for VerboseError<&'a str> {} pub struct ParseContext<'a> { @@ -211,7 +212,6 @@ impl<'a> ParseContext<'a> { E: FilterParserError<'a>, { let err_msg_args_incomplete= "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; - let err_msg_latitude_invalid = "_geoRadius. Latitude must be contained between -90 and 90 degrees."; @@ -275,8 +275,7 @@ impl<'a> ParseContext<'a> { let l1 = |c| self.parse_simple_condition(c); let l2 = |c| self.parse_range_condition(c); let l3 = |c| self.parse_geo_radius(c); - let (input, condition) = alt((l1, l2, l3))(input)?; - Ok((input, condition)) + alt((l1, l2, l3))(input) } fn parse_condition_expression(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> @@ -313,8 +312,7 @@ impl<'a> ParseContext<'a> { where E: FilterParserError<'a>, { - let a = self.parse_or(input); - a + self.parse_or(input) } }