From c27870e76511c1f109b31ccb2ccafb5dc07dcfc3 Mon Sep 17 00:00:00 2001 From: Tamo Date: Fri, 22 Oct 2021 14:33:18 +0200 Subject: [PATCH] integrate a first version without any error handling --- filter_parser/src/condition.rs | 8 +- filter_parser/src/lib.rs | 60 +- filter_parser/src/value.rs | 7 +- milli/Cargo.toml | 2 +- milli/src/lib.rs | 2 +- milli/src/search/facet/filter_condition.rs | 547 +++++++++++-- milli/src/search/facet/filter_parser.rs | 891 --------------------- milli/src/search/facet/mod.rs | 3 +- milli/src/search/mod.rs | 6 +- 9 files changed, 507 insertions(+), 1019 deletions(-) delete mode 100644 milli/src/search/facet/filter_parser.rs diff --git a/filter_parser/src/condition.rs b/filter_parser/src/condition.rs index 5a1bb62be..75ee8c6f7 100644 --- a/filter_parser/src/condition.rs +++ b/filter_parser/src/condition.rs @@ -12,6 +12,7 @@ use nom::branch::alt; use nom::bytes::complete::tag; +use nom::error::ParseError; use nom::sequence::tuple; use nom::IResult; use Condition::*; @@ -46,14 +47,15 @@ impl<'a> Condition<'a> { } /// condition = value ("==" | ">" ...) value -pub fn parse_condition(input: Span) -> IResult { +pub fn parse_condition<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, FilterCondition, E> { let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); let (input, (key, op, value)) = tuple((|c| parse_value(c), operator, |c| parse_value(c)))(input)?; let fid = key; - // TODO match *op.fragment() { "=" => { let k = FilterCondition::Condition { fid, op: Equal(value) }; @@ -78,7 +80,7 @@ pub fn parse_condition(input: Span) -> IResult { } /// to = value value TO value -pub fn parse_to(input: Span) -> IResult { +pub fn parse_to<'a, E: ParseError>>(input: Span<'a>) -> IResult { let (input, (key, from, _, to)) = tuple((ws(|c| parse_value(c)), ws(|c| parse_value(c)), tag("TO"), ws(|c| parse_value(c))))( input, diff --git a/filter_parser/src/lib.rs b/filter_parser/src/lib.rs index 096a9e26e..bb826872f 100644 --- a/filter_parser/src/lib.rs +++ b/filter_parser/src/lib.rs @@ -27,12 +27,12 @@ use nom::combinator::map; use nom::error::{ContextError, ParseError}; use nom::multi::{many0, separated_list1}; use nom::number::complete::recognize_float; -use nom::sequence::{delimited, preceded}; +use nom::sequence::{delimited, preceded, tuple}; use nom::IResult; use nom_locate::LocatedSpan; pub(crate) use value::parse_value; -type Span<'a> = LocatedSpan<&'a str>; +pub type Span<'a> = LocatedSpan<&'a str>; #[derive(Debug, Clone, PartialEq, Eq)] pub struct Token<'a> { @@ -82,21 +82,22 @@ impl<'a> FilterCondition<'a> { } } - pub fn parse(input: &'a str) -> IResult { + pub fn parse>>(input: &'a str) -> Result { let span = Span::new(input); - parse_expression(span) + // handle error + Ok(parse_expression::<'a, E>(span).map(|(_rem, output)| output).ok().unwrap()) } } // remove OPTIONAL whitespaces before AND after the the provided parser -fn ws<'a, O>( - inner: impl FnMut(Span<'a>) -> IResult, -) -> impl FnMut(Span<'a>) -> IResult { +fn ws<'a, O, E: ParseError>>( + inner: impl FnMut(Span<'a>) -> IResult, +) -> impl FnMut(Span<'a>) -> IResult { delimited(multispace0, inner, multispace0) } /// and = not (~ "AND" not)* -fn parse_or(input: Span) -> IResult { +fn parse_or<'a, E: ParseError>>(input: Span<'a>) -> IResult { let (input, lhs) = parse_and(input)?; let (input, ors) = many0(preceded(ws(tag("OR")), |c| parse_and(c)))(input)?; @@ -106,7 +107,7 @@ fn parse_or(input: Span) -> IResult { Ok((input, expr)) } -fn parse_and(input: Span) -> IResult { +fn parse_and<'a, E: ParseError>>(input: Span<'a>) -> IResult { let (input, lhs) = parse_not(input)?; let (input, ors) = many0(preceded(ws(tag("AND")), |c| parse_not(c)))(input)?; let expr = ors @@ -116,15 +117,17 @@ fn parse_and(input: Span) -> IResult { } /// not = ("NOT" | "!") not | primary -fn parse_not(input: Span) -> IResult { +fn parse_not<'a, E: ParseError>>(input: Span<'a>) -> IResult { alt((map(preceded(alt((tag("!"), tag("NOT"))), |c| parse_not(c)), |e| e.negate()), |c| { parse_primary(c) }))(input) } /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) -fn parse_geo_radius(input: Span) -> IResult { - let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; +fn parse_geo_radius<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult, FilterCondition, E> { + // let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; /* TODO let err_msg_latitude_invalid = @@ -134,30 +137,25 @@ fn parse_geo_radius(input: Span) -> IResult { "_geoRadius. Longitude must be contained between -180 and 180 degrees."; */ + // we want to forbid space BEFORE the _geoRadius but not after let parsed = preceded::<_, _, _, _, _, _>( - // TODO: forbid spaces between _geoRadius and parenthesis - ws(tag("_geoRadius")), + tuple((multispace0, tag("_geoRadius"))), delimited(char('('), separated_list1(tag(","), ws(|c| recognize_float(c))), char(')')), )(input); - let (input, args): (Span, Vec) = match parsed { - Ok(e) => e, - Err(_e) => { - return Err(nom::Err::Failure(nom::error::Error::add_context( - input, - err_msg_args_incomplete, - nom::error::Error::from_char(input, '('), - ))); - } - }; + let (input, args): (Span, Vec) = parsed?; if args.len() != 3 { + // TODO + panic!("todo"); + /* let e = nom::error::Error::from_char(input, '('); return Err(nom::Err::Failure(nom::error::Error::add_context( input, err_msg_args_incomplete, e, ))); + */ } let res = FilterCondition::GeoLowerThan { @@ -168,7 +166,9 @@ fn parse_geo_radius(input: Span) -> IResult { } /// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius -fn parse_primary(input: Span) -> IResult { +fn parse_primary<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult { alt(( delimited(ws(char('(')), |c| parse_expression(c), ws(char(')'))), |c| parse_condition(c), @@ -178,12 +178,16 @@ fn parse_primary(input: Span) -> IResult { } /// expression = or -pub fn parse_expression(input: Span) -> IResult { +pub fn parse_expression<'a, E: ParseError>>( + input: Span<'a>, +) -> IResult { parse_or(input) } #[cfg(test)] pub mod tests { + use nom::error::Error; + use super::*; /// Create a raw [Token]. You must specify the string that appear BEFORE your element followed by your element @@ -469,7 +473,7 @@ pub mod tests { ]; for (input, expected) in test_case { - let result = Fc::parse(input); + let result = Fc::parse::>(input); assert!( result.is_ok(), @@ -477,7 +481,7 @@ pub mod tests { expected, result.unwrap_err() ); - let filter = result.unwrap().1; + let filter = result.unwrap(); assert_eq!(filter, expected, "Filter `{}` failed.", input); } } diff --git a/filter_parser/src/value.rs b/filter_parser/src/value.rs index c36becf7e..1497aaddd 100644 --- a/filter_parser/src/value.rs +++ b/filter_parser/src/value.rs @@ -1,13 +1,14 @@ use nom::branch::alt; use nom::bytes::complete::{take_till, take_while1}; use nom::character::complete::char; +use nom::error::ParseError; use nom::sequence::delimited; use nom::IResult; use crate::{ws, Span, Token}; /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -pub fn parse_value(input: Span) -> IResult { +pub fn parse_value<'a, E: ParseError>>(input: Span<'a>) -> IResult, Token, E> { // singleQuoted = "'" .* all but quotes "'" let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); // doubleQuoted = "\"" (word | spaces)* "\"" @@ -29,6 +30,8 @@ fn is_key_component(c: char) -> bool { #[cfg(test)] pub mod tests { + use nom::error::Error; + use super::*; use crate::tests::rtok; @@ -56,7 +59,7 @@ pub mod tests { for (input, expected) in test_case { let input = Span::new(input); - let result = parse_value(input); + let result = parse_value::>(input); assert!( result.is_ok(), diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 63fd0d984..3fc53492f 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -39,8 +39,8 @@ tempfile = "3.2.0" uuid = { version = "0.8.2", features = ["v4"] } # facet filter parser +filter_parser = { path = "../filter_parser" } nom = "7.0.0" -nom_locate = "4.0.0" # documents words self-join itertools = "0.10.0" diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 6fe5947f5..27453bf36 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -34,7 +34,7 @@ pub use self::heed_codec::{ RoaringBitmapLenCodec, StrBEU32Codec, StrStrU8Codec, }; pub use self::index::Index; -pub use self::search::{FacetDistribution, FilterCondition, MatchingWords, Search, SearchResult}; +pub use self::search::{FacetDistribution, Filter, MatchingWords, Search, SearchResult}; pub type Result = std::result::Result; diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 5c57adb88..50caf4eac 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -2,13 +2,12 @@ use std::fmt::Debug; use std::ops::Bound::{self, Excluded, Included}; use either::Either; +use filter_parser::{Condition, FilterCondition, Span, Token}; use heed::types::DecodeIgnore; use log::debug; use nom::error::{convert_error, VerboseError}; use roaring::RoaringBitmap; -use self::FilterCondition::*; -use super::filter_parser::{Operator, ParseContext}; use super::FacetNumberRange; use crate::error::{Error, UserError}; use crate::heed_codec::facet::{ @@ -17,24 +16,19 @@ use crate::heed_codec::facet::{ use crate::{distance_between_two_points, CboRoaringBitmapCodec, FieldId, Index, Result}; #[derive(Debug, Clone)] -pub enum FilterCondition<'a> { - Operator(FieldId, Operator<'a>), - Or(Box, Box), - And(Box, Box), - Empty, +pub struct Filter<'a> { + condition: FilterCondition<'a>, } -impl<'a> FilterCondition<'a> { - pub fn from_array( +impl<'a> Filter<'a> { + pub fn from_array( rtxn: &heed::RoTxn, index: &Index, array: I, ) -> Result>> where - I: IntoIterator>, - J: IntoIterator, - A: AsRef, - B: AsRef, + I: IntoIterator>, + J: IntoIterator, { let mut ands: Option = None; @@ -43,24 +37,32 @@ impl<'a> FilterCondition<'a> { Either::Left(array) => { let mut ors = None; for rule in array { - let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; + let condition = + FilterCondition::parse::>(rule.as_ref()).unwrap(); ors = match ors.take() { - Some(ors) => Some(Or(Box::new(ors), Box::new(condition))), + Some(ors) => { + Some(FilterCondition::Or(Box::new(ors), Box::new(condition))) + } None => Some(condition), }; } if let Some(rule) = ors { ands = match ands.take() { - Some(ands) => Some(And(Box::new(ands), Box::new(rule))), + Some(ands) => { + Some(FilterCondition::And(Box::new(ands), Box::new(rule))) + } None => Some(rule), }; } } Either::Right(rule) => { - let condition = FilterCondition::from_str(rtxn, index, rule.as_ref())?; + let condition = + FilterCondition::parse::>(rule.as_ref()).unwrap(); ands = match ands.take() { - Some(ands) => Some(And(Box::new(ands), Box::new(condition))), + Some(ands) => { + Some(FilterCondition::And(Box::new(ands), Box::new(condition))) + } None => Some(condition), }; } @@ -70,17 +72,14 @@ impl<'a> FilterCondition<'a> { Ok(ands) } - pub fn from_str( - rtxn: &heed::RoTxn, - index: &Index, - expression: &'a str, - ) -> Result> { + pub fn from_str(rtxn: &heed::RoTxn, index: &Index, expression: &'a str) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; let filterable_fields = index.filterable_fields(rtxn)?; - let ctx = - ParseContext { fields_ids_map: &fields_ids_map, filterable_fields: &filterable_fields }; - match ctx.parse_expression::>(expression) { - Ok((_, fc)) => Ok(fc), + // TODO TAMO + let condition = FilterCondition::parse::>(expression).ok().unwrap(); + /* + let condition = match FilterCondition::parse::>(expression) { + Ok(fc) => Ok(fc), Err(e) => { let ve = match e { nom::Err::Error(x) => x, @@ -88,25 +87,16 @@ impl<'a> FilterCondition<'a> { _ => unreachable!(), }; Err(Error::UserError(UserError::InvalidFilter { - input: convert_error(expression, ve).to_string(), + input: convert_error(Span::new(expression), ve).to_string(), })) } - } - } - pub fn negate(self) -> FilterCondition<'a> { - match self { - Operator(fid, op) => match op.negate() { - (op, None) => Operator(fid, op), - (a, Some(b)) => Or(Box::new(Operator(fid, a)), Box::new(Operator(fid, b))), - }, - Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())), - And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())), - Empty => Empty, - } + }; + */ + Ok(Self { condition }) } } -impl<'a> FilterCondition<'a> { +impl<'a> Filter<'a> { /// Aggregates the documents ids that are part of the specified range automatically /// going deeper through the levels. fn explore_facet_number_levels( @@ -221,20 +211,33 @@ impl<'a> FilterCondition<'a> { numbers_db: heed::Database, strings_db: heed::Database, field_id: FieldId, - operator: &Operator<'a>, + operator: &Condition<'a>, ) -> Result { // Make sure we always bound the ranges with the field id and the level, // as the facets values are all in the same database and prefixed by the // field id and the level. + // TODO TAMO: return good error when we can't parse a span let (left, right) = match operator { - Operator::GreaterThan(val) => (Excluded(*val), Included(f64::MAX)), - Operator::GreaterThanOrEqual(val) => (Included(*val), Included(f64::MAX)), - Operator::Equal(number, string) => { + Condition::GreaterThan(val) => { + (Excluded(val.inner.parse::().unwrap()), Included(f64::MAX)) + } + Condition::GreaterThanOrEqual(val) => { + (Included(val.inner.parse::().unwrap()), Included(f64::MAX)) + } + Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.inner.parse().unwrap())), + Condition::LowerThanOrEqual(val) => { + (Included(f64::MIN), Included(val.inner.parse().unwrap())) + } + Condition::Between { from, to } => { + (Included(from.inner.parse::().unwrap()), Included(to.inner.parse().unwrap())) + } + Condition::Equal(val) => { let (_original_value, string_docids) = - strings_db.get(rtxn, &(field_id, &string))?.unwrap_or_default(); + strings_db.get(rtxn, &(field_id, val.inner))?.unwrap_or_default(); + let number = val.inner.parse::().ok(); let number_docids = match number { Some(n) => { - let n = Included(*n); + let n = Included(n); let mut output = RoaringBitmap::new(); Self::explore_facet_number_levels( rtxn, @@ -251,50 +254,49 @@ impl<'a> FilterCondition<'a> { }; return Ok(string_docids | number_docids); } - Operator::NotEqual(number, string) => { + Condition::NotEqual(val) => { + let number = val.inner.parse::().ok(); let all_numbers_ids = if number.is_some() { index.number_faceted_documents_ids(rtxn, field_id)? } else { RoaringBitmap::new() }; let all_strings_ids = index.string_faceted_documents_ids(rtxn, field_id)?; - let operator = Operator::Equal(*number, string.clone()); + let operator = Condition::Equal(val.clone()); let docids = Self::evaluate_operator( rtxn, index, numbers_db, strings_db, field_id, &operator, )?; return Ok((all_numbers_ids | all_strings_ids) - docids); - } - Operator::LowerThan(val) => (Included(f64::MIN), Excluded(*val)), - Operator::LowerThanOrEqual(val) => (Included(f64::MIN), Included(*val)), - Operator::Between(left, right) => (Included(*left), Included(*right)), - Operator::GeoLowerThan(base_point, distance) => { - let rtree = match index.geo_rtree(rtxn)? { - Some(rtree) => rtree, - None => return Ok(RoaringBitmap::new()), - }; + } /* + Condition::GeoLowerThan(base_point, distance) => { + let rtree = match index.geo_rtree(rtxn)? { + Some(rtree) => rtree, + None => return Ok(RoaringBitmap::new()), + }; - let result = rtree - .nearest_neighbor_iter(base_point) - .take_while(|point| { - distance_between_two_points(base_point, point.geom()) < *distance - }) - .map(|point| point.data) - .collect(); + let result = rtree + .nearest_neighbor_iter(base_point) + .take_while(|point| { + distance_between_two_points(base_point, point.geom()) < *distance + }) + .map(|point| point.data) + .collect(); - return Ok(result); - } - Operator::GeoGreaterThan(point, distance) => { - let result = Self::evaluate_operator( - rtxn, - index, - numbers_db, - strings_db, - field_id, - &Operator::GeoLowerThan(point.clone(), *distance), - )?; - let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; - return Ok(geo_faceted_doc_ids - result); - } + return Ok(result); + } + Condition::GeoGreaterThan(point, distance) => { + let result = Self::evaluate_operator( + rtxn, + index, + numbers_db, + strings_db, + field_id, + &Condition::GeoLowerThan(point.clone(), *distance), + )?; + let geo_faceted_doc_ids = index.geo_faceted_documents_ids(rtxn)?; + return Ok(geo_faceted_doc_ids - result); + } + */ }; // Ask for the biggest value that can exist for this specific field, if it exists @@ -326,21 +328,390 @@ impl<'a> FilterCondition<'a> { let numbers_db = index.facet_id_f64_docids; let strings_db = index.facet_id_string_docids; - match self { - Operator(fid, op) => { - Self::evaluate_operator(rtxn, index, numbers_db, strings_db, *fid, op) + match &self.condition { + FilterCondition::Condition { fid, op } => { + // TODO: parse fid + let _ = fid; + let fid = 42; + Self::evaluate_operator(rtxn, index, numbers_db, strings_db, fid, &op) } - Or(lhs, rhs) => { - let lhs = lhs.evaluate(rtxn, index)?; - let rhs = rhs.evaluate(rtxn, index)?; + FilterCondition::Or(lhs, rhs) => { + let lhs = Self::evaluate(&(lhs.as_ref().clone()).into(), rtxn, index)?; + let rhs = Self::evaluate(&(rhs.as_ref().clone()).into(), rtxn, index)?; Ok(lhs | rhs) } - And(lhs, rhs) => { - let lhs = lhs.evaluate(rtxn, index)?; - let rhs = rhs.evaluate(rtxn, index)?; + FilterCondition::And(lhs, rhs) => { + let lhs = Self::evaluate(&(lhs.as_ref().clone()).into(), rtxn, index)?; + let rhs = Self::evaluate(&(rhs.as_ref().clone()).into(), rtxn, index)?; Ok(lhs & rhs) } Empty => Ok(RoaringBitmap::new()), } } } + +impl<'a> From> for Filter<'a> { + fn from(fc: FilterCondition<'a>) -> Self { + Self { condition: fc } + } +} + +#[cfg(test)] +mod tests { + use big_s::S; + use either::Either; + use heed::EnvOpenOptions; + use maplit::hashset; + + use super::*; + use crate::update::Settings; + use crate::Index; + + #[test] + fn number() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut map = index.fields_ids_map(&wtxn).unwrap(); + map.insert("timestamp"); + index.put_fields_ids_map(&mut wtxn, &map).unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_filterable_fields(hashset! { "timestamp".into() }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); + let expected = FilterCondition::Operator(0, Between(22.0, 44.0)); + assert_eq!(condition, expected); + + let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::Operator(0, LowerThan(22.0))), + Box::new(FilterCondition::Operator(0, GreaterThan(44.0))), + ); + assert_eq!(condition, expected); + } + + #[test] + fn compare() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp"), S("id")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") ,S("id")}); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "channel < 20").unwrap(); + let expected = FilterCondition::Operator(0, LowerThan(20.0)); + assert_eq!(condition, expected); + + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str(&rtxn, &index, "id < 200").unwrap(); + let expected = FilterCondition::Operator(2, LowerThan(200.0)); + assert_eq!(condition, expected); + } + + #[test] + fn parentheses() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_str( + &rtxn, + &index, + "channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)", + ) + .unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), + Box::new(FilterCondition::And( + Box::new(FilterCondition::Operator(1, Between(22.0, 44.0))), + Box::new(FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce")))), + )), + ); + assert_eq!(condition, expected); + + let condition = FilterCondition::from_str( + &rtxn, + &index, + "channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)", + ) + .unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), + Box::new(FilterCondition::Or( + Box::new(FilterCondition::Or( + Box::new(FilterCondition::Operator(1, LowerThan(22.0))), + Box::new(FilterCondition::Operator(1, GreaterThan(44.0))), + )), + Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("ponce")))), + )), + ); + assert_eq!(condition, expected); + } + + #[test] + fn from_array() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // Simple array with Left + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["channel = mv"])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); + assert_eq!(condition, expected); + + // Simple array with Right + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( + &rtxn, + &index, + vec![Either::Right("channel = mv")], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); + assert_eq!(condition, expected); + + // Array with Left and escaped quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["channel = \"Mister Mv\""])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); + assert_eq!(condition, expected); + + // Array with Right and escaped quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( + &rtxn, + &index, + vec![Either::Right("channel = \"Mister Mv\"")], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); + assert_eq!(condition, expected); + + // Array with Left and escaped simple quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["channel = 'Mister Mv'"])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); + assert_eq!(condition, expected); + + // Array with Right and escaped simple quote + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( + &rtxn, + &index, + vec![Either::Right("channel = 'Mister Mv'")], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); + assert_eq!(condition, expected); + + // Simple with parenthesis + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array::<_, _, _, &str>( + &rtxn, + &index, + vec![Either::Left(["(channel = mv)"])], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str(&rtxn, &index, "(channel = mv)").unwrap(); + assert_eq!(condition, expected); + + // Test that the facet condition is correctly generated. + let rtxn = index.read_txn().unwrap(); + let condition = FilterCondition::from_array( + &rtxn, + &index, + vec![ + Either::Right("channel = gotaga"), + Either::Left(vec!["timestamp = 44", "channel != ponce"]), + ], + ) + .unwrap() + .unwrap(); + let expected = FilterCondition::from_str( + &rtxn, + &index, + "channel = gotaga AND (timestamp = 44 OR channel != ponce)", + ) + .unwrap(); + assert_eq!(condition, expected); + } + + #[test] + fn geo_radius() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + // basic test + let condition = + FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap(); + let expected = FilterCondition::Operator(0, GeoLowerThan([12., 13.0005], 2000.)); + assert_eq!(condition, expected); + + // test the negation of the GeoLowerThan + let condition = + FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap(); + let expected = FilterCondition::Operator(0, GeoGreaterThan([50., 18.], 2000.500)); + assert_eq!(condition, expected); + + // composition of multiple operations + let condition = FilterCondition::from_str( + &rtxn, + &index, + "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", + ) + .unwrap(); + let expected = FilterCondition::Or( + Box::new(FilterCondition::And( + Box::new(FilterCondition::Operator(0, GeoGreaterThan([1., 2.], 300.))), + Box::new(FilterCondition::Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))), + )), + Box::new(FilterCondition::Operator(1, LowerThanOrEqual(10.))), + ); + assert_eq!(condition, expected); + } + + #[test] + fn geo_radius_error() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the filterable fields to be the channel. + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order + builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); + builder.execute(|_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let rtxn = index.read_txn().unwrap(); + + // georadius don't have any parameters + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius don't have any parameters + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius()"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius don't have enough parameters + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + // georadius have too many parameters + let result = + FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); + + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-100, 150, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!( + error.to_string().contains("Latitude must be contained between -90 and 90 degrees."), + "{}", + error.to_string() + ); + + // georadius have a bad latitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-90.0000001, 150, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Latitude must be contained between -90 and 90 degrees.")); + + // georadius have a bad longitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 250, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Longitude must be contained between -180 and 180 degrees.")); + + // georadius have a bad longitude + let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 180.000001, 10)"); + assert!(result.is_err()); + let error = result.unwrap_err(); + assert!(error + .to_string() + .contains("Longitude must be contained between -180 and 180 degrees.")); + } +} diff --git a/milli/src/search/facet/filter_parser.rs b/milli/src/search/facet/filter_parser.rs deleted file mode 100644 index c25d523aa..000000000 --- a/milli/src/search/facet/filter_parser.rs +++ /dev/null @@ -1,891 +0,0 @@ -//! BNF grammar: -//! -//! ```text -//! expression = or -//! or = and (~ "OR" ~ and) -//! and = not (~ "AND" not)* -//! not = ("NOT" | "!") not | primary -//! primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius -//! to = value value TO value -//! condition = value ("==" | ">" ...) value -//! value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* -//! singleQuoted = "'" .* all but quotes "'" -//! doubleQuoted = "\"" (word | spaces)* "\"" -//! word = (alphanumeric | _ | - | .)+ -//! geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) -//! ``` - -use std::collections::HashSet; -use std::fmt::Debug; -use std::result::Result as StdResult; - -use nom::branch::alt; -use nom::bytes::complete::{tag, take_till, take_while1}; -use nom::character::complete::{char, multispace0}; -use nom::combinator::map; -use nom::error::{ContextError, ErrorKind, VerboseError}; -use nom::multi::{many0, separated_list1}; -use nom::number::complete::recognize_float; -use nom::sequence::{delimited, preceded, tuple}; -use nom::IResult; -use nom_locate::LocatedSpan; - -use self::Operator::*; -use super::FilterCondition; -use crate::{FieldId, FieldsIdsMap}; - -pub enum FilterError { - AttributeNotFilterable(String), -} - -#[derive(Debug, Clone, PartialEq, Eq)] -struct Token<'a> { - pub position: Span<'a>, - pub inner: &'a str, -} - -type Span<'a> = LocatedSpan<&'a str>; - -#[derive(Debug, Clone)] -pub enum Operator<'a> { - GreaterThan(Token<'a>), - GreaterThanOrEqual(Token<'a>), - Equal(Option>, Token<'a>), - NotEqual(Option>, Token<'a>), - LowerThan(Token<'a>), - LowerThanOrEqual(Token<'a>), - Between(Token<'a>, Token<'a>), - GeoLowerThan([Token<'a>; 2], Token<'a>), - GeoGreaterThan([Token<'a>; 2], Token<'a>), -} - -impl<'a> Operator<'a> { - /// This method can return two operations in case it must express - /// an OR operation for the between case (i.e. `TO`). - pub fn negate(self) -> (Self, Option) { - match self { - GreaterThan(n) => (LowerThanOrEqual(n), None), - GreaterThanOrEqual(n) => (LowerThan(n), None), - Equal(n, s) => (NotEqual(n, s), None), - NotEqual(n, s) => (Equal(n, s), None), - LowerThan(n) => (GreaterThanOrEqual(n), None), - LowerThanOrEqual(n) => (GreaterThan(n), None), - Between(n, m) => (LowerThan(n), Some(GreaterThan(m))), - GeoLowerThan(point, distance) => (GeoGreaterThan(point, distance), None), - GeoGreaterThan(point, distance) => (GeoLowerThan(point, distance), None), - } - } -} - -pub trait FilterParserError<'a>: - nom::error::ParseError<&'a str> + ContextError<&'a str> + std::fmt::Debug -{ -} - -impl<'a> FilterParserError<'a> for VerboseError<&'a str> {} - -pub struct ParseContext<'a> { - pub fields_ids_map: &'a FieldsIdsMap, - pub filterable_fields: &'a HashSet, -} - -impl<'a> ParseContext<'a> { - /// and = not (~ "AND" not)* - fn parse_or(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let (input, lhs) = self.parse_and(input)?; - let (input, ors) = - many0(preceded(self.ws(tag("OR")), |c| Self::parse_and(self, c)))(input)?; - - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); - Ok((input, expr)) - } - - fn parse_and(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let (input, lhs) = self.parse_not(input)?; - let (input, ors) = many0(preceded(self.ws(tag("AND")), |c| self.parse_not(c)))(input)?; - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); - Ok((input, expr)) - } - - /// not = ("NOT" | "!") not | primary - fn parse_not(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - alt(( - map(preceded(alt((tag("!"), tag("NOT"))), |c| self.parse_not(c)), |e| e.negate()), - |c| self.parse_primary(c), - ))(input) - } - - fn ws(&'a self, inner: F) -> impl FnMut(&'a str) -> IResult<&'a str, O, E> - where - F: FnMut(&'a str) -> IResult<&'a str, O, E>, - E: FilterParserError<'a>, - { - delimited(multispace0, inner, multispace0) - } - - /// condition = value ("==" | ">" ...) value - fn parse_condition(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let operator = alt((tag("<="), tag(">="), tag("!="), tag("<"), tag(">"), tag("="))); - let (input, (key, op, value)) = - tuple((|c| self.parse_value(c), operator, |c| self.parse_value(c)))(input)?; - - let fid = self.parse_fid(input, key)?; - let r: StdResult>> = self.parse_numeric(value); - match op { - "=" => { - let k = - FilterCondition::Operator(fid, Equal(r.ok(), value.to_string().to_lowercase())); - Ok((input, k)) - } - "!=" => { - let k = FilterCondition::Operator( - fid, - NotEqual(r.ok(), value.to_string().to_lowercase()), - ); - Ok((input, k)) - } - ">" | "<" | "<=" | ">=" => { - let numeric: f64 = self.parse_numeric(value)?; - let k = match op { - ">" => FilterCondition::Operator(fid, GreaterThan(numeric)), - "<" => FilterCondition::Operator(fid, LowerThan(numeric)), - "<=" => FilterCondition::Operator(fid, LowerThanOrEqual(numeric)), - ">=" => FilterCondition::Operator(fid, GreaterThanOrEqual(numeric)), - _ => unreachable!(), - }; - Ok((input, k)) - } - _ => unreachable!(), - } - } - - fn parse_numeric(&'a self, input: &'a str) -> StdResult> - where - E: FilterParserError<'a>, - T: std::str::FromStr, - { - match input.parse::() { - Ok(n) => Ok(n), - Err(_) => match input.chars().nth(0) { - Some(ch) => Err(nom::Err::Failure(E::from_char(input, ch))), - None => Err(nom::Err::Failure(E::from_error_kind(input, ErrorKind::Eof))), - }, - } - } - - fn parse_fid(&'a self, input: &'a str, key: &'a str) -> StdResult> - where - E: FilterParserError<'a>, - { - match self.fields_ids_map.id(key) { - Some(fid) if self.filterable_fields.contains(key) => Ok(fid), - _ => Err(nom::Err::Failure(E::add_context( - input, - "Attribute is not filterable", - E::from_char(input, 'T'), - ))), - } - } - - /// to = value value TO value - fn parse_to(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let (input, (key, from, _, to)) = tuple(( - self.ws(|c| self.parse_value(c)), - self.ws(|c| self.parse_value(c)), - tag("TO"), - self.ws(|c| self.parse_value(c)), - ))(input)?; - - let fid = self.parse_fid(input, key)?; - let numeric_from: f64 = self.parse_numeric(from)?; - let numeric_to: f64 = self.parse_numeric(to)?; - let res = FilterCondition::Operator(fid, Between(numeric_from, numeric_to)); - - Ok((input, res)) - } - - /// geoRadius = WS* ~ "_geoRadius(float ~ "," ~ float ~ "," float) - fn parse_geo_radius(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - let err_msg_args_incomplete = "_geoRadius. The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`"; - let err_msg_latitude_invalid = - "_geoRadius. Latitude must be contained between -90 and 90 degrees."; - - let err_msg_longitude_invalid = - "_geoRadius. Longitude must be contained between -180 and 180 degrees."; - - let parsed = preceded::<_, _, _, E, _, _>( - // TODO: forbid spaces between _geoRadius and parenthesis - self.ws(tag("_geoRadius")), - delimited( - char('('), - separated_list1(tag(","), self.ws(|c| recognize_float(c))), - char(')'), - ), - )(input); - - let (input, args): (&str, Vec<&str>) = match parsed { - Ok(e) => e, - Err(_e) => { - return Err(nom::Err::Failure(E::add_context( - input, - err_msg_args_incomplete, - E::from_char(input, '('), - ))); - } - }; - - if args.len() != 3 { - let e = E::from_char(input, '('); - return Err(nom::Err::Failure(E::add_context(input, err_msg_args_incomplete, e))); - } - let lat = self.parse_numeric(args[0])?; - let lng = self.parse_numeric(args[1])?; - let dis = self.parse_numeric(args[2])?; - - let fid = match self.fields_ids_map.id("_geo") { - Some(fid) => fid, - // TODO send an error - None => return Ok((input, FilterCondition::Empty)), - }; - - if !(-90.0..=90.0).contains(&lat) { - return Err(nom::Err::Failure(E::add_context( - input, - err_msg_latitude_invalid, - E::from_char(input, '('), - ))); - } else if !(-180.0..=180.0).contains(&lng) { - return Err(nom::Err::Failure(E::add_context( - input, - err_msg_longitude_invalid, - E::from_char(input, '('), - ))); - } - - let res = FilterCondition::Operator(fid, GeoLowerThan([lat, lng], dis)); - Ok((input, res)) - } - - /// primary = (WS* ~ "(" expression ")" ~ WS*) | condition | to | geoRadius - fn parse_primary(&'a self, input: &'a str) -> IResult<&str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - alt(( - delimited(self.ws(char('(')), |c| self.parse_expression(c), self.ws(char(')'))), - |c| self.parse_condition(c), - |c| self.parse_to(c), - |c| self.parse_geo_radius(c), - ))(input) - } - - /// value = WS* ~ ( word | singleQuoted | doubleQuoted) ~ WS* - fn parse_value(&'a self, input: &'a str) -> IResult<&'a str, &'a str, E> - where - E: FilterParserError<'a>, - { - // singleQuoted = "'" .* all but quotes "'" - let simple_quoted_key = |input| take_till(|c: char| c == '\'')(input); - // doubleQuoted = "\"" (word | spaces)* "\"" - let quoted_key = |input| take_till(|c: char| c == '"')(input); - // word = (alphanumeric | _ | - | .)+ - let word = |input| take_while1(Self::is_key_component)(input); - - alt(( - self.ws(delimited(char('\''), simple_quoted_key, char('\''))), - self.ws(delimited(char('"'), quoted_key, char('"'))), - self.ws(word), - ))(input) - } - - fn is_key_component(c: char) -> bool { - c.is_alphanumeric() || ['_', '-', '.'].contains(&c) - } - - /// expression = or - pub fn parse_expression(&'a self, input: &'a str) -> IResult<&'a str, FilterCondition, E> - where - E: FilterParserError<'a>, - { - self.parse_or(input) - } -} - -#[cfg(test)] -mod tests { - use big_s::S; - use either::Either; - use heed::EnvOpenOptions; - use maplit::hashset; - - use super::*; - use crate::update::Settings; - use crate::Index; - - #[test] - fn string() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut map = index.fields_ids_map(&wtxn).unwrap(); - map.insert("channel"); - map.insert("dog race"); - map.insert("subscribers"); - map.insert("_geo"); - index.put_fields_ids_map(&mut wtxn, &map).unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields( - hashset! { S("channel"), S("dog race"), S("subscribers"), S("_geo") }, - ); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - - use FilterCondition as Fc; - let test_case = [ - // simple test - ( - Fc::from_str(&rtxn, &index, "channel = Ponce"), - Fc::Operator(0, Operator::Equal(None, S("ponce"))), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers = 12"), - Fc::Operator(2, Operator::Equal(Some(12.), S("12"))), - ), - // test all the quotes and simple quotes - ( - Fc::from_str(&rtxn, &index, "channel = 'Mister Mv'"), - Fc::Operator(0, Operator::Equal(None, S("mister mv"))), - ), - ( - Fc::from_str(&rtxn, &index, "channel = \"Mister Mv\""), - Fc::Operator(0, Operator::Equal(None, S("mister mv"))), - ), - ( - Fc::from_str(&rtxn, &index, "'dog race' = Borzoi"), - Fc::Operator(1, Operator::Equal(None, S("borzoi"))), - ), - ( - Fc::from_str(&rtxn, &index, "\"dog race\" = Chusky"), - Fc::Operator(1, Operator::Equal(None, S("chusky"))), - ), - ( - Fc::from_str(&rtxn, &index, "\"dog race\" = \"Bernese Mountain\""), - Fc::Operator(1, Operator::Equal(None, S("bernese mountain"))), - ), - ( - Fc::from_str(&rtxn, &index, "'dog race' = 'Bernese Mountain'"), - Fc::Operator(1, Operator::Equal(None, S("bernese mountain"))), - ), - ( - Fc::from_str(&rtxn, &index, "\"dog race\" = 'Bernese Mountain'"), - Fc::Operator(1, Operator::Equal(None, S("bernese mountain"))), - ), - // test all the operators - ( - Fc::from_str(&rtxn, &index, "channel != ponce"), - Fc::Operator(0, Operator::NotEqual(None, S("ponce"))), - ), - ( - Fc::from_str(&rtxn, &index, "NOT channel = ponce"), - Fc::Operator(0, Operator::NotEqual(None, S("ponce"))), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers < 1000"), - Fc::Operator(2, Operator::LowerThan(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers > 1000"), - Fc::Operator(2, Operator::GreaterThan(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers <= 1000"), - Fc::Operator(2, Operator::LowerThanOrEqual(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers >= 1000"), - Fc::Operator(2, Operator::GreaterThanOrEqual(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers < 1000"), - Fc::Operator(2, Operator::GreaterThanOrEqual(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers > 1000"), - Fc::Operator(2, Operator::LowerThanOrEqual(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers <= 1000"), - Fc::Operator(2, Operator::GreaterThan(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers >= 1000"), - Fc::Operator(2, Operator::LowerThan(1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "subscribers 100 TO 1000"), - Fc::Operator(2, Operator::Between(100., 1000.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT subscribers 100 TO 1000"), - Fc::Or( - Box::new(Fc::Operator(2, Operator::LowerThan(100.))), - Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), - ), - ), - ( - Fc::from_str(&rtxn, &index, "_geoRadius(12, 13, 14)"), - Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.)), - ), - ( - Fc::from_str(&rtxn, &index, "NOT _geoRadius(12, 13, 14)"), - Fc::Operator(3, Operator::GeoGreaterThan([12., 13.], 14.)), - ), - // test simple `or` and `and` - ( - Fc::from_str(&rtxn, &index, "channel = ponce AND 'dog race' != 'bernese mountain'"), - Fc::And( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - ), - ), - ( - Fc::from_str(&rtxn, &index, "channel = ponce OR 'dog race' != 'bernese mountain'"), - Fc::Or( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - ), - ), - ( - Fc::from_str( - &rtxn, - &index, - "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", - ), - Fc::Or( - Box::new(Fc::And( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - )), - Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), - ), - ), - // test parenthesis - ( - Fc::from_str( - &rtxn, - &index, - "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", - ), - Fc::And( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Or( - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), - ))), - ), - ( - Fc::from_str( - &rtxn, - &index, - "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)", - ), - Fc::And( - Box::new(Fc::Or( - Box::new(Fc::And( - Box::new(Fc::Operator(0, Operator::Equal(None, S("ponce")))), - Box::new(Fc::Operator(1, Operator::NotEqual(None, S("bernese mountain")))), - )), - Box::new(Fc::Operator(2, Operator::GreaterThan(1000.))), - )), - Box::new(Fc::Operator(3, Operator::GeoLowerThan([12., 13.], 14.)))) - ), - ]; - - for (result, expected) in test_case { - assert!( - result.is_ok(), - "Filter {:?} was supposed to be parsed but failed with the following error: `{}`", - expected, - result.unwrap_err() - ); - let filter = result.unwrap(); - assert_eq!(filter, expected,); - } - } - - #[test] - fn number() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut map = index.fields_ids_map(&wtxn).unwrap(); - map.insert("timestamp"); - index.put_fields_ids_map(&mut wtxn, &map).unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_filterable_fields(hashset! { "timestamp".into() }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "timestamp 22 TO 44").unwrap(); - let expected = FilterCondition::Operator(0, Between(22.0, 44.0)); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str(&rtxn, &index, "NOT timestamp 22 TO 44").unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::Operator(0, LowerThan(22.0))), - Box::new(FilterCondition::Operator(0, GreaterThan(44.0))), - ); - assert_eq!(condition, expected); - } - - #[test] - fn compare() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp"), S("id")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") ,S("id")}); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "channel < 20").unwrap(); - let expected = FilterCondition::Operator(0, LowerThan(20.0)); - assert_eq!(condition, expected); - - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str(&rtxn, &index, "id < 200").unwrap(); - let expected = FilterCondition::Operator(2, LowerThan(200.0)); - assert_eq!(condition, expected); - } - - #[test] - fn parentheses() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga OR (timestamp 22 TO 44 AND channel != ponce)", - ) - .unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), - Box::new(FilterCondition::And( - Box::new(FilterCondition::Operator(1, Between(22.0, 44.0))), - Box::new(FilterCondition::Operator(0, Operator::NotEqual(None, S("ponce")))), - )), - ); - assert_eq!(condition, expected); - - let condition = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga OR NOT (timestamp 22 TO 44 AND channel != ponce)", - ) - .unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("gotaga")))), - Box::new(FilterCondition::Or( - Box::new(FilterCondition::Or( - Box::new(FilterCondition::Operator(1, LowerThan(22.0))), - Box::new(FilterCondition::Operator(1, GreaterThan(44.0))), - )), - Box::new(FilterCondition::Operator(0, Operator::Equal(None, S("ponce")))), - )), - ); - assert_eq!(condition, expected); - } - - #[test] - fn from_array() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("channel"), S("timestamp")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("channel"), S("timestamp") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - // Simple array with Left - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["channel = mv"])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); - assert_eq!(condition, expected); - - // Simple array with Right - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( - &rtxn, - &index, - vec![Either::Right("channel = mv")], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = mv").unwrap(); - assert_eq!(condition, expected); - - // Array with Left and escaped quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["channel = \"Mister Mv\""])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); - assert_eq!(condition, expected); - - // Array with Right and escaped quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( - &rtxn, - &index, - vec![Either::Right("channel = \"Mister Mv\"")], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = \"Mister Mv\"").unwrap(); - assert_eq!(condition, expected); - - // Array with Left and escaped simple quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["channel = 'Mister Mv'"])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); - assert_eq!(condition, expected); - - // Array with Right and escaped simple quote - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, Option<&str>, _, _>( - &rtxn, - &index, - vec![Either::Right("channel = 'Mister Mv'")], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "channel = 'Mister Mv'").unwrap(); - assert_eq!(condition, expected); - - // Simple with parenthesis - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array::<_, _, _, &str>( - &rtxn, - &index, - vec![Either::Left(["(channel = mv)"])], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str(&rtxn, &index, "(channel = mv)").unwrap(); - assert_eq!(condition, expected); - - // Test that the facet condition is correctly generated. - let rtxn = index.read_txn().unwrap(); - let condition = FilterCondition::from_array( - &rtxn, - &index, - vec![ - Either::Right("channel = gotaga"), - Either::Left(vec!["timestamp = 44", "channel != ponce"]), - ], - ) - .unwrap() - .unwrap(); - let expected = FilterCondition::from_str( - &rtxn, - &index, - "channel = gotaga AND (timestamp = 44 OR channel != ponce)", - ) - .unwrap(); - assert_eq!(condition, expected); - } - - #[test] - fn geo_radius() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - // basic test - let condition = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(12, 13.0005, 2000)").unwrap(); - let expected = FilterCondition::Operator(0, GeoLowerThan([12., 13.0005], 2000.)); - assert_eq!(condition, expected); - - // test the negation of the GeoLowerThan - let condition = - FilterCondition::from_str(&rtxn, &index, "NOT _geoRadius(50, 18, 2000.500)").unwrap(); - let expected = FilterCondition::Operator(0, GeoGreaterThan([50., 18.], 2000.500)); - assert_eq!(condition, expected); - - // composition of multiple operations - let condition = FilterCondition::from_str( - &rtxn, - &index, - "(NOT _geoRadius(1, 2, 300) AND _geoRadius(1.001, 2.002, 1000.300)) OR price <= 10", - ) - .unwrap(); - let expected = FilterCondition::Or( - Box::new(FilterCondition::And( - Box::new(FilterCondition::Operator(0, GeoGreaterThan([1., 2.], 300.))), - Box::new(FilterCondition::Operator(0, GeoLowerThan([1.001, 2.002], 1000.300))), - )), - Box::new(FilterCondition::Operator(1, LowerThanOrEqual(10.))), - ); - assert_eq!(condition, expected); - } - - #[test] - fn geo_radius_error() { - let path = tempfile::tempdir().unwrap(); - let mut options = EnvOpenOptions::new(); - options.map_size(10 * 1024 * 1024); // 10 MB - let index = Index::new(options, &path).unwrap(); - - // Set the filterable fields to be the channel. - let mut wtxn = index.write_txn().unwrap(); - let mut builder = Settings::new(&mut wtxn, &index, 0); - builder.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order - builder.set_filterable_fields(hashset! { S("_geo"), S("price") }); - builder.execute(|_, _| ()).unwrap(); - wtxn.commit().unwrap(); - - let rtxn = index.read_txn().unwrap(); - - // georadius don't have any parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius don't have any parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius()"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius don't have enough parameters - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - // georadius have too many parameters - let result = - FilterCondition::from_str(&rtxn, &index, "_geoRadius(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error.to_string().contains("The `_geoRadius` filter expect three arguments: `_geoRadius(latitude, longitude, radius)`")); - - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-100, 150, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!( - error.to_string().contains("Latitude must be contained between -90 and 90 degrees."), - "{}", - error.to_string() - ); - - // georadius have a bad latitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-90.0000001, 150, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Latitude must be contained between -90 and 90 degrees.")); - - // georadius have a bad longitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 250, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Longitude must be contained between -180 and 180 degrees.")); - - // georadius have a bad longitude - let result = FilterCondition::from_str(&rtxn, &index, "_geoRadius(-10, 180.000001, 10)"); - assert!(result.is_err()); - let error = result.unwrap_err(); - assert!(error - .to_string() - .contains("Longitude must be contained between -180 and 180 degrees.")); - } -} diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 3efa0262f..d6f276fbb 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,10 +1,9 @@ pub use self::facet_distribution::FacetDistribution; pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::facet_string::FacetStringIter; -pub use self::filter_condition::FilterCondition; +pub use self::filter_condition::Filter; mod facet_distribution; mod facet_number; mod facet_string; mod filter_condition; -mod filter_parser; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 8cd7f1a34..a31ead1ec 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -14,7 +14,7 @@ use meilisearch_tokenizer::{Analyzer, AnalyzerConfig}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; -pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition}; +pub use self::facet::{FacetDistribution, FacetNumberIter, Filter}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; use crate::error::UserError; @@ -35,7 +35,7 @@ mod query_tree; pub struct Search<'a> { query: Option, // this should be linked to the String in the query - filter: Option>, + filter: Option>, offset: usize, limit: usize, sort_criteria: Option>, @@ -97,7 +97,7 @@ impl<'a> Search<'a> { self } - pub fn filter(&mut self, condition: FilterCondition) -> &mut Search<'a> { + pub fn filter(&mut self, condition: Filter<'a>) -> &mut Search<'a> { self.filter = Some(condition); self }