From 5c525168a08ec6a7194cc9897a9720adc55eb026 Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Fri, 28 Oct 2022 13:42:21 +0200 Subject: [PATCH 01/16] Add _geoBoundingBox parser --- filter-parser/src/lib.rs | 51 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 50 insertions(+), 1 deletion(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 04037d061..65c4c56f8 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -18,6 +18,7 @@ //! doubleQuoted = "\"" .* all but double quotes "\"" //! word = (alphanumeric | _ | - | .)+ //! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")" +//! geoBoundingBox = "_geoBoundingBox((" WS * float WS* "," WS* float WS* "), (" WS* float WS* "," WS* float WS* ")") //! ``` //! //! Other BNF grammar used to handle some specific errors: @@ -130,6 +131,7 @@ pub enum FilterCondition<'a> { Or(Vec), And(Vec), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, + GeoBoundingBox { top_left_point: [Token<'a>; 2], bottom_right_point: [Token<'a>; 2]}, } impl<'a> FilterCondition<'a> { @@ -325,6 +327,49 @@ fn parse_geo_radius(input: Span) -> IResult { Ok((input, res)) } +/// geoBoundingBox = WS* "_geoBoundingBox((float WS* "," WS* float WS* "), (float WS* "," WS* float WS* ")") +/// If we parse `_geoBoundingBox` we MUST parse the rest of the expression. +fn parse_geo_bounding_box(input: Span) -> IResult { + // we want to allow space BEFORE the _geoBoundingBox but not after + let parsed = preceded( + tuple((multispace0, word_exact("_geoBoundingBox"))), + // if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure + cut( + delimited( + char('('), + separated_list1( + tag(","), + ws( + delimited( + char('('), + separated_list1( + tag(","), + ws(recognize_float) + ), + char(')') + ) + ) + ), + char(')') + ) + ), + )(input) + .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo))); + + let (input, args) = parsed?; + + if args.len() != 2 { + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::Geo))); + } + + //TODO: Check sub array length + let res = FilterCondition::GeoBoundingBox { + top_left_point: [args[0][0].into(), args[0][1].into()], + bottom_right_point: [args[1][0].into(), args[1][1].into()] + }; + Ok((input, res)) +} + /// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float) fn parse_geo_point(input: Span) -> IResult { // we want to forbid space BEFORE the _geoPoint but not after @@ -367,6 +412,7 @@ fn parse_primary(input: Span, depth: usize) -> IResult { }), ), parse_geo_radius, + parse_geo_bounding_box, parse_in, parse_not_in, parse_condition, @@ -512,7 +558,7 @@ pub mod tests { insta::assert_display_snapshot!(p("channel = "), @r###" Was expecting a value but instead got nothing. - 14:14 channel = + 14:14 channel = "###); insta::assert_display_snapshot!(p("channel = šŸ»"), @r###" @@ -715,6 +761,9 @@ impl<'a> std::fmt::Display for FilterCondition<'a> { FilterCondition::GeoLowerThan { point, radius } => { write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius) } + FilterCondition::GeoBoundingBox { top_left_point, bottom_right_point } => { + write!(f, "_geoBoundingBox(({}, {}), ({}, {}))", top_left_point[0], top_left_point[1], bottom_right_point[0], bottom_right_point[1]) + } } } } From b078477d806a6e076b273802653daafb7b598b46 Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Fri, 28 Oct 2022 15:30:53 +0200 Subject: [PATCH 02/16] Add error handling and earth lap collision with bounding box --- filter-parser/src/error.rs | 16 +++-- filter-parser/src/lib.rs | 55 ++++++++-------- filter-parser/src/value.rs | 18 ++++-- milli/src/asc_desc.rs | 3 + milli/src/criterion.rs | 2 + milli/src/search/facet/filter.rs | 108 +++++++++++++++++++++++++++++++ 6 files changed, 165 insertions(+), 37 deletions(-) diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index ea95caba7..aaf1a2e36 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -57,8 +57,10 @@ pub enum ExpectedValueKind { #[derive(Debug)] pub enum ErrorKind<'a> { ReservedGeo(&'a str), - Geo, - MisusedGeo, + GeoRadius, + GeoBoundingBox, + MisusedGeoRadius, + MisusedGeoBoundingBox, InvalidPrimary, ExpectedEof, ExpectedValue(ExpectedValueKind), @@ -150,15 +152,21 @@ impl<'a> Display for Error<'a> { ErrorKind::ExpectedEof => { writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? } - ErrorKind::Geo => { + ErrorKind::GeoRadius => { writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")? } + ErrorKind::GeoBoundingBox => { + writeln!(f, "The `_geoBoundingBox` filter expects two pair of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`.")? + } ErrorKind::ReservedGeo(name) => { writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name.escape_debug())? } - ErrorKind::MisusedGeo => { + ErrorKind::MisusedGeoRadius => { writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? } + ErrorKind::MisusedGeoBoundingBox => { + writeln!(f, "The `_geoBoundingBox` filter is an operation and can't be used as a value.")? + } ErrorKind::ReservedKeyword(word) => { writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")? } diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 65c4c56f8..89e80a267 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -45,6 +45,7 @@ mod error; mod value; use std::fmt::Debug; +use std::str::FromStr; pub use condition::{parse_condition, parse_to, Condition}; use condition::{parse_exists, parse_not_exists}; @@ -71,7 +72,7 @@ const MAX_FILTER_DEPTH: usize = 200; #[derive(Debug, Clone, Eq)] pub struct Token<'a> { /// The token in the original input, it should be used when possible. - span: Span<'a>, + pub span: Span<'a>, /// If you need to modify the original input you can use the `value` field /// to store your modified input. value: Option, @@ -101,7 +102,7 @@ impl<'a> Token<'a> { } pub fn parse_finite_float(&self) -> Result { - let value: f64 = self.span.parse().map_err(|e| self.as_external_error(e))?; + let value: f64 = self.value().parse().map_err(|e| self.as_external_error(e))?; if value.is_finite() { Ok(value) } else { @@ -131,7 +132,7 @@ pub enum FilterCondition<'a> { Or(Vec), And(Vec), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, - GeoBoundingBox { top_left_point: [Token<'a>; 2], bottom_right_point: [Token<'a>; 2]}, + GeoBoundingBox { top_left_point: [Token<'a>; 2], bottom_right_point: [Token<'a>; 2] }, } impl<'a> FilterCondition<'a> { @@ -312,12 +313,12 @@ fn parse_geo_radius(input: Span) -> IResult { // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), )(input) - .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo))); + .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoRadius))); let (input, args) = parsed?; if args.len() != 3 { - return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::Geo))); + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoRadius))); } let res = FilterCondition::GeoLowerThan { @@ -334,38 +335,27 @@ fn parse_geo_bounding_box(input: Span) -> IResult { let parsed = preceded( tuple((multispace0, word_exact("_geoBoundingBox"))), // if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure - cut( - delimited( - char('('), - separated_list1( - tag(","), - ws( - delimited( - char('('), - separated_list1( - tag(","), - ws(recognize_float) - ), - char(')') - ) - ) - ), - char(')') - ) - ), + cut(delimited( + char('('), + separated_list1( + tag(","), + ws(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), + ), + char(')'), + )), )(input) - .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo))); + .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox))); let (input, args) = parsed?; - if args.len() != 2 { - return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::Geo))); + if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 { + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoBoundingBox))); } //TODO: Check sub array length let res = FilterCondition::GeoBoundingBox { top_left_point: [args[0][0].into(), args[0][1].into()], - bottom_right_point: [args[1][0].into(), args[1][1].into()] + bottom_right_point: [args[1][0].into(), args[1][1].into()], }; Ok((input, res)) } @@ -762,7 +752,14 @@ impl<'a> std::fmt::Display for FilterCondition<'a> { write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius) } FilterCondition::GeoBoundingBox { top_left_point, bottom_right_point } => { - write!(f, "_geoBoundingBox(({}, {}), ({}, {}))", top_left_point[0], top_left_point[1], bottom_right_point[0], bottom_right_point[1]) + write!( + f, + "_geoBoundingBox(({}, {}), ({}, {}))", + top_left_point[0], + top_left_point[1], + bottom_right_point[0], + bottom_right_point[1] + ) } } } diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 73ef61480..abdc10439 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -6,7 +6,7 @@ use nom::sequence::{delimited, terminated}; use nom::{InputIter, InputLength, InputTake, Slice}; use crate::error::{ExpectedValueKind, NomErrorExt}; -use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; +use crate::{parse_geo_point, parse_geo_radius, parse_geo_bounding_box, Error, ErrorKind, IResult, Span, Token}; /// This function goes through all characters in the [Span] if it finds any escaped character (`\`). /// It generates a new string with all `\` removed from the [Span]. @@ -91,11 +91,21 @@ pub fn parse_value(input: Span) -> IResult { } } match parse_geo_radius(input) { - Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))), + Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius))), // if we encountered a failure it means the user badly wrote a _geoRadius filter. // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. Err(e) if e.is_failure() => { - return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))) + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius))) + } + _ => (), + } + + match parse_geo_bounding_box(input) { + Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoBoundingBox))), + // if we encountered a failure it means the user badly wrote a _geoBoundingBox filter. + // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. + Err(e) if e.is_failure() => { + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoBoundingBox))) } _ => (), } @@ -155,7 +165,7 @@ fn is_syntax_component(c: char) -> bool { } fn is_keyword(s: &str) -> bool { - matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius") + matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius" | "_geoBoundingBox") } #[cfg(test)] diff --git a/milli/src/asc_desc.rs b/milli/src/asc_desc.rs index 21065da36..826290c8a 100644 --- a/milli/src/asc_desc.rs +++ b/milli/src/asc_desc.rs @@ -55,6 +55,9 @@ impl From for CriterionError { AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => { CriterionError::ReservedNameForFilter { name: "_geoRadius".to_string() } } + AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => { + CriterionError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() } + } AscDescError::ReservedKeyword { name } => CriterionError::ReservedName { name }, } } diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index c02cd2525..4544a97ac 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -159,6 +159,8 @@ mod tests { ("_geoPoint(42, 75):asc", ReservedNameForSort { name: S("_geoPoint") }), ("_geoRadius:asc", ReservedNameForFilter { name: S("_geoRadius") }), ("_geoRadius(42, 75, 59):asc", ReservedNameForFilter { name: S("_geoRadius") }), + ("_geoBoundingBox:asc", ReservedNameForFilter { name: S("_geoBoundingBox") }), + ("_geoBoundinxBox((42, 75), (75, 59)):asc", ReservedNameForFilter { name: S("_geoBoundingBox") }), ]; for (input, expected) in invalid_criteria { diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 23cbb280c..b44db29e4 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -385,6 +385,114 @@ impl<'a> Filter<'a> { }))? } } + FilterCondition::GeoBoundingBox { top_left_point, bottom_right_point } => { + if filterable_fields.contains("_geo") { + let top_left: [f64; 2] = [ + top_left_point[0].parse_finite_float()?, + top_left_point[1].parse_finite_float()?, + ]; + let bottom_right: [f64; 2] = [ + bottom_right_point[0].parse_finite_float()?, + bottom_right_point[1].parse_finite_float()?, + ]; + if !(-90.0..=90.0).contains(&top_left[0]) { + return Err(top_left_point[0] + .as_external_error(FilterError::BadGeoLat(top_left[0])))?; + } + if !(-180.0..=180.0).contains(&top_left[1]) { + return Err(top_left_point[1] + .as_external_error(FilterError::BadGeoLng(top_left[1])))?; + } + if !(-90.0..=90.0).contains(&bottom_right[0]) { + return Err(bottom_right_point[0] + .as_external_error(FilterError::BadGeoLat(bottom_right[0])))?; + } + if !(-180.0..=180.0).contains(&bottom_right[1]) { + return Err(bottom_right_point[1] + .as_external_error(FilterError::BadGeoLng(bottom_right[1])))?; + } + + let geo_lat_token = + Token::new(top_left_point[0].span, Some("_geo.lat".to_string())); + + let condition_lat = FilterCondition::Condition { + fid: geo_lat_token, + op: Condition::Between { + from: bottom_right_point[0].clone(), + to: top_left_point[0].clone(), + }, + }; + + let selected_lat = Filter { condition: condition_lat }.inner_evaluate( + rtxn, + index, + filterable_fields, + )?; + + let geo_lng_token = + Token::new(top_left_point[1].span, Some("_geo.lng".to_string())); + let min_lng_token = + Token::new(top_left_point[1].span, Some("-180.0".to_string())); + let max_lng_token = + Token::new(top_left_point[1].span, Some("180.0".to_string())); + + let selected_lng = if top_left[1] > bottom_right[1] { + dbg!("test"); + + let condition_left = FilterCondition::Condition { + fid: geo_lng_token.clone(), + op: Condition::Between { + from: dbg!(top_left_point[1].clone()), + to: max_lng_token, + }, + }; + let left = Filter { condition: condition_left }.inner_evaluate( + rtxn, + index, + filterable_fields, + )?; + + let condition_right = FilterCondition::Condition { + fid: geo_lng_token, + op: Condition::Between { + from: dbg!(min_lng_token), + to: dbg!(bottom_right_point[1].clone()), + }, + }; + let right = Filter { condition: condition_right }.inner_evaluate( + rtxn, + index, + filterable_fields, + )?; + + dbg!(&left); + dbg!(&right); + dbg!(left | right) + } else { + let condition_lng = FilterCondition::Condition { + fid: geo_lng_token, + op: Condition::Between { + from: top_left_point[1].clone(), + to: bottom_right_point[1].clone(), + }, + }; + Filter { condition: condition_lng }.inner_evaluate( + rtxn, + index, + filterable_fields, + )? + }; + + dbg!(&selected_lng); + + Ok(selected_lat & selected_lng) + } else { + Err(top_left_point[0].as_external_error(FilterError::AttributeNotFilterable { + attribute: "_geo", + filterable_fields: filterable_fields.clone(), + }))? + } + } } } } From 426d63b01bb0e282bcffce518909416314d7e1ef Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Fri, 28 Oct 2022 18:11:11 +0200 Subject: [PATCH 03/16] Update insta test suite --- filter-parser/src/error.rs | 6 +++--- filter-parser/src/lib.rs | 34 +++++++++++++++++++++++------- meilisearch/tests/search/errors.rs | 4 ++-- milli/src/asc_desc.rs | 3 +++ 4 files changed, 34 insertions(+), 13 deletions(-) diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index aaf1a2e36..70018c3d9 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -144,10 +144,10 @@ impl<'a> Display for Error<'a> { writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? } ErrorKind::InvalidPrimary if input.trim().is_empty() => { - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing.")? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")? } ErrorKind::InvalidPrimary => { - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `{}`.", escaped_input)? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)? } ErrorKind::ExpectedEof => { writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? @@ -159,7 +159,7 @@ impl<'a> Display for Error<'a> { writeln!(f, "The `_geoBoundingBox` filter expects two pair of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`.")? } ErrorKind::ReservedGeo(name) => { - writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name.escape_debug())? + writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox((latitude, longitude), (latitude, longitude)) built-in rules to filter on `_geo` coordinates.", name.escape_debug())? } ErrorKind::MisusedGeoRadius => { writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 89e80a267..6274964bd 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -45,7 +45,6 @@ mod error; mod value; use std::fmt::Debug; -use std::str::FromStr; pub use condition::{parse_condition, parse_to, Condition}; use condition::{parse_exists, parse_not_exists}; @@ -505,6 +504,10 @@ pub mod tests { insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})"); insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))"); + // Test geo bounding box + insta::assert_display_snapshot!(p("_geoBoundingBox((12, 13), (14, 15))"), @"_geoBoundingBox(({12}, {13}), ({14}, {15}))"); + insta::assert_display_snapshot!(p("NOT _geoBoundingBox((12, 13), (14, 15))"), @"NOT (_geoBoundingBox(({12}, {13}), ({14}, {15})))"); + // Test OR + AND insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); insta::assert_display_snapshot!(p("channel = ponce OR 'dog race' != 'bernese mountain'"), @"OR[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); @@ -562,7 +565,7 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("'OR'"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\'OR\'`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. 1:5 'OR' "###); @@ -572,12 +575,12 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("channel Ponce"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. 1:14 channel Ponce "###); insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. 19:19 channel = Ponce OR "###); @@ -591,13 +594,28 @@ pub mod tests { 1:16 _geoRadius = 12 "###); + insta::assert_display_snapshot!(p("_geoBoundingBox"), @r###" + The `_geoBoundingBox` filter expects two pair of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. + 1:16 _geoBoundingBox + "###); + + insta::assert_display_snapshot!(p("_geoBoundingBox = 12"), @r###" + The `_geoBoundingBox` filter expects two pair of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. + 1:21 _geoBoundingBox = 12 + "###); + + insta::assert_display_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###" + The `_geoBoundingBox` filter expects two pair of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. + 1:26 _geoBoundingBox(1.0, 1.0) + "###); + insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###" - `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox((latitude, longitude), (latitude, longitude)) built-in rules to filter on `_geo` coordinates. 1:22 _geoPoint(12, 13, 14) "###); insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###" - `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox((latitude, longitude), (latitude, longitude)) built-in rules to filter on `_geo` coordinates. 13:34 position <= _geoPoint(12, 13, 14) "###); @@ -627,12 +645,12 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. 1:17 colour NOT EXIST "###); insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. 1:23 subscribers 100 TO1000 "###); diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index d582a3672..2c02dc0a3 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -415,7 +415,7 @@ async fn filter_invalid_syntax_object() { index.wait_task(1).await; let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid-search-filter" @@ -440,7 +440,7 @@ async fn filter_invalid_syntax_array() { index.wait_task(1).await; let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid-search-filter" diff --git a/milli/src/asc_desc.rs b/milli/src/asc_desc.rs index 826290c8a..a460be503 100644 --- a/milli/src/asc_desc.rs +++ b/milli/src/asc_desc.rs @@ -193,6 +193,9 @@ impl From for SortError { AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => { SortError::ReservedNameForFilter { name: String::from("_geoRadius") } } + AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => { + SortError::ReservedNameForFilter { name: String::from("_geoBoundingBox") } + } AscDescError::ReservedKeyword { name } => SortError::ReservedName { name }, } } From 65a3086cf11461054e9eb0de5d5d31dfd80c3f9e Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Fri, 28 Oct 2022 18:22:26 +0200 Subject: [PATCH 04/16] fix test --- milli/src/asc_desc.rs | 2 +- milli/src/criterion.rs | 2 +- milli/src/error.rs | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/milli/src/asc_desc.rs b/milli/src/asc_desc.rs index a460be503..444819c34 100644 --- a/milli/src/asc_desc.rs +++ b/milli/src/asc_desc.rs @@ -92,7 +92,7 @@ impl FromStr for Member { Ok(Member::Geo([lat, lng])) } None => { - if is_reserved_keyword(text) || text.starts_with("_geoRadius(") { + if is_reserved_keyword(text) || text.starts_with("_geoRadius(") || text.starts_with("_geoBoundingBox(") { return Err(AscDescError::ReservedKeyword { name: text.to_string() })?; } Ok(Member::Field(text.to_string())) diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index 4544a97ac..23e2ac7f1 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -160,7 +160,7 @@ mod tests { ("_geoRadius:asc", ReservedNameForFilter { name: S("_geoRadius") }), ("_geoRadius(42, 75, 59):asc", ReservedNameForFilter { name: S("_geoRadius") }), ("_geoBoundingBox:asc", ReservedNameForFilter { name: S("_geoBoundingBox") }), - ("_geoBoundinxBox((42, 75), (75, 59)):asc", ReservedNameForFilter { name: S("_geoBoundingBox") }), + ("_geoBoundingBox((42, 75), (75, 59)):asc", ReservedNameForFilter { name: S("_geoBoundingBox") }), ]; for (input, expected) in invalid_criteria { diff --git a/milli/src/error.rs b/milli/src/error.rs index 8734cb540..92c238814 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -11,7 +11,7 @@ use crate::documents::{self, DocumentsBatchCursorError}; use crate::{CriterionError, DocumentId, FieldId, Object, SortError}; pub fn is_reserved_keyword(keyword: &str) -> bool { - ["_geo", "_geoDistance", "_geoPoint", "_geoRadius"].contains(&keyword) + ["_geo", "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"].contains(&keyword) } #[derive(Error, Debug)] From b2054d3f6c211cc19c6260a93c7b3a40260f3890 Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Fri, 28 Oct 2022 18:27:43 +0200 Subject: [PATCH 05/16] Add insta test on geo filters whitespacing --- filter-parser/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 6274964bd..d30c1b58e 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -503,10 +503,12 @@ pub mod tests { // Test geo radius insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})"); insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))"); + insta::assert_display_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})"); // Test geo bounding box insta::assert_display_snapshot!(p("_geoBoundingBox((12, 13), (14, 15))"), @"_geoBoundingBox(({12}, {13}), ({14}, {15}))"); insta::assert_display_snapshot!(p("NOT _geoBoundingBox((12, 13), (14, 15))"), @"NOT (_geoBoundingBox(({12}, {13}), ({14}, {15})))"); + insta::assert_display_snapshot!(p("_geoBoundingBox((12,13),(14,15))"), @"_geoBoundingBox(({12}, {13}), ({14}, {15}))"); // Test OR + AND insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); From 0d71c80ba69a3a0e3758d8df63009eca632a7425 Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Fri, 28 Oct 2022 19:01:23 +0200 Subject: [PATCH 06/16] add tests --- milli/src/search/facet/filter.rs | 112 ++++++++++++++++++++++++++++--- 1 file changed, 102 insertions(+), 10 deletions(-) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index b44db29e4..15edb1249 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -437,12 +437,10 @@ impl<'a> Filter<'a> { Token::new(top_left_point[1].span, Some("180.0".to_string())); let selected_lng = if top_left[1] > bottom_right[1] { - dbg!("test"); - let condition_left = FilterCondition::Condition { fid: geo_lng_token.clone(), op: Condition::Between { - from: dbg!(top_left_point[1].clone()), + from: top_left_point[1].clone(), to: max_lng_token, }, }; @@ -455,8 +453,8 @@ impl<'a> Filter<'a> { let condition_right = FilterCondition::Condition { fid: geo_lng_token, op: Condition::Between { - from: dbg!(min_lng_token), - to: dbg!(bottom_right_point[1].clone()), + from: min_lng_token, + to: bottom_right_point[1].clone(), }, }; let right = Filter { condition: condition_right }.inner_evaluate( @@ -465,9 +463,7 @@ impl<'a> Filter<'a> { filterable_fields, )?; - dbg!(&left); - dbg!(&right); - dbg!(left | right) + left | right } else { let condition_lng = FilterCondition::Condition { fid: geo_lng_token, @@ -483,8 +479,6 @@ impl<'a> Filter<'a> { )? }; - dbg!(&selected_lng); - Ok(selected_lat & selected_lng) } else { Err(top_left_point[0].as_external_error(FilterError::AttributeNotFilterable { @@ -610,6 +604,12 @@ mod tests { "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." )); + let filter = Filter::from_str("_geoBoundingBox((42, 150), (30, 10))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().starts_with( + "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." + )); + let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( @@ -632,6 +632,12 @@ mod tests { "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." )); + let filter = Filter::from_str("_geoBoundingBox((42, 150), (30, 10))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().starts_with( + "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." + )); + let filter = Filter::from_str("name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( @@ -783,6 +789,92 @@ mod tests { )); } + #[test] + fn geo_bounding_box_error() { + let index = TempIndex::new(); + + index + .update_settings(|settings| { + settings.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order + settings.set_filterable_fields(hashset! { S("_geo"), S("price") }); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + + // geoboundingbox top left coord have a bad latitude + let filter = + Filter::from_str("_geoBoundingBox((-90.0000001, 150), (30, 10))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!( + error.to_string().starts_with( + "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees." + ), + "{}", + error.to_string() + ); + + // geoboundingbox top left coord have a bad latitude + let filter = + Filter::from_str("_geoBoundingBox((90.0000001, 150), (30, 10))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!( + error.to_string().starts_with( + "Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees." + ), + "{}", + error.to_string() + ); + + // geoboundingbox bottom right coord have a bad latitude + let filter = + Filter::from_str("_geoBoundingBox((30, 10), (-90.0000001, 150))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees." + )); + + // geoboundingbox bottom right coord have a bad latitude + let filter = + Filter::from_str("_geoBoundingBox((30, 10), (90.0000001, 150))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees." + )); + + // geoboundingbox top left coord have a bad longitude + let filter = + Filter::from_str("_geoBoundingBox((-10, 180.000001), (30, 10))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." + )); + + // geoboundingbox top left coord have a bad longitude + let filter = + Filter::from_str("_geoBoundingBox((-10, -180.000001), (30, 10))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees." + )); + + // geoboundingbox bottom right coord have a bad longitude + let filter = + Filter::from_str("_geoBoundingBox((30, 10), (-10, -180.000001))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees." + )); + + // geoboundingbox bottom right coord have a bad longitude + let filter = + Filter::from_str("_geoBoundingBox((30, 10), (-10, 180.000001))").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." + )); + } + #[test] fn filter_depth() { // generates a big (2 MiB) filter with too much of ORs. From b297b5deb0b0f93d89e4b1bb3a535770e84c5e8d Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Fri, 28 Oct 2022 19:10:58 +0200 Subject: [PATCH 07/16] cargo fmt --- filter-parser/src/value.rs | 21 +++++++++++++++++---- milli/src/asc_desc.rs | 5 ++++- milli/src/criterion.rs | 5 ++++- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index abdc10439..d08a12a92 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -6,7 +6,10 @@ use nom::sequence::{delimited, terminated}; use nom::{InputIter, InputLength, InputTake, Slice}; use crate::error::{ExpectedValueKind, NomErrorExt}; -use crate::{parse_geo_point, parse_geo_radius, parse_geo_bounding_box, Error, ErrorKind, IResult, Span, Token}; +use crate::{ + parse_geo_bounding_box, parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, + Token, +}; /// This function goes through all characters in the [Span] if it finds any escaped character (`\`). /// It generates a new string with all `\` removed from the [Span]. @@ -91,7 +94,9 @@ pub fn parse_value(input: Span) -> IResult { } } match parse_geo_radius(input) { - Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius))), + Ok(_) => { + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius))) + } // if we encountered a failure it means the user badly wrote a _geoRadius filter. // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. Err(e) if e.is_failure() => { @@ -101,11 +106,19 @@ pub fn parse_value(input: Span) -> IResult { } match parse_geo_bounding_box(input) { - Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoBoundingBox))), + Ok(_) => { + return Err(nom::Err::Failure(Error::new_from_kind( + input, + ErrorKind::MisusedGeoBoundingBox, + ))) + } // if we encountered a failure it means the user badly wrote a _geoBoundingBox filter. // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. Err(e) if e.is_failure() => { - return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoBoundingBox))) + return Err(nom::Err::Failure(Error::new_from_kind( + input, + ErrorKind::MisusedGeoBoundingBox, + ))) } _ => (), } diff --git a/milli/src/asc_desc.rs b/milli/src/asc_desc.rs index 444819c34..ebb28c27d 100644 --- a/milli/src/asc_desc.rs +++ b/milli/src/asc_desc.rs @@ -92,7 +92,10 @@ impl FromStr for Member { Ok(Member::Geo([lat, lng])) } None => { - if is_reserved_keyword(text) || text.starts_with("_geoRadius(") || text.starts_with("_geoBoundingBox(") { + if is_reserved_keyword(text) + || text.starts_with("_geoRadius(") + || text.starts_with("_geoBoundingBox(") + { return Err(AscDescError::ReservedKeyword { name: text.to_string() })?; } Ok(Member::Field(text.to_string())) diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index 23e2ac7f1..9a6e2be4a 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -160,7 +160,10 @@ mod tests { ("_geoRadius:asc", ReservedNameForFilter { name: S("_geoRadius") }), ("_geoRadius(42, 75, 59):asc", ReservedNameForFilter { name: S("_geoRadius") }), ("_geoBoundingBox:asc", ReservedNameForFilter { name: S("_geoBoundingBox") }), - ("_geoBoundingBox((42, 75), (75, 59)):asc", ReservedNameForFilter { name: S("_geoBoundingBox") }), + ( + "_geoBoundingBox((42, 75), (75, 59)):asc", + ReservedNameForFilter { name: S("_geoBoundingBox") }, + ), ]; for (input, expected) in invalid_criteria { From 2d66fdc8e91a8fdec18aff8d3dfddfbcb6fbc91a Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Tue, 1 Nov 2022 09:56:38 +0100 Subject: [PATCH 08/16] Apply review comments --- filter-parser/src/error.rs | 2 +- filter-parser/src/lib.rs | 1 - filter-parser/src/value.rs | 4 ++-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index 70018c3d9..0e416091f 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -156,7 +156,7 @@ impl<'a> Display for Error<'a> { writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")? } ErrorKind::GeoBoundingBox => { - writeln!(f, "The `_geoBoundingBox` filter expects two pair of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`.")? + writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`.")? } ErrorKind::ReservedGeo(name) => { writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox((latitude, longitude), (latitude, longitude)) built-in rules to filter on `_geo` coordinates.", name.escape_debug())? diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index d30c1b58e..3e1c8baea 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -351,7 +351,6 @@ fn parse_geo_bounding_box(input: Span) -> IResult { return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoBoundingBox))); } - //TODO: Check sub array length let res = FilterCondition::GeoBoundingBox { top_left_point: [args[0][0].into(), args[0][1].into()], bottom_right_point: [args[1][0].into(), args[1][1].into()], diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index d08a12a92..2296c0769 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -98,7 +98,7 @@ pub fn parse_value(input: Span) -> IResult { return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius))) } // if we encountered a failure it means the user badly wrote a _geoRadius filter. - // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. + // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value. Err(e) if e.is_failure() => { return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius))) } @@ -113,7 +113,7 @@ pub fn parse_value(input: Span) -> IResult { ))) } // if we encountered a failure it means the user badly wrote a _geoBoundingBox filter. - // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. + // But instead of showing them how to fix his syntax we are going to tell them they should not use this filter as a value. Err(e) if e.is_failure() => { return Err(nom::Err::Failure(Error::new_from_kind( input, From d80ce00623d7b1c1c3619b718cb2e4b79dc0e2f8 Mon Sep 17 00:00:00 2001 From: Guillaume Mourier Date: Tue, 1 Nov 2022 15:27:44 +0100 Subject: [PATCH 09/16] Update insta test --- filter-parser/src/lib.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 3e1c8baea..231050401 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -596,17 +596,17 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("_geoBoundingBox"), @r###" - The `_geoBoundingBox` filter expects two pair of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. + The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. 1:16 _geoBoundingBox "###); insta::assert_display_snapshot!(p("_geoBoundingBox = 12"), @r###" - The `_geoBoundingBox` filter expects two pair of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. + The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. 1:21 _geoBoundingBox = 12 "###); insta::assert_display_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###" - The `_geoBoundingBox` filter expects two pair of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. + The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. 1:26 _geoBoundingBox(1.0, 1.0) "###); From ae8660e585a7fc50515e61c9ff5b1e1806d7f9b7 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 2 Feb 2023 15:03:34 +0100 Subject: [PATCH 10/16] Add Token::original_span rather than making Token::span pub --- filter-parser/src/lib.rs | 7 ++++++- milli/src/search/facet/filter.rs | 8 ++++---- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 231050401..3f6b81ea4 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -71,7 +71,7 @@ const MAX_FILTER_DEPTH: usize = 200; #[derive(Debug, Clone, Eq)] pub struct Token<'a> { /// The token in the original input, it should be used when possible. - pub span: Span<'a>, + span: Span<'a>, /// If you need to modify the original input you can use the `value` field /// to store your modified input. value: Option, @@ -100,6 +100,11 @@ impl<'a> Token<'a> { Error::new_from_external(self.span, error) } + /// Returns a copy of the span this token was created with. + pub fn original_span(&self) -> Span<'a> { + self.span + } + pub fn parse_finite_float(&self) -> Result { let value: f64 = self.value().parse().map_err(|e| self.as_external_error(e))?; if value.is_finite() { diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 15edb1249..903f4fa94 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -413,7 +413,7 @@ impl<'a> Filter<'a> { } let geo_lat_token = - Token::new(top_left_point[0].span, Some("_geo.lat".to_string())); + Token::new(top_left_point[0].original_span(), Some("_geo.lat".to_string())); let condition_lat = FilterCondition::Condition { fid: geo_lat_token, @@ -430,11 +430,11 @@ impl<'a> Filter<'a> { )?; let geo_lng_token = - Token::new(top_left_point[1].span, Some("_geo.lng".to_string())); + Token::new(top_left_point[1].original_span(), Some("_geo.lng".to_string())); let min_lng_token = - Token::new(top_left_point[1].span, Some("-180.0".to_string())); + Token::new(top_left_point[1].original_span(), Some("-180.0".to_string())); let max_lng_token = - Token::new(top_left_point[1].span, Some("180.0".to_string())); + Token::new(top_left_point[1].original_span(), Some("180.0".to_string())); let selected_lng = if top_left[1] > bottom_right[1] { let condition_left = FilterCondition::Condition { From fcb09ccc3d6ee35e7b23353a09e40878b6251748 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 2 Feb 2023 18:19:56 +0100 Subject: [PATCH 11/16] add tests on the geoBoundingBox --- milli/src/index.rs | 97 +++++++++++++++++++++++++++++++- milli/src/search/facet/filter.rs | 14 +++-- 2 files changed, 105 insertions(+), 6 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 31311d318..b166ab2d9 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1206,7 +1206,7 @@ pub(crate) mod tests { self, DeleteDocuments, DeletionStrategy, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings, }; - use crate::{db_snap, obkv_to_json, Index, Search, SearchResult}; + use crate::{db_snap, obkv_to_json, Filter, Index, Search, SearchResult}; pub(crate) struct TempIndex { pub inner: Index, @@ -1504,6 +1504,101 @@ pub(crate) mod tests { assert_eq!(user_defined, &["doggo", "name"]); } + #[test] + fn test_basic_geo_bounding_box() { + let index = TempIndex::new(); + + index + .update_settings(|settings| { + settings.set_filterable_fields(hashset! { S("_geo") }); + }) + .unwrap(); + index + .add_documents(documents!([ + { "id": 0, "_geo": { "lat": 0, "lng": 0 } }, + { "id": 1, "_geo": { "lat": 0, "lng": -175 } }, + { "id": 2, "_geo": { "lat": 0, "lng": 175 } }, + { "id": 3, "_geo": { "lat": 85, "lng": 0 } }, + { "id": 4, "_geo": { "lat": -85, "lng": 0 } }, + ])) + .unwrap(); + + // ensure we get the right real searchable fields + user defined searchable fields + let rtxn = index.read_txn().unwrap(); + let mut search = index.search(&rtxn); + + // exact match a document + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((0, 0), (0, 0))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>"); + + // match a document in the middle of the rectangle + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((10, -10), (-10, 10))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>"); + + // select everything + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((90, -180), (-90, 180))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1, 2, 3, 4]>"); + + // go on the edge of the longitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((0, 180), (0, -170))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1]>"); + + // go on the other edge of the longitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((0, 170), (0, -180))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[2]>"); + + // wrap around the longitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((0, 170), (0, -170))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1, 2]>"); + + // go on the edge of the latitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((90, 0), (80, 0))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[3]>"); + + // go on the edge of the latitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((-80, 0), (-90, 0))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[4]>"); + + // try to wrap around the latitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((-80, 0), (80, 0))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[]>"); + + // the request that doesn't make sense + // send a top latitude lower than the bottow latitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox((-10, 0), (10, 0))").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[]>"); + } + #[test] fn replace_documents_external_ids_and_soft_deletion_check() { use big_s::S; diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 903f4fa94..962b6bab1 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -431,12 +431,16 @@ impl<'a> Filter<'a> { let geo_lng_token = Token::new(top_left_point[1].original_span(), Some("_geo.lng".to_string())); - let min_lng_token = - Token::new(top_left_point[1].original_span(), Some("-180.0".to_string())); - let max_lng_token = - Token::new(top_left_point[1].original_span(), Some("180.0".to_string())); - let selected_lng = if top_left[1] > bottom_right[1] { + let min_lng_token = Token::new( + top_left_point[1].original_span(), + Some("-180.0".to_string()), + ); + let max_lng_token = Token::new( + top_left_point[1].original_span(), + Some("180.0".to_string()), + ); + let condition_left = FilterCondition::Condition { fid: geo_lng_token.clone(), op: Condition::Between { From d27007005e1a2b12fedf8054eed16d50880f186a Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 6 Feb 2023 11:36:49 +0100 Subject: [PATCH 12/16] comments the geoboundingbox + forbid the usage of the lexeme method which could introduce bugs --- filter-parser/src/lib.rs | 5 +++++ milli/src/search/facet/filter.rs | 13 ++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 3f6b81ea4..385e6f623 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -88,10 +88,15 @@ impl<'a> Token<'a> { Self { span, value } } + /// Returns the string contained in the span of the `Token`. + /// This is only useful in the tests. You should always use + /// the value. + #[cfg(test)] pub fn lexeme(&self) -> &str { &self.span } + /// Return the string contained in the token. pub fn value(&self) -> &str { self.value.as_ref().map_or(&self.span, |value| value) } diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 962b6bab1..74350275a 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -294,7 +294,7 @@ impl<'a> Filter<'a> { Ok(RoaringBitmap::new()) } } else { - match fid.lexeme() { + match fid.value() { attribute @ "_geo" => { Err(fid.as_external_error(FilterError::BadGeo(attribute)))? } @@ -412,6 +412,12 @@ impl<'a> Filter<'a> { .as_external_error(FilterError::BadGeoLng(bottom_right[1])))?; } + // Instead of writing a custom `GeoBoundingBox`Ā filter we're simply going to re-use the range + // filter to create the following filter; + // `_geo.lat {top_left[0]} TO {bottom_right[0]} AND _geo.lng {top_left[1]} TO {bottom_right[1]}` + // As we can see, we need to use a bunch of tokens that doesn't exists in the original filter, + // thus we're going to create tokens that points to a random spans but contains our text. + let geo_lat_token = Token::new(top_left_point[0].original_span(), Some("_geo.lat".to_string())); @@ -432,6 +438,11 @@ impl<'a> Filter<'a> { let geo_lng_token = Token::new(top_left_point[1].original_span(), Some("_geo.lng".to_string())); let selected_lng = if top_left[1] > bottom_right[1] { + // In this case the bounding box is wrapping around the earth (going from 180 to -180). + // We need to update the lng part of the filter from; + // `_geo.lng {top_left[1]} TO {bottom_right[1]}` to + // `_geo.lng {top_left[1]} TO 180 AND _geo.lng -180 TO {bottom_right[1]}` + let min_lng_token = Token::new( top_left_point[1].original_span(), Some("-180.0".to_string()), From 3ebc99473f6aea4f3bf91815b7b6c3cf1765058d Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 6 Feb 2023 13:29:37 +0100 Subject: [PATCH 13/16] Apply suggestions from code review Co-authored-by: Louis Dureuil --- milli/src/search/facet/filter.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 74350275a..d148d707a 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -415,8 +415,8 @@ impl<'a> Filter<'a> { // Instead of writing a custom `GeoBoundingBox`Ā filter we're simply going to re-use the range // filter to create the following filter; // `_geo.lat {top_left[0]} TO {bottom_right[0]} AND _geo.lng {top_left[1]} TO {bottom_right[1]}` - // As we can see, we need to use a bunch of tokens that doesn't exists in the original filter, - // thus we're going to create tokens that points to a random spans but contains our text. + // As we can see, we need to use a bunch of tokens that don't exist in the original filter, + // thus we're going to create tokens that point to a random span but contain our text. let geo_lat_token = Token::new(top_left_point[0].original_span(), Some("_geo.lat".to_string())); From 1b005f697dfd73eeb7f4aa6064897f7ab58b12ba Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 6 Feb 2023 16:50:27 +0100 Subject: [PATCH 14/16] update the syntax of the geoboundingbox filter to uses brackets instead of parens around lat and lng --- filter-parser/src/error.rs | 4 ++-- filter-parser/src/lib.rs | 24 ++++++++++++------------ milli/src/criterion.rs | 2 +- milli/src/index.rs | 20 ++++++++++---------- milli/src/search/facet/filter.rs | 20 ++++++++++---------- 5 files changed, 35 insertions(+), 35 deletions(-) diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index 0e416091f..4d9d89859 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -156,10 +156,10 @@ impl<'a> Display for Error<'a> { writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")? } ErrorKind::GeoBoundingBox => { - writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`.")? + writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")? } ErrorKind::ReservedGeo(name) => { - writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox((latitude, longitude), (latitude, longitude)) built-in rules to filter on `_geo` coordinates.", name.escape_debug())? + writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.", name.escape_debug())? } ErrorKind::MisusedGeoRadius => { writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 385e6f623..8e21ff6be 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -18,7 +18,7 @@ //! doubleQuoted = "\"" .* all but double quotes "\"" //! word = (alphanumeric | _ | - | .)+ //! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")" -//! geoBoundingBox = "_geoBoundingBox((" WS * float WS* "," WS* float WS* "), (" WS* float WS* "," WS* float WS* ")") +//! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]") //! ``` //! //! Other BNF grammar used to handle some specific errors: @@ -337,7 +337,7 @@ fn parse_geo_radius(input: Span) -> IResult { Ok((input, res)) } -/// geoBoundingBox = WS* "_geoBoundingBox((float WS* "," WS* float WS* "), (float WS* "," WS* float WS* ")") +/// geoBoundingBox = WS* "_geoBoundingBox([float WS* "," WS* float WS* "], [float WS* "," WS* float WS* "]") /// If we parse `_geoBoundingBox` we MUST parse the rest of the expression. fn parse_geo_bounding_box(input: Span) -> IResult { // we want to allow space BEFORE the _geoBoundingBox but not after @@ -348,7 +348,7 @@ fn parse_geo_bounding_box(input: Span) -> IResult { char('('), separated_list1( tag(","), - ws(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), + ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))), ), char(')'), )), @@ -515,9 +515,9 @@ pub mod tests { insta::assert_display_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})"); // Test geo bounding box - insta::assert_display_snapshot!(p("_geoBoundingBox((12, 13), (14, 15))"), @"_geoBoundingBox(({12}, {13}), ({14}, {15}))"); - insta::assert_display_snapshot!(p("NOT _geoBoundingBox((12, 13), (14, 15))"), @"NOT (_geoBoundingBox(({12}, {13}), ({14}, {15})))"); - insta::assert_display_snapshot!(p("_geoBoundingBox((12,13),(14,15))"), @"_geoBoundingBox(({12}, {13}), ({14}, {15}))"); + insta::assert_display_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])"); + insta::assert_display_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))"); + insta::assert_display_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])"); // Test OR + AND insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); @@ -606,27 +606,27 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("_geoBoundingBox"), @r###" - The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. + The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`. 1:16 _geoBoundingBox "###); insta::assert_display_snapshot!(p("_geoBoundingBox = 12"), @r###" - The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. + The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`. 1:21 _geoBoundingBox = 12 "###); insta::assert_display_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###" - The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox((latitude, longitude), (latitude, longitude))`. + The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`. 1:26 _geoBoundingBox(1.0, 1.0) "###); insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###" - `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox((latitude, longitude), (latitude, longitude)) built-in rules to filter on `_geo` coordinates. + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates. 1:22 _geoPoint(12, 13, 14) "###); insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###" - `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox((latitude, longitude), (latitude, longitude)) built-in rules to filter on `_geo` coordinates. + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates. 13:34 position <= _geoPoint(12, 13, 14) "###); @@ -783,7 +783,7 @@ impl<'a> std::fmt::Display for FilterCondition<'a> { FilterCondition::GeoBoundingBox { top_left_point, bottom_right_point } => { write!( f, - "_geoBoundingBox(({}, {}), ({}, {}))", + "_geoBoundingBox([{}, {}], [{}, {}])", top_left_point[0], top_left_point[1], bottom_right_point[0], diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index 9a6e2be4a..45cbfe63d 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -161,7 +161,7 @@ mod tests { ("_geoRadius(42, 75, 59):asc", ReservedNameForFilter { name: S("_geoRadius") }), ("_geoBoundingBox:asc", ReservedNameForFilter { name: S("_geoBoundingBox") }), ( - "_geoBoundingBox((42, 75), (75, 59)):asc", + "_geoBoundingBox([42, 75], [75, 59]):asc", ReservedNameForFilter { name: S("_geoBoundingBox") }, ), ]; diff --git a/milli/src/index.rs b/milli/src/index.rs index b166ab2d9..1c0482bca 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1529,63 +1529,63 @@ pub(crate) mod tests { // exact match a document let search_result = search - .filter(Filter::from_str("_geoBoundingBox((0, 0), (0, 0))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([0, 0], [0, 0])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>"); // match a document in the middle of the rectangle let search_result = search - .filter(Filter::from_str("_geoBoundingBox((10, -10), (-10, 10))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([10, -10], [-10, 10])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>"); // select everything let search_result = search - .filter(Filter::from_str("_geoBoundingBox((90, -180), (-90, 180))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([90, -180], [-90, 180])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1, 2, 3, 4]>"); // go on the edge of the longitude let search_result = search - .filter(Filter::from_str("_geoBoundingBox((0, 180), (0, -170))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([0, 180], [0, -170])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1]>"); // go on the other edge of the longitude let search_result = search - .filter(Filter::from_str("_geoBoundingBox((0, 170), (0, -180))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([0, 170], [0, -180])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[2]>"); // wrap around the longitude let search_result = search - .filter(Filter::from_str("_geoBoundingBox((0, 170), (0, -170))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([0, 170], [0, -170])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1, 2]>"); // go on the edge of the latitude let search_result = search - .filter(Filter::from_str("_geoBoundingBox((90, 0), (80, 0))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([90, 0], [80, 0])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[3]>"); // go on the edge of the latitude let search_result = search - .filter(Filter::from_str("_geoBoundingBox((-80, 0), (-90, 0))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([-80, 0], [-90, 0])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[4]>"); // try to wrap around the latitude let search_result = search - .filter(Filter::from_str("_geoBoundingBox((-80, 0), (80, 0))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([-80, 0], [80, 0])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[]>"); @@ -1593,7 +1593,7 @@ pub(crate) mod tests { // the request that doesn't make sense // send a top latitude lower than the bottow latitude let search_result = search - .filter(Filter::from_str("_geoBoundingBox((-10, 0), (10, 0))").unwrap().unwrap()) + .filter(Filter::from_str("_geoBoundingBox([-10, 0], [10, 0])").unwrap().unwrap()) .execute() .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[]>"); diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index d148d707a..82073c66b 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -619,7 +619,7 @@ mod tests { "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." )); - let filter = Filter::from_str("_geoBoundingBox((42, 150), (30, 10))").unwrap().unwrap(); + let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." @@ -647,7 +647,7 @@ mod tests { "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." )); - let filter = Filter::from_str("_geoBoundingBox((42, 150), (30, 10))").unwrap().unwrap(); + let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." @@ -819,7 +819,7 @@ mod tests { // geoboundingbox top left coord have a bad latitude let filter = - Filter::from_str("_geoBoundingBox((-90.0000001, 150), (30, 10))").unwrap().unwrap(); + Filter::from_str("_geoBoundingBox([-90.0000001, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!( error.to_string().starts_with( @@ -831,7 +831,7 @@ mod tests { // geoboundingbox top left coord have a bad latitude let filter = - Filter::from_str("_geoBoundingBox((90.0000001, 150), (30, 10))").unwrap().unwrap(); + Filter::from_str("_geoBoundingBox([90.0000001, 150], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!( error.to_string().starts_with( @@ -843,7 +843,7 @@ mod tests { // geoboundingbox bottom right coord have a bad latitude let filter = - Filter::from_str("_geoBoundingBox((30, 10), (-90.0000001, 150))").unwrap().unwrap(); + Filter::from_str("_geoBoundingBox([30, 10], [-90.0000001, 150])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().contains( "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees." @@ -851,7 +851,7 @@ mod tests { // geoboundingbox bottom right coord have a bad latitude let filter = - Filter::from_str("_geoBoundingBox((30, 10), (90.0000001, 150))").unwrap().unwrap(); + Filter::from_str("_geoBoundingBox([30, 10], [90.0000001, 150])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().contains( "Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees." @@ -859,7 +859,7 @@ mod tests { // geoboundingbox top left coord have a bad longitude let filter = - Filter::from_str("_geoBoundingBox((-10, 180.000001), (30, 10))").unwrap().unwrap(); + Filter::from_str("_geoBoundingBox([-10, 180.000001], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().contains( "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." @@ -867,7 +867,7 @@ mod tests { // geoboundingbox top left coord have a bad longitude let filter = - Filter::from_str("_geoBoundingBox((-10, -180.000001), (30, 10))").unwrap().unwrap(); + Filter::from_str("_geoBoundingBox([-10, -180.000001], [30, 10])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().contains( "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees." @@ -875,7 +875,7 @@ mod tests { // geoboundingbox bottom right coord have a bad longitude let filter = - Filter::from_str("_geoBoundingBox((30, 10), (-10, -180.000001))").unwrap().unwrap(); + Filter::from_str("_geoBoundingBox([30, 10], [-10, -180.000001])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().contains( "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees." @@ -883,7 +883,7 @@ mod tests { // geoboundingbox bottom right coord have a bad longitude let filter = - Filter::from_str("_geoBoundingBox((30, 10), (-10, 180.000001))").unwrap().unwrap(); + Filter::from_str("_geoBoundingBox([30, 10], [-10, 180.000001])").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().contains( "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." From 7a38fe624f5c8424230aae5c54a4cde35fa43cc7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 6 Feb 2023 17:50:47 +0100 Subject: [PATCH 15/16] throw an error if the top left corner is found below the bottom right corner --- meilisearch/tests/search/errors.rs | 4 ++-- milli/src/index.rs | 21 ++++++++++++++------- milli/src/search/facet/filter.rs | 12 +++++++++++- milli/src/search/mod.rs | 2 +- 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index 2c02dc0a3..3ef342171 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -540,7 +540,7 @@ async fn filter_reserved_geo_attribute_array() { index.wait_task(1).await; let expected_response = json!({ - "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.\n1:5 _geo = Glass", + "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid-search-filter" @@ -565,7 +565,7 @@ async fn filter_reserved_geo_attribute_string() { index.wait_task(1).await; let expected_response = json!({ - "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.\n1:5 _geo = Glass", + "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid-search-filter" diff --git a/milli/src/index.rs b/milli/src/index.rs index 1c0482bca..972ed789e 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1583,20 +1583,27 @@ pub(crate) mod tests { .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[4]>"); + // the requests that doesn't make sense + // try to wrap around the latitude - let search_result = search + let error = search .filter(Filter::from_str("_geoBoundingBox([-80, 0], [80, 0])").unwrap().unwrap()) .execute() - .unwrap(); - insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[]>"); + .unwrap_err(); + insta::assert_display_snapshot!(error, @r###" + The top latitude `-80` is below the bottom latitude `80`. + 32:33 _geoBoundingBox([-80, 0], [80, 0]) + "###); - // the request that doesn't make sense // send a top latitude lower than the bottow latitude - let search_result = search + let error = search .filter(Filter::from_str("_geoBoundingBox([-10, 0], [10, 0])").unwrap().unwrap()) .execute() - .unwrap(); - insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[]>"); + .unwrap_err(); + insta::assert_display_snapshot!(error, @r###" + The top latitude `-10` is below the bottom latitude `10`. + 32:33 _geoBoundingBox([-10, 0], [10, 0]) + "###); } #[test] diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 82073c66b..3cf11819f 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -27,6 +27,7 @@ enum FilterError<'a> { BadGeo(&'a str), BadGeoLat(f64), BadGeoLng(f64), + BadGeoBoundingBoxTopIsBelowBottom(f64, f64), Reserved(&'a str), TooDeep, } @@ -62,7 +63,8 @@ impl<'a> Display for FilterError<'a> { "`{}` is a reserved keyword and thus can't be used as a filter expression.", keyword ), - Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.", keyword), + Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.", keyword), + Self::BadGeoBoundingBoxTopIsBelowBottom(top, bottom) => write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`."), Self::BadGeoLat(lat) => write!(f, "Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ", lat), Self::BadGeoLng(lng) => write!(f, "Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ", lng), } @@ -411,6 +413,14 @@ impl<'a> Filter<'a> { return Err(bottom_right_point[1] .as_external_error(FilterError::BadGeoLng(bottom_right[1])))?; } + if top_left[0] < bottom_right[0] { + return Err(bottom_right_point[1].as_external_error( + FilterError::BadGeoBoundingBoxTopIsBelowBottom( + top_left[0], + bottom_right[0], + ), + ))?; + } // Instead of writing a custom `GeoBoundingBox`Ā filter we're simply going to re-use the range // filter to create the following filter; diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index df59634bb..dc48e04a8 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -319,7 +319,7 @@ impl fmt::Debug for Search<'_> { } } -#[derive(Default)] +#[derive(Default, Debug)] pub struct SearchResult { pub matching_words: MatchingWords, pub candidates: RoaringBitmap, From 42114325cdee1ea41ac5a15f04b38494fb52ca3b Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 6 Feb 2023 18:07:00 +0100 Subject: [PATCH 16/16] Apply suggestions from code review Co-authored-by: Louis Dureuil --- milli/src/index.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 972ed789e..9f5b30cd6 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1583,7 +1583,7 @@ pub(crate) mod tests { .unwrap(); insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[4]>"); - // the requests that doesn't make sense + // the requests that don't make sense // try to wrap around the latitude let error = search