diff --git a/filter-parser/src/error.rs b/filter-parser/src/error.rs index ea95caba7..4d9d89859 100644 --- a/filter-parser/src/error.rs +++ b/filter-parser/src/error.rs @@ -57,8 +57,10 @@ pub enum ExpectedValueKind { #[derive(Debug)] pub enum ErrorKind<'a> { ReservedGeo(&'a str), - Geo, - MisusedGeo, + GeoRadius, + GeoBoundingBox, + MisusedGeoRadius, + MisusedGeoBoundingBox, InvalidPrimary, ExpectedEof, ExpectedValue(ExpectedValueKind), @@ -142,23 +144,29 @@ impl<'a> Display for Error<'a> { writeln!(f, "Expression `{}` is missing the following closing delimiter: `{}`.", escaped_input, c)? } ErrorKind::InvalidPrimary if input.trim().is_empty() => { - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing.")? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing.")? } ErrorKind::InvalidPrimary => { - writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `{}`.", escaped_input)? + writeln!(f, "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `{}`.", escaped_input)? } ErrorKind::ExpectedEof => { writeln!(f, "Found unexpected characters at the end of the filter: `{}`. You probably forgot an `OR` or an `AND` rule.", escaped_input)? } - ErrorKind::Geo => { + ErrorKind::GeoRadius => { writeln!(f, "The `_geoRadius` filter expects three arguments: `_geoRadius(latitude, longitude, radius)`.")? } - ErrorKind::ReservedGeo(name) => { - writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates.", name.escape_debug())? 
+ ErrorKind::GeoBoundingBox => { + writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")? } - ErrorKind::MisusedGeo => { + ErrorKind::ReservedGeo(name) => { + writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.", name.escape_debug())? + } + ErrorKind::MisusedGeoRadius => { writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? } + ErrorKind::MisusedGeoBoundingBox => { + writeln!(f, "The `_geoBoundingBox` filter is an operation and can't be used as a value.")? + } ErrorKind::ReservedKeyword(word) => { writeln!(f, "`{word}` is a reserved keyword and thus cannot be used as a field name unless it is put inside quotes. Use \"{word}\" or \'{word}\' instead.")? } diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 04037d061..8e21ff6be 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -18,6 +18,7 @@ //! doubleQuoted = "\"" .* all but double quotes "\"" //! word = (alphanumeric | _ | - | .)+ //! geoRadius = "_geoRadius(" WS* float WS* "," WS* float WS* "," float WS* ")" +//! geoBoundingBox = "_geoBoundingBox([" WS * float WS* "," WS* float WS* "], [" WS* float WS* "," WS* float WS* "]") //! ``` //! //! Other BNF grammar used to handle some specific errors: @@ -87,10 +88,15 @@ impl<'a> Token<'a> { Self { span, value } } + /// Returns the string contained in the span of the `Token`. + /// This is only useful in the tests. You should always use + /// the value. + #[cfg(test)] pub fn lexeme(&self) -> &str { &self.span } + /// Return the string contained in the token. 
pub fn value(&self) -> &str { self.value.as_ref().map_or(&self.span, |value| value) } @@ -99,8 +105,13 @@ impl<'a> Token<'a> { Error::new_from_external(self.span, error) } + /// Returns a copy of the span this token was created with. + pub fn original_span(&self) -> Span<'a> { + self.span + } + pub fn parse_finite_float(&self) -> Result { - let value: f64 = self.span.parse().map_err(|e| self.as_external_error(e))?; + let value: f64 = self.value().parse().map_err(|e| self.as_external_error(e))?; if value.is_finite() { Ok(value) } else { @@ -130,6 +141,7 @@ pub enum FilterCondition<'a> { Or(Vec), And(Vec), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, + GeoBoundingBox { top_left_point: [Token<'a>; 2], bottom_right_point: [Token<'a>; 2] }, } impl<'a> FilterCondition<'a> { @@ -310,12 +322,12 @@ fn parse_geo_radius(input: Span) -> IResult { // if we were able to parse `_geoRadius` and can't parse the rest of the input we return a failure cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))), )(input) - .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::Geo))); + .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoRadius))); let (input, args) = parsed?; if args.len() != 3 { - return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::Geo))); + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoRadius))); } let res = FilterCondition::GeoLowerThan { @@ -325,6 +337,37 @@ fn parse_geo_radius(input: Span) -> IResult { Ok((input, res)) } +/// geoBoundingBox = WS* "_geoBoundingBox([float WS* "," WS* float WS* "], [float WS* "," WS* float WS* "]") +/// If we parse `_geoBoundingBox` we MUST parse the rest of the expression. 
+fn parse_geo_bounding_box(input: Span) -> IResult { + // we want to allow space BEFORE the _geoBoundingBox but not after + let parsed = preceded( + tuple((multispace0, word_exact("_geoBoundingBox"))), + // if we were able to parse `_geoBoundingBox` and can't parse the rest of the input we return a failure + cut(delimited( + char('('), + separated_list1( + tag(","), + ws(delimited(char('['), separated_list1(tag(","), ws(recognize_float)), char(']'))), + ), + char(')'), + )), + )(input) + .map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::GeoBoundingBox))); + + let (input, args) = parsed?; + + if args.len() != 2 || args[0].len() != 2 || args[1].len() != 2 { + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::GeoBoundingBox))); + } + + let res = FilterCondition::GeoBoundingBox { + top_left_point: [args[0][0].into(), args[0][1].into()], + bottom_right_point: [args[1][0].into(), args[1][1].into()], + }; + Ok((input, res)) +} + /// geoPoint = WS* "_geoPoint(float WS* "," WS* float WS* "," WS* float) fn parse_geo_point(input: Span) -> IResult { // we want to forbid space BEFORE the _geoPoint but not after @@ -367,6 +410,7 @@ fn parse_primary(input: Span, depth: usize) -> IResult { }), ), parse_geo_radius, + parse_geo_bounding_box, parse_in, parse_not_in, parse_condition, @@ -468,6 +512,12 @@ pub mod tests { // Test geo radius insta::assert_display_snapshot!(p("_geoRadius(12, 13, 14)"), @"_geoRadius({12}, {13}, {14})"); insta::assert_display_snapshot!(p("NOT _geoRadius(12, 13, 14)"), @"NOT (_geoRadius({12}, {13}, {14}))"); + insta::assert_display_snapshot!(p("_geoRadius(12,13,14)"), @"_geoRadius({12}, {13}, {14})"); + + // Test geo bounding box + insta::assert_display_snapshot!(p("_geoBoundingBox([12, 13], [14, 15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])"); + insta::assert_display_snapshot!(p("NOT _geoBoundingBox([12, 13], [14, 15])"), @"NOT (_geoBoundingBox([{12}, {13}], [{14}, {15}]))"); + 
insta::assert_display_snapshot!(p("_geoBoundingBox([12,13],[14,15])"), @"_geoBoundingBox([{12}, {13}], [{14}, {15}])"); // Test OR + AND insta::assert_display_snapshot!(p("channel = ponce AND 'dog race' != 'bernese mountain'"), @"AND[{channel} = {ponce}, {dog race} != {bernese mountain}, ]"); @@ -512,7 +562,7 @@ pub mod tests { insta::assert_display_snapshot!(p("channel = "), @r###" Was expecting a value but instead got nothing. - 14:14 channel = + 14:14 channel = "###); insta::assert_display_snapshot!(p("channel = 🐻"), @r###" @@ -526,7 +576,7 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("'OR'"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `\'OR\'`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `\'OR\'`. 1:5 'OR' "###); @@ -536,12 +586,12 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("channel Ponce"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `channel Ponce`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `channel Ponce`. 1:14 channel Ponce "###); insta::assert_display_snapshot!(p("channel = Ponce OR"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` but instead got nothing. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` but instead got nothing. 
19:19 channel = Ponce OR "###); @@ -555,13 +605,28 @@ pub mod tests { 1:16 _geoRadius = 12 "###); + insta::assert_display_snapshot!(p("_geoBoundingBox"), @r###" + The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`. + 1:16 _geoBoundingBox + "###); + + insta::assert_display_snapshot!(p("_geoBoundingBox = 12"), @r###" + The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`. + 1:21 _geoBoundingBox = 12 + "###); + + insta::assert_display_snapshot!(p("_geoBoundingBox(1.0, 1.0)"), @r###" + The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`. + 1:26 _geoBoundingBox(1.0, 1.0) + "###); + insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###" - `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates. 1:22 _geoPoint(12, 13, 14) "###); insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###" - `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance) built-in rule to filter on `_geo` coordinates. + `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates. 
13:34 position <= _geoPoint(12, 13, 14) "###); @@ -591,12 +656,12 @@ pub mod tests { "###); insta::assert_display_snapshot!(p("colour NOT EXIST"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `colour NOT EXIST`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `colour NOT EXIST`. 1:17 colour NOT EXIST "###); insta::assert_display_snapshot!(p("subscribers 100 TO1000"), @r###" - Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `subscribers 100 TO1000`. + Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `subscribers 100 TO1000`. 1:23 subscribers 100 TO1000 "###); @@ -715,6 +780,16 @@ impl<'a> std::fmt::Display for FilterCondition<'a> { FilterCondition::GeoLowerThan { point, radius } => { write!(f, "_geoRadius({}, {}, {})", point[0], point[1], radius) } + FilterCondition::GeoBoundingBox { top_left_point, bottom_right_point } => { + write!( + f, + "_geoBoundingBox([{}, {}], [{}, {}])", + top_left_point[0], + top_left_point[1], + bottom_right_point[0], + bottom_right_point[1] + ) + } } } } diff --git a/filter-parser/src/value.rs b/filter-parser/src/value.rs index 73ef61480..2296c0769 100644 --- a/filter-parser/src/value.rs +++ b/filter-parser/src/value.rs @@ -6,7 +6,10 @@ use nom::sequence::{delimited, terminated}; use nom::{InputIter, InputLength, InputTake, Slice}; use crate::error::{ExpectedValueKind, NomErrorExt}; -use crate::{parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, Token}; +use crate::{ + parse_geo_bounding_box, parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, + Token, +}; /// This function goes through all characters in the [Span] if it finds any escaped character 
(`\`). /// It generates a new string with all `\` removed from the [Span]. @@ -91,11 +94,31 @@ pub fn parse_value(input: Span) -> IResult { } } match parse_geo_radius(input) { - Ok(_) => return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))), + Ok(_) => { + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius))) + } // if we encountered a failure it means the user badly wrote a _geoRadius filter. - // But instead of showing him how to fix his syntax we are going to tell him he should not use this filter as a value. + // But instead of showing them how to fix their syntax we are going to tell them they should not use this filter as a value. Err(e) if e.is_failure() => { - return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeo))) + return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius))) + } + _ => (), + } + + match parse_geo_bounding_box(input) { + Ok(_) => { + return Err(nom::Err::Failure(Error::new_from_kind( + input, + ErrorKind::MisusedGeoBoundingBox, + ))) + } + // if we encountered a failure it means the user badly wrote a _geoBoundingBox filter. + // But instead of showing them how to fix their syntax we are going to tell them they should not use this filter as a value. 
+ Err(e) if e.is_failure() => { + return Err(nom::Err::Failure(Error::new_from_kind( + input, + ErrorKind::MisusedGeoBoundingBox, + ))) } _ => (), } @@ -155,7 +178,7 @@ fn is_syntax_component(c: char) -> bool { } fn is_keyword(s: &str) -> bool { - matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius") + matches!(s, "AND" | "OR" | "IN" | "NOT" | "TO" | "EXISTS" | "_geoRadius" | "_geoBoundingBox") } #[cfg(test)] diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index d582a3672..3ef342171 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -415,7 +415,7 @@ async fn filter_invalid_syntax_object() { index.wait_task(1).await; let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid-search-filter" @@ -440,7 +440,7 @@ async fn filter_invalid_syntax_array() { index.wait_task(1).await; let expected_response = json!({ - "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, or `_geoRadius` at `title & Glass`.\n1:14 title & Glass", + "message": "Was expecting an operation `=`, `!=`, `>=`, `>`, `<=`, `<`, `IN`, `NOT IN`, `TO`, `EXISTS`, `NOT EXISTS`, `_geoRadius`, or `_geoBoundingBox` at `title & Glass`.\n1:14 title & Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid-search-filter" @@ -540,7 +540,7 @@ async fn filter_reserved_geo_attribute_array() { index.wait_task(1).await; let expected_response = 
json!({ - "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.\n1:5 _geo = Glass", + "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid-search-filter" @@ -565,7 +565,7 @@ async fn filter_reserved_geo_attribute_string() { index.wait_task(1).await; let expected_response = json!({ - "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.\n1:5 _geo = Glass", + "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. 
Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass", "code": "invalid_search_filter", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid-search-filter" diff --git a/milli/src/asc_desc.rs b/milli/src/asc_desc.rs index 21065da36..ebb28c27d 100644 --- a/milli/src/asc_desc.rs +++ b/milli/src/asc_desc.rs @@ -55,6 +55,9 @@ impl From for CriterionError { AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => { CriterionError::ReservedNameForFilter { name: "_geoRadius".to_string() } } + AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => { + CriterionError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() } + } AscDescError::ReservedKeyword { name } => CriterionError::ReservedName { name }, } } @@ -89,7 +92,10 @@ impl FromStr for Member { Ok(Member::Geo([lat, lng])) } None => { - if is_reserved_keyword(text) || text.starts_with("_geoRadius(") { + if is_reserved_keyword(text) + || text.starts_with("_geoRadius(") + || text.starts_with("_geoBoundingBox(") + { return Err(AscDescError::ReservedKeyword { name: text.to_string() })?; } Ok(Member::Field(text.to_string())) @@ -190,6 +196,9 @@ impl From for SortError { AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => { SortError::ReservedNameForFilter { name: String::from("_geoRadius") } } + AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => { + SortError::ReservedNameForFilter { name: String::from("_geoBoundingBox") } + } AscDescError::ReservedKeyword { name } => SortError::ReservedName { name }, } } diff --git a/milli/src/criterion.rs b/milli/src/criterion.rs index c02cd2525..45cbfe63d 100644 --- a/milli/src/criterion.rs +++ b/milli/src/criterion.rs @@ -159,6 +159,11 @@ mod tests { ("_geoPoint(42, 75):asc", ReservedNameForSort { name: S("_geoPoint") 
}), ("_geoRadius:asc", ReservedNameForFilter { name: S("_geoRadius") }), ("_geoRadius(42, 75, 59):asc", ReservedNameForFilter { name: S("_geoRadius") }), + ("_geoBoundingBox:asc", ReservedNameForFilter { name: S("_geoBoundingBox") }), + ( + "_geoBoundingBox([42, 75], [75, 59]):asc", + ReservedNameForFilter { name: S("_geoBoundingBox") }, + ), ]; for (input, expected) in invalid_criteria { diff --git a/milli/src/error.rs b/milli/src/error.rs index 8734cb540..92c238814 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -11,7 +11,7 @@ use crate::documents::{self, DocumentsBatchCursorError}; use crate::{CriterionError, DocumentId, FieldId, Object, SortError}; pub fn is_reserved_keyword(keyword: &str) -> bool { - ["_geo", "_geoDistance", "_geoPoint", "_geoRadius"].contains(&keyword) + ["_geo", "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"].contains(&keyword) } #[derive(Error, Debug)] diff --git a/milli/src/index.rs b/milli/src/index.rs index 31311d318..9f5b30cd6 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1206,7 +1206,7 @@ pub(crate) mod tests { self, DeleteDocuments, DeletionStrategy, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod, IndexerConfig, Settings, }; - use crate::{db_snap, obkv_to_json, Index, Search, SearchResult}; + use crate::{db_snap, obkv_to_json, Filter, Index, Search, SearchResult}; pub(crate) struct TempIndex { pub inner: Index, @@ -1504,6 +1504,108 @@ pub(crate) mod tests { assert_eq!(user_defined, &["doggo", "name"]); } + #[test] + fn test_basic_geo_bounding_box() { + let index = TempIndex::new(); + + index + .update_settings(|settings| { + settings.set_filterable_fields(hashset! 
{ S("_geo") }); + }) + .unwrap(); + index + .add_documents(documents!([ + { "id": 0, "_geo": { "lat": 0, "lng": 0 } }, + { "id": 1, "_geo": { "lat": 0, "lng": -175 } }, + { "id": 2, "_geo": { "lat": 0, "lng": 175 } }, + { "id": 3, "_geo": { "lat": 85, "lng": 0 } }, + { "id": 4, "_geo": { "lat": -85, "lng": 0 } }, + ])) + .unwrap(); + + // ensure we get the right real searchable fields + user defined searchable fields + let rtxn = index.read_txn().unwrap(); + let mut search = index.search(&rtxn); + + // exact match a document + let search_result = search + .filter(Filter::from_str("_geoBoundingBox([0, 0], [0, 0])").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>"); + + // match a document in the middle of the rectangle + let search_result = search + .filter(Filter::from_str("_geoBoundingBox([10, -10], [-10, 10])").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0]>"); + + // select everything + let search_result = search + .filter(Filter::from_str("_geoBoundingBox([90, -180], [-90, 180])").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[0, 1, 2, 3, 4]>"); + + // go on the edge of the longitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox([0, 180], [0, -170])").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1]>"); + + // go on the other edge of the longitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox([0, 170], [0, -180])").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[2]>"); + + // wrap around the longitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox([0, 170], [0, -170])").unwrap().unwrap()) + .execute() + .unwrap(); + 
insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[1, 2]>"); + + // go on the edge of the latitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox([90, 0], [80, 0])").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[3]>"); + + // go on the other edge of the latitude + let search_result = search + .filter(Filter::from_str("_geoBoundingBox([-80, 0], [-90, 0])").unwrap().unwrap()) + .execute() + .unwrap(); + insta::assert_debug_snapshot!(search_result.candidates, @"RoaringBitmap<[4]>"); + + // the requests that don't make sense + + // try to wrap around the latitude + let error = search + .filter(Filter::from_str("_geoBoundingBox([-80, 0], [80, 0])").unwrap().unwrap()) + .execute() + .unwrap_err(); + insta::assert_display_snapshot!(error, @r###" + The top latitude `-80` is below the bottom latitude `80`. + 32:33 _geoBoundingBox([-80, 0], [80, 0]) + "###); + + // send a top latitude lower than the bottom latitude + let error = search + .filter(Filter::from_str("_geoBoundingBox([-10, 0], [10, 0])").unwrap().unwrap()) + .execute() + .unwrap_err(); + insta::assert_display_snapshot!(error, @r###" + The top latitude `-10` is below the bottom latitude `10`. 
+ 32:33 _geoBoundingBox([-10, 0], [10, 0]) + "###); + } + #[test] fn replace_documents_external_ids_and_soft_deletion_check() { use big_s::S; diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 23cbb280c..3cf11819f 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -27,6 +27,7 @@ enum FilterError<'a> { BadGeo(&'a str), BadGeoLat(f64), BadGeoLng(f64), + BadGeoBoundingBoxTopIsBelowBottom(f64, f64), Reserved(&'a str), TooDeep, } @@ -62,7 +63,8 @@ impl<'a> Display for FilterError<'a> { "`{}` is a reserved keyword and thus can't be used as a filter expression.", keyword ), - Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates.", keyword), + Self::BadGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.", keyword), + Self::BadGeoBoundingBoxTopIsBelowBottom(top, bottom) => write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`."), Self::BadGeoLat(lat) => write!(f, "Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ", lat), Self::BadGeoLng(lng) => write!(f, "Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ", lng), } @@ -294,7 +296,7 @@ impl<'a> Filter<'a> { Ok(RoaringBitmap::new()) } } else { - match fid.lexeme() { + match fid.value() { attribute @ "_geo" => { Err(fid.as_external_error(FilterError::BadGeo(attribute)))? } @@ -385,6 +387,131 @@ impl<'a> Filter<'a> { }))? 
} } + FilterCondition::GeoBoundingBox { top_left_point, bottom_right_point } => { + if filterable_fields.contains("_geo") { + let top_left: [f64; 2] = [ + top_left_point[0].parse_finite_float()?, + top_left_point[1].parse_finite_float()?, + ]; + let bottom_right: [f64; 2] = [ + bottom_right_point[0].parse_finite_float()?, + bottom_right_point[1].parse_finite_float()?, + ]; + if !(-90.0..=90.0).contains(&top_left[0]) { + return Err(top_left_point[0] + .as_external_error(FilterError::BadGeoLat(top_left[0])))?; + } + if !(-180.0..=180.0).contains(&top_left[1]) { + return Err(top_left_point[1] + .as_external_error(FilterError::BadGeoLng(top_left[1])))?; + } + if !(-90.0..=90.0).contains(&bottom_right[0]) { + return Err(bottom_right_point[0] + .as_external_error(FilterError::BadGeoLat(bottom_right[0])))?; + } + if !(-180.0..=180.0).contains(&bottom_right[1]) { + return Err(bottom_right_point[1] + .as_external_error(FilterError::BadGeoLng(bottom_right[1])))?; + } + if top_left[0] < bottom_right[0] { + return Err(bottom_right_point[1].as_external_error( + FilterError::BadGeoBoundingBoxTopIsBelowBottom( + top_left[0], + bottom_right[0], + ), + ))?; + } + + // Instead of writing a custom `GeoBoundingBox` filter we're simply going to re-use the range + // filter to create the following filter; + // `_geo.lat {top_left[0]} TO {bottom_right[0]} AND _geo.lng {top_left[1]} TO {bottom_right[1]}` + // As we can see, we need to use a bunch of tokens that don't exist in the original filter, + // thus we're going to create tokens that point to a random span but contain our text. 
+ + let geo_lat_token = + Token::new(top_left_point[0].original_span(), Some("_geo.lat".to_string())); + + let condition_lat = FilterCondition::Condition { + fid: geo_lat_token, + op: Condition::Between { + from: bottom_right_point[0].clone(), + to: top_left_point[0].clone(), + }, + }; + + let selected_lat = Filter { condition: condition_lat }.inner_evaluate( + rtxn, + index, + filterable_fields, + )?; + + let geo_lng_token = + Token::new(top_left_point[1].original_span(), Some("_geo.lng".to_string())); + let selected_lng = if top_left[1] > bottom_right[1] { + // In this case the bounding box is wrapping around the earth (going from 180 to -180). + // We need to update the lng part of the filter from; + // `_geo.lng {top_left[1]} TO {bottom_right[1]}` to + // `_geo.lng {top_left[1]} TO 180 AND _geo.lng -180 TO {bottom_right[1]}` + + let min_lng_token = Token::new( + top_left_point[1].original_span(), + Some("-180.0".to_string()), + ); + let max_lng_token = Token::new( + top_left_point[1].original_span(), + Some("180.0".to_string()), + ); + + let condition_left = FilterCondition::Condition { + fid: geo_lng_token.clone(), + op: Condition::Between { + from: top_left_point[1].clone(), + to: max_lng_token, + }, + }; + let left = Filter { condition: condition_left }.inner_evaluate( + rtxn, + index, + filterable_fields, + )?; + + let condition_right = FilterCondition::Condition { + fid: geo_lng_token, + op: Condition::Between { + from: min_lng_token, + to: bottom_right_point[1].clone(), + }, + }; + let right = Filter { condition: condition_right }.inner_evaluate( + rtxn, + index, + filterable_fields, + )?; + + left | right + } else { + let condition_lng = FilterCondition::Condition { + fid: geo_lng_token, + op: Condition::Between { + from: top_left_point[1].clone(), + to: bottom_right_point[1].clone(), + }, + }; + Filter { condition: condition_lng }.inner_evaluate( + rtxn, + index, + filterable_fields, + )? 
+ }; + + Ok(selected_lat & selected_lng) + } else { + Err(top_left_point[0].as_external_error(FilterError::AttributeNotFilterable { + attribute: "_geo", + filterable_fields: filterable_fields.clone(), + }))? + } + } } } } @@ -502,6 +629,12 @@ mod tests { "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." )); + let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().starts_with( + "Attribute `_geo` is not filterable. This index does not have configured filterable attributes." + )); + let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( @@ -524,6 +657,12 @@ mod tests { "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." )); + let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().starts_with( + "Attribute `_geo` is not filterable. Available filterable attributes are: `title`." + )); + let filter = Filter::from_str("name = 12").unwrap().unwrap(); let error = filter.evaluate(&rtxn, &index).unwrap_err(); assert!(error.to_string().starts_with( @@ -675,6 +814,92 @@ mod tests { )); } + #[test] + fn geo_bounding_box_error() { + let index = TempIndex::new(); + + index + .update_settings(|settings| { + settings.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order + settings.set_filterable_fields(hashset! 
{ S("_geo"), S("price") }); + }) + .unwrap(); + + let rtxn = index.read_txn().unwrap(); + + // geoboundingbox top left coord have a bad latitude + let filter = + Filter::from_str("_geoBoundingBox([-90.0000001, 150], [30, 10])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!( + error.to_string().starts_with( + "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees." + ), + "{}", + error.to_string() + ); + + // geoboundingbox top left coord have a bad latitude + let filter = + Filter::from_str("_geoBoundingBox([90.0000001, 150], [30, 10])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!( + error.to_string().starts_with( + "Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees." + ), + "{}", + error.to_string() + ); + + // geoboundingbox bottom right coord have a bad latitude + let filter = + Filter::from_str("_geoBoundingBox([30, 10], [-90.0000001, 150])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees." + )); + + // geoboundingbox bottom right coord have a bad latitude + let filter = + Filter::from_str("_geoBoundingBox([30, 10], [90.0000001, 150])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees." + )); + + // geoboundingbox top left coord have a bad longitude + let filter = + Filter::from_str("_geoBoundingBox([-10, 180.000001], [30, 10])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." 
+ )); + + // geoboundingbox top left coord have a bad longitude + let filter = + Filter::from_str("_geoBoundingBox([-10, -180.000001], [30, 10])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees." + )); + + // geoboundingbox bottom right coord have a bad longitude + let filter = + Filter::from_str("_geoBoundingBox([30, 10], [-10, -180.000001])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees." + )); + + // geoboundingbox bottom right coord have a bad longitude + let filter = + Filter::from_str("_geoBoundingBox([30, 10], [-10, 180.000001])").unwrap().unwrap(); + let error = filter.evaluate(&rtxn, &index).unwrap_err(); + assert!(error.to_string().contains( + "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." + )); + } + #[test] fn filter_depth() { // generates a big (2 MiB) filter with too much of ORs. diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index df59634bb..dc48e04a8 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -319,7 +319,7 @@ impl fmt::Debug for Search<'_> { } } -#[derive(Default)] +#[derive(Default, Debug)] pub struct SearchResult { pub matching_words: MatchingWords, pub candidates: RoaringBitmap,