3638: filter errors - `geo(x,y,z)` and `geoDistance(x,y,z)` r=irevoire a=cymruu

# Pull Request

## Related issue
Fixes #3006

## What does this PR do?
- fixes the display function of `ParseError::ReservedGeo`.  The previous display string was missing back ticks around available filters.
- makes  the filter-parser parse `_geo(x,y,z)` and `geoDistance(x,y,z)` filters. Both parsing functions will throw an error if the filter was used.
- removes `FilterError::ReservedGeo` and `FilterError::Reserved` error variants since they are now thrown by the filter-parser.

I ran `cargo test --package milli -- --test-threads 1` and the tests passed.

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [ ] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Filip Bachul <filipbachul@gmail.com>
Co-authored-by: filip <filipbachul@gmail.com>
This commit is contained in:
bors[bot] 2023-04-05 13:23:50 +00:00 committed by GitHub
commit 1690aec7f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 121 additions and 37 deletions

View File

@ -159,7 +159,7 @@ impl<'a> Display for Error<'a> {
writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")? writeln!(f, "The `_geoBoundingBox` filter expects two pairs of arguments: `_geoBoundingBox([latitude, longitude], [latitude, longitude])`.")?
} }
ErrorKind::ReservedGeo(name) => { ErrorKind::ReservedGeo(name) => {
writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates.", name.escape_debug())? writeln!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.", name.escape_debug())?
} }
ErrorKind::MisusedGeoRadius => { ErrorKind::MisusedGeoRadius => {
writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")? writeln!(f, "The `_geoRadius` filter is an operation and can't be used as a value.")?

View File

@ -382,6 +382,34 @@ fn parse_geo_point(input: Span) -> IResult<FilterCondition> {
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint")))) Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoPoint"))))
} }
/// geoPoint = WS* "_geoDistance(float WS* "," WS* float WS* "," WS* float)
fn parse_geo_distance(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geoDistance but not after
tuple((
multispace0,
tag("_geoDistance"),
// if we were able to parse `_geoDistance` we are going to return a Failure whatever happens next.
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))?;
// if we succeeded we still return a `Failure` because `geoDistance` filters are not allowed
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geoDistance"))))
}
/// geo = WS* "_geo(float WS* "," WS* float WS* "," WS* float)
fn parse_geo(input: Span) -> IResult<FilterCondition> {
// we want to forbid space BEFORE the _geo but not after
tuple((
multispace0,
word_exact("_geo"),
// if we were able to parse `_geo` we are going to return a Failure whatever happens next.
cut(delimited(char('('), separated_list1(tag(","), ws(recognize_float)), char(')'))),
))(input)
.map_err(|e| e.map(|_| Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))?;
// if we succeeded we still return a `Failure` because `_geo` filter is not allowed
Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::ReservedGeo("_geo"))))
}
fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> { fn parse_error_reserved_keyword(input: Span) -> IResult<FilterCondition> {
match parse_condition(input) { match parse_condition(input) {
Ok(result) => Ok(result), Ok(result) => Ok(result),
@ -418,6 +446,8 @@ fn parse_primary(input: Span, depth: usize) -> IResult<FilterCondition> {
parse_not_exists, parse_not_exists,
parse_to, parse_to,
// the next lines are only for error handling and are written at the end to have the less possible performance impact // the next lines are only for error handling and are written at the end to have the less possible performance impact
parse_geo,
parse_geo_distance,
parse_geo_point, parse_geo_point,
parse_error_reserved_keyword, parse_error_reserved_keyword,
))(input) ))(input)
@ -621,15 +651,35 @@ pub mod tests {
"###); "###);
insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("_geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates. `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:22 _geoPoint(12, 13, 14) 1:22 _geoPoint(12, 13, 14)
"###); "###);
insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("position <= _geoPoint(12, 13, 14)"), @r###"
`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance), or _geoBoundingBox([latitude, longitude], [latitude, longitude]) built-in rules to filter on `_geo` coordinates. `_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:34 position <= _geoPoint(12, 13, 14) 13:34 position <= _geoPoint(12, 13, 14)
"###); "###);
insta::assert_display_snapshot!(p("_geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:25 _geoDistance(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geoDistance(12, 13, 14)"), @r###"
`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:37 position <= _geoDistance(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("_geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
1:17 _geo(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geo(12, 13, 14)"), @r###"
`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.
13:29 position <= _geo(12, 13, 14)
"###);
insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###" insta::assert_display_snapshot!(p("position <= _geoRadius(12, 13, 14)"), @r###"
The `_geoRadius` filter is an operation and can't be used as a value. The `_geoRadius` filter is an operation and can't be used as a value.
13:35 position <= _geoRadius(12, 13, 14) 13:35 position <= _geoRadius(12, 13, 14)

View File

@ -7,8 +7,8 @@ use nom::{InputIter, InputLength, InputTake, Slice};
use crate::error::{ExpectedValueKind, NomErrorExt}; use crate::error::{ExpectedValueKind, NomErrorExt};
use crate::{ use crate::{
parse_geo_bounding_box, parse_geo_point, parse_geo_radius, Error, ErrorKind, IResult, Span, parse_geo, parse_geo_bounding_box, parse_geo_distance, parse_geo_point, parse_geo_radius,
Token, Error, ErrorKind, IResult, Span, Token,
}; };
/// This function goes through all characters in the [Span] if it finds any escaped character (`\`). /// This function goes through all characters in the [Span] if it finds any escaped character (`\`).
@ -88,11 +88,16 @@ pub fn parse_value(input: Span) -> IResult<Token> {
// then, we want to check if the user is misusing a geo expression // then, we want to check if the user is misusing a geo expression
// This expression cant finish without error. // This expression cant finish without error.
// We want to return an error in case of failure. // We want to return an error in case of failure.
if let Err(err) = parse_geo_point(input) { let geo_reserved_parse_functions = [parse_geo_point, parse_geo_distance, parse_geo];
for parser in geo_reserved_parse_functions {
if let Err(err) = parser(input) {
if err.is_failure() { if err.is_failure() {
return Err(err); return Err(err);
} }
} }
}
match parse_geo_radius(input) { match parse_geo_radius(input) {
Ok(_) => { Ok(_) => {
return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius))) return Err(nom::Err::Failure(Error::new_from_kind(input, ErrorKind::MisusedGeoRadius)))

View File

@ -672,7 +672,7 @@ async fn filter_reserved_geo_attribute_array() {
index.wait_task(1).await; index.wait_task(1).await;
let expected_response = json!({ let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass", "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
"code": "invalid_search_filter", "code": "invalid_search_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter" "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -697,7 +697,7 @@ async fn filter_reserved_geo_attribute_string() {
index.wait_task(1).await; index.wait_task(1).await;
let expected_response = json!({ let expected_response = json!({
"message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.\n1:5 _geo = Glass", "message": "`_geo` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:13 _geo = Glass",
"code": "invalid_search_filter", "code": "invalid_search_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter" "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -722,7 +722,7 @@ async fn filter_reserved_attribute_array() {
index.wait_task(1).await; index.wait_task(1).await;
let expected_response = json!({ let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass", "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
"code": "invalid_search_filter", "code": "invalid_search_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter" "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -747,7 +747,7 @@ async fn filter_reserved_attribute_string() {
index.wait_task(1).await; index.wait_task(1).await;
let expected_response = json!({ let expected_response = json!({
"message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression.\n1:13 _geoDistance = Glass", "message": "`_geoDistance` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:21 _geoDistance = Glass",
"code": "invalid_search_filter", "code": "invalid_search_filter",
"type": "invalid_request", "type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter" "link": "https://docs.meilisearch.com/errors#invalid_search_filter"
@ -760,6 +760,56 @@ async fn filter_reserved_attribute_string() {
.await; .await;
} }
#[actix_rt::test]
async fn filter_reserved_geo_point_array() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": ["_geoPoint = Glass"]}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
}
#[actix_rt::test]
async fn filter_reserved_geo_point_string() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({"filterableAttributes": ["title"]})).await;
let documents = DOCUMENTS.clone();
index.add_documents(documents, None).await;
index.wait_task(1).await;
let expected_response = json!({
"message": "`_geoPoint` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` coordinates.\n1:18 _geoPoint = Glass",
"code": "invalid_search_filter",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_filter"
});
index
.search(json!({"filter": "_geoPoint = Glass"}), |response, code| {
assert_eq!(response, expected_response);
assert_eq!(code, 400);
})
.await;
}
#[actix_rt::test] #[actix_rt::test]
async fn sort_geo_reserved_attribute() { async fn sort_geo_reserved_attribute() {
let server = Server::new().await; let server = Server::new().await;

View File

@ -54,8 +54,6 @@ impl Display for BadGeoError {
enum FilterError<'a> { enum FilterError<'a> {
AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> }, AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
ParseGeoError(BadGeoError), ParseGeoError(BadGeoError),
ReservedGeo(&'a str),
Reserved(&'a str),
TooDeep, TooDeep,
} }
impl<'a> std::error::Error for FilterError<'a> {} impl<'a> std::error::Error for FilterError<'a> {}
@ -96,12 +94,6 @@ impl<'a> Display for FilterError<'a> {
"Too many filter conditions, can't process more than {} filters.", "Too many filter conditions, can't process more than {} filters.",
MAX_FILTER_DEPTH MAX_FILTER_DEPTH
), ),
Self::ReservedGeo(keyword) => write!(f, "`{}` is a reserved keyword and thus can't be used as a filter expression. Use the `_geoRadius(latitude, longitude, distance)` or `_geoBoundingBox([latitude, longitude], [latitude, longitude])` built-in rules to filter on `_geo` field coordinates.", keyword),
Self::Reserved(keyword) => write!(
f,
"`{}` is a reserved keyword and thus can't be used as a filter expression.",
keyword
),
Self::ParseGeoError(error) => write!(f, "{}", error), Self::ParseGeoError(error) => write!(f, "{}", error),
} }
} }
@ -332,25 +324,12 @@ impl<'a> Filter<'a> {
Ok(RoaringBitmap::new()) Ok(RoaringBitmap::new())
} }
} else { } else {
match fid.value() {
attribute @ "_geo" => {
Err(fid.as_external_error(FilterError::ReservedGeo(attribute)))?
}
attribute if attribute.starts_with("_geoPoint(") => {
Err(fid.as_external_error(FilterError::ReservedGeo("_geoPoint")))?
}
attribute @ "_geoDistance" => {
Err(fid.as_external_error(FilterError::Reserved(attribute)))?
}
attribute => {
Err(fid.as_external_error(FilterError::AttributeNotFilterable { Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute, attribute: fid.value(),
filterable_fields: filterable_fields.clone(), filterable_fields: filterable_fields.clone(),
}))? }))?
} }
} }
}
}
FilterCondition::Or(subfilters) => { FilterCondition::Or(subfilters) => {
let mut bitmap = RoaringBitmap::new(); let mut bitmap = RoaringBitmap::new();
for f in subfilters { for f in subfilters {