689: Handle non-finite floats consistently in filters r=irevoire a=dureuill

# Pull Request

## Related issue

Related meilisearch/meilisearch#3000

## What does this PR do?

### User

- Filters using `field = inf`, (or `infinite`, `NaN`) now match the value as a string rather than returning an internal error.
- Filters using `field < inf` (or other comparison operators) now return an invalid_filter error rather than returning an internal error, much like when using `field < aaa`.

### Implementation

- Add new `NonFiniteFloat` error variants to the filter-parser errors
- Add `Token::parse_as_finite_float` that can fail both when the string is not a float and when the float is not finite
- Refactor `Filter::inner_evaluate` to always use `parse_as_finite_float` instead of just `parse`
- Add corresponding tests

## PR checklist
Please check if your PR fulfills the following requirements:
- [x] Does this PR fix an existing issue, or have you listed the changes applied in the PR description (and why they are needed)?
- [x] Have you read the contributing guidelines?
- [x] Have you made sure that the title is accurate and descriptive of the changes?

Thank you so much for contributing to Meilisearch!


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
bors[bot] 2022-11-08 13:24:38 +00:00 committed by GitHub
commit d85cd9bf1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 84 additions and 15 deletions

View File

@ -65,6 +65,7 @@ pub enum ErrorKind<'a> {
MalformedValue, MalformedValue,
InOpeningBracket, InOpeningBracket,
InClosingBracket, InClosingBracket,
NonFiniteFloat,
InExpectedValue(ExpectedValueKind), InExpectedValue(ExpectedValueKind),
ReservedKeyword(String), ReservedKeyword(String),
MissingClosingDelimiter(char), MissingClosingDelimiter(char),
@ -167,6 +168,9 @@ impl<'a> Display for Error<'a> {
ErrorKind::InClosingBracket => { ErrorKind::InClosingBracket => {
writeln!(f, "Expected matching `]` after the list of field names given to `IN[`")? writeln!(f, "Expected matching `]` after the list of field names given to `IN[`")?
} }
ErrorKind::NonFiniteFloat => {
writeln!(f, "Non finite floats are not supported")?
}
ErrorKind::InExpectedValue(ExpectedValueKind::ReservedKeyword) => { ErrorKind::InExpectedValue(ExpectedValueKind::ReservedKeyword) => {
writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`, which is a keyword. To use `{escaped_input}` as a field name or a value, surround it by quotes.")? writeln!(f, "Expected only comma-separated field names inside `IN[..]` but instead found `{escaped_input}`, which is a keyword. To use `{escaped_input}` as a field name or a value, surround it by quotes.")?
} }

View File

@ -44,7 +44,6 @@ mod error;
mod value; mod value;
use std::fmt::Debug; use std::fmt::Debug;
use std::str::FromStr;
pub use condition::{parse_condition, parse_to, Condition}; pub use condition::{parse_condition, parse_to, Condition};
use condition::{parse_exists, parse_not_exists}; use condition::{parse_exists, parse_not_exists};
@ -100,12 +99,13 @@ impl<'a> Token<'a> {
Error::new_from_external(self.span, error) Error::new_from_external(self.span, error)
} }
pub fn parse<T>(&self) -> Result<T, Error> pub fn parse_finite_float(&self) -> Result<f64, Error> {
where let value: f64 = self.span.parse().map_err(|e| self.as_external_error(e))?;
T: FromStr, if value.is_finite() {
T::Err: std::error::Error, Ok(value)
{ } else {
self.span.parse().map_err(|e| self.as_external_error(e)) Err(Error::new_from_kind(self.span, ErrorKind::NonFiniteFloat))
}
} }
} }

View File

@ -169,11 +169,19 @@ impl<'a> Filter<'a> {
// field id and the level. // field id and the level.
let (left, right) = match operator { let (left, right) = match operator {
Condition::GreaterThan(val) => (Excluded(val.parse()?), Included(f64::MAX)), Condition::GreaterThan(val) => {
Condition::GreaterThanOrEqual(val) => (Included(val.parse()?), Included(f64::MAX)), (Excluded(val.parse_finite_float()?), Included(f64::MAX))
Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.parse()?)), }
Condition::LowerThanOrEqual(val) => (Included(f64::MIN), Included(val.parse()?)), Condition::GreaterThanOrEqual(val) => {
Condition::Between { from, to } => (Included(from.parse()?), Included(to.parse()?)), (Included(val.parse_finite_float()?), Included(f64::MAX))
}
Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.parse_finite_float()?)),
Condition::LowerThanOrEqual(val) => {
(Included(f64::MIN), Included(val.parse_finite_float()?))
}
Condition::Between { from, to } => {
(Included(from.parse_finite_float()?), Included(to.parse_finite_float()?))
}
Condition::Exists => { Condition::Exists => {
let exist = index.exists_faceted_documents_ids(rtxn, field_id)?; let exist = index.exists_faceted_documents_ids(rtxn, field_id)?;
return Ok(exist); return Ok(exist);
@ -190,7 +198,7 @@ impl<'a> Filter<'a> {
)? )?
.map(|v| v.bitmap) .map(|v| v.bitmap)
.unwrap_or_default(); .unwrap_or_default();
let number = val.parse::<f64>().ok(); let number = val.parse_finite_float().ok();
let number_docids = match number { let number_docids = match number {
Some(n) => { Some(n) => {
let n = Included(n); let n = Included(n);
@ -389,7 +397,8 @@ impl<'a> Filter<'a> {
} }
FilterCondition::GeoLowerThan { point, radius } => { FilterCondition::GeoLowerThan { point, radius } => {
if filterable_fields.contains("_geo") { if filterable_fields.contains("_geo") {
let base_point: [f64; 2] = [point[0].parse()?, point[1].parse()?]; let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
if !(-90.0..=90.0).contains(&base_point[0]) { if !(-90.0..=90.0).contains(&base_point[0]) {
return Err( return Err(
point[0].as_external_error(FilterError::BadGeoLat(base_point[0])) point[0].as_external_error(FilterError::BadGeoLat(base_point[0]))
@ -400,7 +409,7 @@ impl<'a> Filter<'a> {
point[1].as_external_error(FilterError::BadGeoLng(base_point[1])) point[1].as_external_error(FilterError::BadGeoLng(base_point[1]))
)?; )?;
} }
let radius = radius.parse()?; let radius = radius.parse_finite_float()?;
let rtree = match index.geo_rtree(rtxn)? { let rtree = match index.geo_rtree(rtxn)? {
Some(rtree) => rtree, Some(rtree) => rtree,
None => return Ok(RoaringBitmap::new()), None => return Ok(RoaringBitmap::new()),
@ -689,4 +698,60 @@ mod tests {
let option = Filter::from_str(" ").unwrap(); let option = Filter::from_str(" ").unwrap();
assert_eq!(option, None); assert_eq!(option, None);
} }
#[test]
fn non_finite_float() {
let index = TempIndex::new();
index
.update_settings(|settings| {
settings.set_searchable_fields(vec![S("price")]); // to keep the fields order
settings.set_filterable_fields(hashset! { S("price") });
})
.unwrap();
index
.add_documents(documents!([
{
"id": "test_1",
"price": "inf"
},
{
"id": "test_2",
"price": "2000"
},
{
"id": "test_3",
"price": "infinity"
},
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("price = inf").unwrap().unwrap();
let result = filter.evaluate(&rtxn, &index).unwrap();
assert!(result.contains(0));
let filter = Filter::from_str("price < inf").unwrap().unwrap();
assert!(matches!(
filter.evaluate(&rtxn, &index),
Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
));
let filter = Filter::from_str("price = NaN").unwrap().unwrap();
let result = filter.evaluate(&rtxn, &index).unwrap();
assert!(result.is_empty());
let filter = Filter::from_str("price < NaN").unwrap().unwrap();
assert!(matches!(
filter.evaluate(&rtxn, &index),
Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
));
let filter = Filter::from_str("price = infinity").unwrap().unwrap();
let result = filter.evaluate(&rtxn, &index).unwrap();
assert!(result.contains(2));
let filter = Filter::from_str("price < infinity").unwrap().unwrap();
assert!(matches!(
filter.evaluate(&rtxn, &index),
Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
));
}
} }