MeiliSearch/milli/src/search/facet/filter.rs

1088 lines
42 KiB
Rust
Raw Normal View History

2022-09-01 11:40:29 +02:00
use std::collections::HashSet;
use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
use roaring::RoaringBitmap;
2022-09-01 11:40:29 +02:00
use super::facet_range_search;
2021-09-16 11:56:18 +02:00
use crate::error::{Error, UserError};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
};
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
/// Maximum depth allowed for a parsed filter.
///
/// `Filter::from_str` and `Filter::from_array` ask the parsed condition for a
/// token located at this depth (`token_at_depth`); when one exists the filter
/// is rejected with `FilterError::TooDeep`.
const MAX_FILTER_DEPTH: usize = 2000;
2021-11-06 01:32:12 +01:00
/// A parsed filter expression that can be evaluated against an index.
///
/// Built with `Filter::from_str` or `Filter::from_array`; it wraps the
/// `FilterCondition` tree produced by the `filter_parser` crate.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Filter<'a> {
// Root of the parsed condition tree.
condition: FilterCondition<'a>,
}
/// Errors raised when the coordinates given to a geo filter are out of range.
#[derive(Debug)]
pub enum BadGeoError {
    /// The latitude is not contained between -90 and 90 degrees.
    Lat(f64),
    /// The longitude is not contained between -180 and 180 degrees.
    Lng(f64),
    /// The top latitude of a bounding box (first value) is below its bottom
    /// latitude (second value).
    BoundingBoxTopIsBelowBottom(f64, f64),
}

impl std::error::Error for BadGeoError {}

impl Display for BadGeoError {
    /// Writes the user-facing message of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::BoundingBoxTopIsBelowBottom(top, bottom) => {
                write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`.")
            }
            // The trailing space of the two messages below is kept on purpose:
            // it is part of the text that has always been emitted.
            Self::Lat(lat) => write!(
                f,
                "Bad latitude `{lat}`. Latitude must be contained between -90 and 90 degrees. "
            ),
            Self::Lng(lng) => write!(
                f,
                "Bad longitude `{lng}`. Longitude must be contained between -180 and 180 degrees. "
            ),
        }
    }
}
#[derive(Debug)]
enum FilterError<'a> {
AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
2023-02-14 00:14:17 +01:00
ParseGeoError(BadGeoError),
TooDeep,
}
impl<'a> std::error::Error for FilterError<'a> {}
2023-02-14 00:14:17 +01:00
impl<'a> From<BadGeoError> for FilterError<'a> {
fn from(geo_error: BadGeoError) -> Self {
FilterError::ParseGeoError(geo_error)
}
}
impl<'a> Display for FilterError<'a> {
    /// Writes the user-facing message of the error.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::AttributeNotFilterable { attribute, filterable_fields } => {
                if filterable_fields.is_empty() {
                    write!(
                        f,
                        "Attribute `{}` is not filterable. This index does not have configured filterable attributes.",
                        attribute,
                    )
                } else {
                    // A `HashSet` iterates in an arbitrary order: sort the names so
                    // that the same settings always produce the same message.
                    let mut filterables: Vec<&str> =
                        filterable_fields.iter().map(String::as_str).collect();
                    filterables.sort_unstable();
                    let filterables_list = filterables.join(" ");

                    write!(
                        f,
                        "Attribute `{}` is not filterable. Available filterable attributes are: `{}`.",
                        attribute,
                        filterables_list,
                    )
                }
            }
            Self::TooDeep => write!(
                f,
                "Too many filter conditions, can't process more than {} filters.",
                MAX_FILTER_DEPTH
            ),
            Self::ParseGeoError(error) => write!(f, "{}", error),
        }
    }
}
impl<'a> From<FPError<'a>> for Error {
fn from(error: FPError<'a>) -> Self {
Self::UserError(UserError::InvalidFilter(error.to_string()))
}
}
2021-11-04 15:52:22 +01:00
impl<'a> From<Filter<'a>> for FilterCondition<'a> {
fn from(f: Filter<'a>) -> Self {
f.condition
}
}
impl<'a> Filter<'a> {
2021-10-22 16:38:35 +02:00
pub fn from_array<I, J>(array: I) -> Result<Option<Self>>
2021-06-16 18:33:33 +02:00
where
I: IntoIterator<Item = Either<J, &'a str>>,
J: IntoIterator<Item = &'a str>,
{
let mut ands = vec![];
for either in array {
match either {
Either::Left(array) => {
let mut ors = vec![];
for rule in array {
if let Some(filter) = Self::from_str(rule)? {
ors.push(filter.condition);
}
}
match ors.len() {
2022-11-03 09:34:33 +01:00
0 => (),
1 => ands.push(ors.pop().unwrap()),
2022-11-03 09:34:33 +01:00
_ => ands.push(FilterCondition::Or(ors)),
}
2021-06-16 18:33:33 +02:00
}
Either::Right(rule) => {
if let Some(filter) = Self::from_str(rule)? {
ands.push(filter.condition);
}
}
}
}
let and = if ands.is_empty() {
return Ok(None);
} else if ands.len() == 1 {
ands.pop().unwrap()
} else {
FilterCondition::And(ands)
};
if let Some(token) = and.token_at_depth(MAX_FILTER_DEPTH) {
return Err(token.as_external_error(FilterError::TooDeep).into());
}
Ok(Some(Self { condition: and }))
}
2021-10-14 15:37:59 +02:00
#[allow(clippy::should_implement_trait)]
pub fn from_str(expression: &'a str) -> Result<Option<Self>> {
let condition = match FilterCondition::parse(expression) {
Ok(Some(fc)) => Ok(fc),
Ok(None) => return Ok(None),
Err(e) => Err(Error::UserError(UserError::InvalidFilter(e.to_string()))),
2021-10-22 16:38:35 +02:00
}?;
if let Some(token) = condition.token_at_depth(MAX_FILTER_DEPTH) {
return Err(token.as_external_error(FilterError::TooDeep).into());
}
Ok(Some(Self { condition }))
}
}
impl<'a> Filter<'a> {
/// Evaluates the filter against `index` and returns the ids of the matching
/// documents, soft-deleted documents excluded.
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
    // Both values are fetched ONCE here instead of in every recursive call.
    let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
    let filterable_fields = index.filterable_fields(rtxn)?;

    let candidates = self.inner_evaluate(rtxn, index, &filterable_fields)?;

    // The soft-deleted documents are removed a single time, at the very end.
    Ok(candidates - soft_deleted_documents)
}
fn evaluate_operator(
rtxn: &heed::RoTxn,
index: &Index,
field_id: FieldId,
operator: &Condition<'a>,
2021-06-16 18:33:33 +02:00
) -> Result<RoaringBitmap> {
let numbers_db = index.facet_id_f64_docids;
let strings_db = index.facet_id_string_docids;
// Make sure we always bound the ranges with the field id and the level,
// as the facets values are all in the same database and prefixed by the
// field id and the level.
let (left, right) = match operator {
Condition::GreaterThan(val) => {
(Excluded(val.parse_finite_float()?), Included(f64::MAX))
}
Condition::GreaterThanOrEqual(val) => {
(Included(val.parse_finite_float()?), Included(f64::MAX))
}
Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.parse_finite_float()?)),
Condition::LowerThanOrEqual(val) => {
(Included(f64::MIN), Included(val.parse_finite_float()?))
}
Condition::Between { from, to } => {
(Included(from.parse_finite_float()?), Included(to.parse_finite_float()?))
}
Condition::Null => {
let is_null = index.null_faceted_documents_ids(rtxn, field_id)?;
return Ok(is_null);
}
2023-03-14 18:08:12 +01:00
Condition::Empty => {
let is_empty = index.empty_faceted_documents_ids(rtxn, field_id)?;
return Ok(is_empty);
}
Condition::Exists => {
2022-05-25 11:55:16 +02:00
let exist = index.exists_faceted_documents_ids(rtxn, field_id)?;
return Ok(exist);
}
Condition::Equal(val) => {
let string_docids = strings_db
.get(
rtxn,
2022-09-05 13:01:36 +02:00
&FacetGroupKey {
field_id,
level: 0,
2023-03-29 10:57:02 +02:00
left_bound: &crate::normalize_facet(val.value()),
2022-09-05 13:01:36 +02:00
},
)?
.map(|v| v.bitmap)
2022-06-09 16:03:49 +02:00
.unwrap_or_default();
let number = val.parse_finite_float().ok();
let number_docids = match number {
Some(n) => numbers_db
.get(rtxn, &FacetGroupKey { field_id, level: 0, left_bound: n })?
.map(|v| v.bitmap)
.unwrap_or_default(),
None => RoaringBitmap::new(),
};
return Ok(string_docids | number_docids);
2021-06-16 18:33:33 +02:00
}
Condition::NotEqual(val) => {
let operator = Condition::Equal(val.clone());
2022-07-18 17:09:52 +02:00
let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?;
let all_ids = index.documents_ids(rtxn)?;
return Ok(all_ids - docids);
2021-10-22 18:03:39 +02:00
}
};
let mut output = RoaringBitmap::new();
Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?;
Ok(output)
}
/// Aggregates the documents ids that are part of the specified range automatically
/// going deeper through the levels.
fn explore_facet_number_levels(
rtxn: &heed::RoTxn,
2022-09-05 13:01:36 +02:00
db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
field_id: FieldId,
left: Bound<f64>,
right: Bound<f64>,
output: &mut RoaringBitmap,
) -> Result<()> {
match (left, right) {
// lower TO upper when lower > upper must return no result
(Included(l), Included(r)) if l > r => return Ok(()),
(Included(l), Excluded(r)) if l >= r => return Ok(()),
(Excluded(l), Excluded(r)) if l >= r => return Ok(()),
(Excluded(l), Included(r)) if l >= r => return Ok(()),
(_, _) => (),
}
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
rtxn, db, field_id, &left, &right, output,
)?;
Ok(())
}
fn inner_evaluate(
&self,
rtxn: &heed::RoTxn,
index: &Index,
filterable_fields: &HashSet<String>,
) -> Result<RoaringBitmap> {
match &self.condition {
2022-06-14 15:15:05 +02:00
FilterCondition::Not(f) => {
let all_ids = index.documents_ids(rtxn)?;
let selected = Self::inner_evaluate(
&(f.as_ref().clone()).into(),
rtxn,
index,
filterable_fields,
)?;
Ok(all_ids - selected)
2022-06-14 15:15:05 +02:00
}
2022-05-30 13:58:11 +02:00
FilterCondition::In { fid, els } => {
if crate::is_faceted(fid.value(), filterable_fields) {
2022-05-30 13:58:11 +02:00
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
let mut bitmap = RoaringBitmap::new();
for el in els {
let op = Condition::Equal(el.clone());
let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
bitmap |= el_bitmap;
}
Ok(bitmap)
} else {
Ok(RoaringBitmap::new())
}
} else {
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
2022-05-30 13:58:11 +02:00
attribute: fid.value(),
filterable_fields: filterable_fields.clone(),
}))?
2022-05-30 13:58:11 +02:00
}
}
FilterCondition::Condition { fid, op } => {
if crate::is_faceted(fid.value(), filterable_fields) {
2021-11-07 01:52:19 +01:00
let field_ids_map = index.fields_ids_map(rtxn)?;
if let Some(fid) = field_ids_map.id(fid.value()) {
Self::evaluate_operator(rtxn, index, fid, op)
2021-11-07 01:52:19 +01:00
} else {
Ok(RoaringBitmap::new())
2021-11-07 01:52:19 +01:00
}
} else {
2023-04-01 00:32:06 +02:00
Err(fid.as_external_error(FilterError::AttributeNotFilterable {
attribute: fid.value(),
filterable_fields: filterable_fields.clone(),
}))?
}
2021-06-16 18:33:33 +02:00
}
FilterCondition::Or(subfilters) => {
let mut bitmap = RoaringBitmap::new();
for f in subfilters {
2022-07-18 17:09:52 +02:00
bitmap |=
Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
}
Ok(bitmap)
2021-06-16 18:33:33 +02:00
}
FilterCondition::And(subfilters) => {
let mut subfilters_iter = subfilters.iter();
if let Some(first_subfilter) = subfilters_iter.next() {
2022-07-18 17:09:52 +02:00
let mut bitmap = Self::inner_evaluate(
&(first_subfilter.clone()).into(),
rtxn,
index,
filterable_fields,
)?;
for f in subfilters_iter {
if bitmap.is_empty() {
return Ok(bitmap);
}
2022-07-18 17:09:52 +02:00
bitmap &= Self::inner_evaluate(
&(f.clone()).into(),
rtxn,
index,
filterable_fields,
)?;
}
Ok(bitmap)
} else {
Ok(RoaringBitmap::new())
}
2021-06-16 18:33:33 +02:00
}
2021-10-22 18:03:39 +02:00
FilterCondition::GeoLowerThan { point, radius } => {
2021-11-07 01:52:19 +01:00
if filterable_fields.contains("_geo") {
let base_point: [f64; 2] =
[point[0].parse_finite_float()?, point[1].parse_finite_float()?];
2021-11-04 17:42:06 +01:00
if !(-90.0..=90.0).contains(&base_point[0]) {
2023-02-14 17:03:44 +01:00
return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
2021-11-04 17:42:06 +01:00
}
if !(-180.0..=180.0).contains(&base_point[1]) {
2023-02-14 17:03:44 +01:00
return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
2021-11-04 17:42:06 +01:00
}
let radius = radius.parse_finite_float()?;
let rtree = match index.geo_rtree(rtxn)? {
Some(rtree) => rtree,
None => return Ok(RoaringBitmap::new()),
};
2021-10-22 18:03:39 +02:00
2021-12-14 12:21:24 +01:00
let xyz_base_point = lat_lng_to_xyz(&base_point);
let result = rtree
2021-12-14 12:21:24 +01:00
.nearest_neighbor_iter(&xyz_base_point)
.take_while(|point| {
2022-11-30 19:44:26 +01:00
distance_between_two_points(&base_point, &point.data.1)
<= radius + f64::EPSILON
})
2021-12-14 12:21:24 +01:00
.map(|point| point.data.0)
.collect();
2021-10-22 18:03:39 +02:00
Ok(result)
} else {
Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
2021-11-04 17:42:06 +01:00
attribute: "_geo",
filterable_fields: filterable_fields.clone(),
}))?
}
2021-10-22 18:03:39 +02:00
}
FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
if filterable_fields.contains("_geo") {
let top_right: [f64; 2] = [
top_right_point[0].parse_finite_float()?,
top_right_point[1].parse_finite_float()?,
];
let bottom_left: [f64; 2] = [
bottom_left_point[0].parse_finite_float()?,
bottom_left_point[1].parse_finite_float()?,
];
if !(-90.0..=90.0).contains(&top_right[0]) {
2023-02-14 17:03:44 +01:00
return Err(
top_right_point[0].as_external_error(BadGeoError::Lat(top_right[0]))
2023-02-14 17:03:44 +01:00
)?;
}
if !(-180.0..=180.0).contains(&top_right[1]) {
2023-02-14 17:03:44 +01:00
return Err(
top_right_point[1].as_external_error(BadGeoError::Lng(top_right[1]))
2023-02-14 17:03:44 +01:00
)?;
}
if !(-90.0..=90.0).contains(&bottom_left[0]) {
return Err(bottom_left_point[0]
.as_external_error(BadGeoError::Lat(bottom_left[0])))?;
}
if !(-180.0..=180.0).contains(&bottom_left[1]) {
return Err(bottom_left_point[1]
.as_external_error(BadGeoError::Lng(bottom_left[1])))?;
}
if top_right[0] < bottom_left[0] {
return Err(bottom_left_point[1].as_external_error(
BadGeoError::BoundingBoxTopIsBelowBottom(top_right[0], bottom_left[0]),
))?;
}
// Instead of writing a custom `GeoBoundingBox` filter we're simply going to re-use the range
// filter to create the following filter;
// `_geo.lat {top_right[0]} TO {bottom_left[0]} AND _geo.lng {top_right[1]} TO {bottom_left[1]}`
// As we can see, we need to use a bunch of tokens that don't exist in the original filter,
// thus we're going to create tokens that point to a random span but contain our text.
let geo_lat_token = Token::new(
top_right_point[0].original_span(),
Some("_geo.lat".to_string()),
);
let condition_lat = FilterCondition::Condition {
fid: geo_lat_token,
op: Condition::Between {
from: bottom_left_point[0].clone(),
to: top_right_point[0].clone(),
},
};
let selected_lat = Filter { condition: condition_lat }.inner_evaluate(
rtxn,
index,
filterable_fields,
)?;
let geo_lng_token = Token::new(
top_right_point[1].original_span(),
Some("_geo.lng".to_string()),
);
let selected_lng = if top_right[1] < bottom_left[1] {
// In this case the bounding box is wrapping around the earth (going from 180 to -180).
// We need to update the lng part of the filter from;
// `_geo.lng {top_right[1]} TO {bottom_left[1]}` to
// `_geo.lng {bottom_left[1]} TO 180 AND _geo.lng -180 TO {top_right[1]}`
2023-02-02 18:19:56 +01:00
let min_lng_token = Token::new(
top_right_point[1].original_span(),
2023-02-02 18:19:56 +01:00
Some("-180.0".to_string()),
);
let max_lng_token = Token::new(
top_right_point[1].original_span(),
2023-02-02 18:19:56 +01:00
Some("180.0".to_string()),
);
let condition_left = FilterCondition::Condition {
fid: geo_lng_token.clone(),
op: Condition::Between {
from: bottom_left_point[1].clone(),
to: max_lng_token,
},
};
let left = Filter { condition: condition_left }.inner_evaluate(
rtxn,
index,
filterable_fields,
)?;
let condition_right = FilterCondition::Condition {
fid: geo_lng_token,
op: Condition::Between {
2022-10-28 19:01:23 +02:00
from: min_lng_token,
to: top_right_point[1].clone(),
},
};
let right = Filter { condition: condition_right }.inner_evaluate(
rtxn,
index,
filterable_fields,
)?;
2022-10-28 19:01:23 +02:00
left | right
} else {
let condition_lng = FilterCondition::Condition {
fid: geo_lng_token,
op: Condition::Between {
from: bottom_left_point[1].clone(),
to: top_right_point[1].clone(),
},
};
Filter { condition: condition_lng }.inner_evaluate(
rtxn,
index,
filterable_fields,
)?
};
Ok(selected_lat & selected_lng)
} else {
Err(top_right_point[0].as_external_error(
FilterError::AttributeNotFilterable {
attribute: "_geo",
filterable_fields: filterable_fields.clone(),
},
))?
}
}
}
}
}
impl<'a> From<FilterCondition<'a>> for Filter<'a> {
fn from(fc: FilterCondition<'a>) -> Self {
Self { condition: fc }
}
}
#[cfg(test)]
mod tests {
2022-08-04 11:34:10 +02:00
use std::fmt::Write;
2022-12-07 14:11:20 +01:00
use std::iter::FromIterator;
2022-08-04 11:34:10 +02:00
use big_s::S;
use either::Either;
use maplit::hashset;
2022-12-07 14:11:20 +01:00
use roaring::RoaringBitmap;
2022-08-04 11:34:10 +02:00
use crate::index::tests::TempIndex;
use crate::Filter;
/// Filters evaluated on an index without any document match nothing.
#[test]
fn empty_db() {
    let index = TempIndex::new();
    // Make `PrIcE` the only filterable field.
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset! { S("PrIcE") });
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let filter = Filter::from_str("PrIcE < 1000").unwrap().unwrap();
    let bitmap = filter.evaluate(&rtxn, &index).unwrap();
    assert!(bitmap.is_empty());

    let filter = Filter::from_str("NOT PrIcE >= 1000").unwrap().unwrap();
    let bitmap = filter.evaluate(&rtxn, &index).unwrap();
    assert!(bitmap.is_empty());
}
/// `Filter::from_array` must build the same filter as the equivalent
/// `Filter::from_str` expression.
#[test]
fn from_array() {
    // Simple array with Left
    let condition = Filter::from_array(vec![Either::Left(["channel = mv"])]).unwrap().unwrap();
    let expected = Filter::from_str("channel = mv").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Simple array with Right
    let condition = Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = mv")])
        .unwrap()
        .unwrap();
    let expected = Filter::from_str("channel = mv").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Array with Left and escaped quote
    let condition =
        Filter::from_array(vec![Either::Left(["channel = \"Mister Mv\""])]).unwrap().unwrap();
    let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Array with Right and escaped quote
    let condition =
        Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = \"Mister Mv\"")])
            .unwrap()
            .unwrap();
    let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Array with Left and escaped simple quote
    let condition =
        Filter::from_array(vec![Either::Left(["channel = 'Mister Mv'"])]).unwrap().unwrap();
    let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Array with Right and escaped simple quote
    let condition =
        Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = 'Mister Mv'")])
            .unwrap()
            .unwrap();
    let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Simple with parenthesis
    let condition =
        Filter::from_array(vec![Either::Left(["(channel = mv)"])]).unwrap().unwrap();
    let expected = Filter::from_str("(channel = mv)").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Test that the facet condition is correctly generated.
    let condition = Filter::from_array(vec![
        Either::Right("channel = gotaga"),
        Either::Left(vec!["timestamp = 44", "channel != ponce"]),
    ])
    .unwrap()
    .unwrap();
    let expected =
        Filter::from_str("channel = gotaga AND (timestamp = 44 OR channel != ponce)")
            .unwrap()
            .unwrap();
    assert_eq!(condition, expected);
}
2021-11-07 01:52:19 +01:00
/// Filtering on an attribute that is not filterable must fail with a message
/// that names the attribute, both without and with configured filterable fields.
#[test]
fn not_filterable() {
    let index = TempIndex::new();

    // No filterable attribute is configured yet.
    let rtxn = index.read_txn().unwrap();
    let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
    ));

    let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
    ));

    let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `dog` is not filterable. This index does not have configured filterable attributes."
    ));
    drop(rtxn);

    // Now `title` is the only filterable attribute.
    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("title")]);
            settings.set_filterable_fields(hashset! { S("title") });
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
    ));

    let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
    ));

    let filter = Filter::from_str("name = 12").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `name` is not filterable. Available filterable attributes are: `title`."
    ));
}
2022-06-09 16:03:49 +02:00
/// Quotes escaped inside a filter value must match the stored value (issue #2380).
#[test]
fn escaped_quote_in_filter_value_2380() {
    let index = TempIndex::new();

    index
        .add_documents(documents!([
            {
                "id": "test_1",
                "monitor_diagonal": "27' to 30'"
            },
            {
                "id": "test_2",
                "monitor_diagonal": "27\" to 30\""
            },
            {
                "id": "test_3",
                "monitor_diagonal": "27\" to 30'"
            },
        ]))
        .unwrap();

    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset!(S("monitor_diagonal")));
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let mut search = crate::Search::new(&rtxn, &index);
    // this filter is copy pasted from #2380 with the exact same escape sequence
    search.filter(Filter::from_str("monitor_diagonal = '27\" to 30\\''").unwrap().unwrap());
    let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
    assert_eq!(documents_ids, vec![2]);

    search.filter(Filter::from_str(r#"monitor_diagonal = "27' to 30'" "#).unwrap().unwrap());
    let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
    assert_eq!(documents_ids, vec![0]);

    search.filter(Filter::from_str(r#"monitor_diagonal = "27\" to 30\"" "#).unwrap().unwrap());
    let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
    assert_eq!(documents_ids, vec![1]);

    search.filter(Filter::from_str(r#"monitor_diagonal = "27\" to 30'" "#).unwrap().unwrap());
    let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
    assert_eq!(documents_ids, vec![2]);
}
2022-11-30 19:44:26 +01:00
// A `_geoRadius` filter with a radius of 0, centered exactly on the
// coordinates of the first document, must return that document only.
#[test]
fn zero_radius() {
let index = TempIndex::new();
// `_geo` has to be filterable for `_geoRadius` to be accepted.
index
.update_settings(|settings| {
settings.set_filterable_fields(hashset! { S("_geo") });
})
.unwrap();
index
.add_documents(documents!([
{
"id": 1,
"name": "Nàpiz' Milano",
"address": "Viale Vittorio Veneto, 30, 20124, Milan, Italy",
"type": "pizza",
"rating": 9,
"_geo": {
"lat": 45.4777599,
"lng": 9.1967508
}
},
{
"id": 2,
"name": "Artico Gelateria Tradizionale",
"address": "Via Dogana, 1, 20123 Milan, Italy",
"type": "ice cream",
"rating": 10,
"_geo": {
"lat": 45.4632046,
"lng": 9.1719421
}
},
]))
.unwrap();
let rtxn = index.read_txn().unwrap();
let mut search = crate::Search::new(&rtxn, &index);
// Same latitude and longitude as the first document, radius of 0.
search.filter(Filter::from_str("_geoRadius(45.4777599, 9.1967508, 0)").unwrap().unwrap());
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![0]);
}
/// `_geoRadius` must reject latitudes and longitudes that are out of range.
#[test]
fn geo_radius_error() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order
            settings.set_filterable_fields(hashset! { S("_geo"), S("price") });
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // georadius has a bad latitude
    let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(
        error.to_string().starts_with(
            "Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees."
        ),
        "{}",
        error
    );

    // georadius has a bad latitude
    let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
    ));

    // georadius has a bad longitude
    let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(
        error.to_string().contains(
            "Bad longitude `250`. Longitude must be contained between -180 and 180 degrees."
        ),
        "{}",
        error,
    );

    // georadius has a bad longitude
    let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));
}
2022-10-28 19:01:23 +02:00
/// `_geoBoundingBox` must reject out-of-range latitudes and longitudes in
/// either of its two points.
#[test]
fn geo_bounding_box_error() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order
            settings.set_filterable_fields(hashset! { S("_geo"), S("price") });
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // the first point has a bad latitude (too low)
    let filter =
        Filter::from_str("_geoBoundingBox([-90.0000001, 150], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(
        error.to_string().starts_with(
            "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
        ),
        "{}",
        error
    );

    // the first point has a bad latitude (too high)
    let filter =
        Filter::from_str("_geoBoundingBox([90.0000001, 150], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(
        error.to_string().starts_with(
            "Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
        ),
        "{}",
        error
    );

    // the second point has a bad latitude (too low)
    let filter =
        Filter::from_str("_geoBoundingBox([30, 10], [-90.0000001, 150])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
    ));

    // the second point has a bad latitude (too high)
    let filter =
        Filter::from_str("_geoBoundingBox([30, 10], [90.0000001, 150])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
    ));

    // the first point has a bad longitude (too high)
    let filter =
        Filter::from_str("_geoBoundingBox([-10, 180.000001], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));

    // the first point has a bad longitude (too low)
    let filter =
        Filter::from_str("_geoBoundingBox([-10, -180.000001], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));

    // the second point has a bad longitude (too low)
    let filter =
        Filter::from_str("_geoBoundingBox([30, 10], [-10, -180.000001])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));

    // the second point has a bad longitude (too high)
    let filter =
        Filter::from_str("_geoBoundingBox([30, 10], [-10, 180.000001])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));
}
/// A very long chain of `OR`s must be accepted by `Filter::from_str`.
#[test]
fn filter_depth() {
    // Builds a big filter made of thousands of conditions joined by ORs.
    let typical_condition = "account_ids=14361 OR ";
    let mut expression = String::with_capacity(typical_condition.len() * 14360);
    for account_id in 1..=14361 {
        let _ = write!(&mut expression, "account_ids={}", account_id);
        // Every condition but the last one is followed by the separator.
        if account_id != 14361 {
            let _ = write!(&mut expression, " OR ");
        }
    }

    // Note: such a filter used to be rejected for being too deep, but that is
    // no longer the case.
    let parsed = Filter::from_str(&expression).unwrap();
    assert!(parsed.is_some());
}
2021-12-09 11:14:51 +01:00
/// An expression that only contains whitespace parses to no filter at all.
#[test]
fn empty_filter() {
    let parsed = Filter::from_str(" ").unwrap();
    assert_eq!(parsed, None);
}
/// Non-finite values (`inf`, `NaN`, `infinity`) can be used with `=`, but
/// are rejected as the operand of a range operator.
#[test]
fn non_finite_float() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("price")]); // to keep the fields order
            settings.set_filterable_fields(hashset! { S("price") });
        })
        .unwrap();
    index
        .add_documents(documents!([
            {
                "id": "test_1",
                "price": "inf"
            },
            {
                "id": "test_2",
                "price": "2000"
            },
            {
                "id": "test_3",
                "price": "infinity"
            },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // Parses `expression` and evaluates it against the index.
    let evaluate =
        |expression: &str| Filter::from_str(expression).unwrap().unwrap().evaluate(&rtxn, &index);
    // Tells whether evaluating `expression` fails with an `InvalidFilter` user error.
    let is_invalid_filter = |expression: &str| {
        matches!(
            evaluate(expression),
            Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
        )
    };

    assert!(evaluate("price = inf").unwrap().contains(0));
    assert!(is_invalid_filter("price < inf"));

    assert!(evaluate("price = NaN").unwrap().is_empty());
    assert!(is_invalid_filter("price < NaN"));

    assert!(evaluate("price = infinity").unwrap().contains(2));
    assert!(is_invalid_filter("price < infinity"));
}
2022-12-07 14:11:20 +01:00
/// Checks every numeric operator against 100 documents whose `id` goes from
/// 0 to 99 and whose `two` field is `id % 10`. The field `one` is filterable
/// but never set on any document.
#[test]
fn filter_number() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_primary_key("id".to_owned());
            settings.set_filterable_fields(hashset! { S("id"), S("one"), S("two") });
        })
        .unwrap();

    let mut docs = vec![];
    for i in 0..100 {
        docs.push(serde_json::json!({ "id": i, "two": i % 10 }));
    }

    index.add_documents(documents!(docs)).unwrap();

    let rtxn = index.read_txn().unwrap();

    // Parses `expression`, evaluates it and returns the matching document ids.
    let matching = |expression: &str| {
        Filter::from_str(expression).unwrap().unwrap().evaluate(&rtxn, &index).unwrap()
    };

    for i in 0..100 {
        assert_eq!(matching(&format!("id = {i}")), RoaringBitmap::from_iter([i]));
    }
    for i in 0..100 {
        assert_eq!(matching(&format!("id > {i}")), RoaringBitmap::from_iter((i + 1)..100));
    }
    for i in 0..100 {
        assert_eq!(matching(&format!("id < {i}")), RoaringBitmap::from_iter(0..i));
    }
    for i in 0..100 {
        assert_eq!(matching(&format!("id <= {i}")), RoaringBitmap::from_iter(0..=i));
    }
    for i in 0..100 {
        assert_eq!(matching(&format!("id >= {i}")), RoaringBitmap::from_iter(i..100));
    }
    for i in 0..100 {
        for j in i..100 {
            assert_eq!(matching(&format!("id {i} TO {j}")), RoaringBitmap::from_iter(i..=j));
        }
    }

    // No document has the field `one`.
    assert_eq!(matching("one >= 0 OR one <= 0"), RoaringBitmap::default());
    assert_eq!(matching("one = 0"), RoaringBitmap::default());

    for i in 0..10 {
        for j in i..10 {
            assert_eq!(
                matching(&format!("two {i} TO {j}")),
                RoaringBitmap::from_iter((0..100).filter(|x| (i..=j).contains(&(x % 10))))
            );
        }
    }

    assert_eq!(
        matching("two != 0"),
        RoaringBitmap::from_iter((0..100).filter(|x| x % 10 != 0))
    );
}
}