1135 lines
44 KiB
Rust
1135 lines
44 KiB
Rust
use std::collections::HashSet;
|
||
use std::fmt::{Debug, Display};
|
||
use std::ops::Bound::{self, Excluded, Included};
|
||
|
||
use either::Either;
|
||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
|
||
use roaring::RoaringBitmap;
|
||
use serde_json::Value;
|
||
|
||
use super::facet_range_search;
|
||
use crate::error::{Error, UserError};
|
||
use crate::heed_codec::facet::{
|
||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, OrderedF64Codec,
|
||
};
|
||
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
|
||
|
||
/// The maximum number of filters the filter AST can process.
///
/// Conditions whose tree reaches this depth are rejected with
/// `FilterError::TooDeep` (checked via `token_at_depth` in the constructors)
/// before any evaluation takes place.
const MAX_FILTER_DEPTH: usize = 2000;
|
||
|
||
/// A parsed filter expression, wrapping the AST produced by `filter_parser`.
///
/// Build one with [`Filter::from_str`], [`Filter::from_json`] or
/// [`Filter::from_array`], then run it against an index with `evaluate`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Filter<'a> {
    // Root of the condition tree; `'a` borrows from the original filter text.
    condition: FilterCondition<'a>,
}
|
||
|
||
/// Validation errors for `_geo` filter coordinates (`_geoRadius`,
/// `_geoBoundingBox`).
#[derive(Debug)]
pub enum BadGeoError {
    Lat(f64),
    Lng(f64),
    BoundingBoxTopIsBelowBottom(f64, f64),
}

impl std::error::Error for BadGeoError {}

impl Display for BadGeoError {
    /// Renders the user-facing error message for an invalid coordinate.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // `f64` is `Copy`, so match on the dereferenced value directly.
        match *self {
            Self::Lat(lat) => write!(
                f,
                "Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. ",
                lat
            ),
            Self::Lng(lng) => write!(
                f,
                "Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. ",
                lng
            ),
            Self::BoundingBoxTopIsBelowBottom(top, bottom) => {
                write!(f, "The top latitude `{top}` is below the bottom latitude `{bottom}`.")
            }
        }
    }
}
|
||
|
||
/// Errors raised while evaluating a [`Filter`] against an index.
#[derive(Debug)]
enum FilterError<'a> {
    // The filtered attribute is not in the index's configured filterable fields.
    AttributeNotFilterable { attribute: &'a str, filterable_fields: HashSet<String> },
    // An invalid `_geo` coordinate or bounding box was supplied.
    ParseGeoError(BadGeoError),
    // The condition tree exceeds `MAX_FILTER_DEPTH`.
    TooDeep,
}

impl<'a> std::error::Error for FilterError<'a> {}
|
||
|
||
impl<'a> From<BadGeoError> for FilterError<'a> {
|
||
fn from(geo_error: BadGeoError) -> Self {
|
||
FilterError::ParseGeoError(geo_error)
|
||
}
|
||
}
|
||
|
||
impl<'a> Display for FilterError<'a> {
|
||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||
match self {
|
||
Self::AttributeNotFilterable { attribute, filterable_fields } => {
|
||
if filterable_fields.is_empty() {
|
||
write!(
|
||
f,
|
||
"Attribute `{}` is not filterable. This index does not have configured filterable attributes.",
|
||
attribute,
|
||
)
|
||
} else {
|
||
let filterables_list = filterable_fields
|
||
.iter()
|
||
.map(AsRef::as_ref)
|
||
.collect::<Vec<&str>>()
|
||
.join(" ");
|
||
|
||
write!(
|
||
f,
|
||
"Attribute `{}` is not filterable. Available filterable attributes are: `{}`.",
|
||
attribute,
|
||
filterables_list,
|
||
)
|
||
}
|
||
}
|
||
Self::TooDeep => write!(
|
||
f,
|
||
"Too many filter conditions, can't process more than {} filters.",
|
||
MAX_FILTER_DEPTH
|
||
),
|
||
Self::ParseGeoError(error) => write!(f, "{}", error),
|
||
}
|
||
}
|
||
}
|
||
|
||
/// Converts a `filter_parser` error into a user-facing `InvalidFilter` error.
impl<'a> From<FPError<'a>> for Error {
    fn from(error: FPError<'a>) -> Self {
        // Only the rendered message is kept; the parser's span information is
        // flattened into the string here.
        Self::UserError(UserError::InvalidFilter(error.to_string()))
    }
}
|
||
|
||
/// Unwraps a [`Filter`] back into its underlying condition tree.
impl<'a> From<Filter<'a>> for FilterCondition<'a> {
    fn from(f: Filter<'a>) -> Self {
        f.condition
    }
}
|
||
|
||
impl<'a> Filter<'a> {
    /// Builds a filter from a JSON value.
    ///
    /// Accepts either a string (a single filter expression) or an array mixing
    /// strings and arrays of strings (see `parse_filter_array`). Any other
    /// JSON type is rejected with `InvalidFilterExpression`. Returns
    /// `Ok(None)` when the expression parses to no condition at all.
    pub fn from_json(facets: &'a Value) -> Result<Option<Self>> {
        match facets {
            Value::String(expr) => {
                let condition = Filter::from_str(expr)?;
                Ok(condition)
            }
            Value::Array(arr) => Self::parse_filter_array(arr),
            v => Err(Error::UserError(UserError::InvalidFilterExpression(
                &["String", "Array"],
                v.clone(),
            ))),
        }
    }

    /// Converts a JSON array into the `Either`-based shape expected by
    /// [`Filter::from_array`]: top-level strings become AND-ed rules
    /// (`Either::Right`) and nested arrays of strings become OR groups
    /// (`Either::Left`). Any non-string leaf is rejected.
    fn parse_filter_array(arr: &'a [Value]) -> Result<Option<Self>> {
        let mut ands = Vec::new();
        for value in arr {
            match value {
                Value::String(s) => ands.push(Either::Right(s.as_str())),
                Value::Array(arr) => {
                    let mut ors = Vec::new();
                    for value in arr {
                        match value {
                            Value::String(s) => ors.push(s.as_str()),
                            v => {
                                // Nested arrays may only contain strings.
                                return Err(Error::UserError(UserError::InvalidFilterExpression(
                                    &["String"],
                                    v.clone(),
                                )))
                            }
                        }
                    }
                    ands.push(Either::Left(ors));
                }
                v => {
                    return Err(Error::UserError(UserError::InvalidFilterExpression(
                        &["String", "[String]"],
                        v.clone(),
                    )))
                }
            }
        }

        Filter::from_array(ands)
    }

    /// Builds a filter from a mix of OR groups (`Either::Left`) and single
    /// rules (`Either::Right`); the resulting groups are combined with AND.
    ///
    /// Rules that parse to nothing are skipped, single-element groups are
    /// unwrapped, and an entirely empty input yields `Ok(None)`. The final
    /// tree is rejected if it is deeper than `MAX_FILTER_DEPTH`.
    pub fn from_array<I, J>(array: I) -> Result<Option<Self>>
    where
        I: IntoIterator<Item = Either<J, &'a str>>,
        J: IntoIterator<Item = &'a str>,
    {
        let mut ands = vec![];

        for either in array {
            match either {
                Either::Left(array) => {
                    let mut ors = vec![];
                    for rule in array {
                        if let Some(filter) = Self::from_str(rule)? {
                            ors.push(filter.condition);
                        }
                    }

                    match ors.len() {
                        // Every rule in this group parsed to nothing: drop it.
                        0 => (),
                        // A single condition does not need an `Or` wrapper.
                        1 => ands.push(ors.pop().unwrap()),
                        _ => ands.push(FilterCondition::Or(ors)),
                    }
                }
                Either::Right(rule) => {
                    if let Some(filter) = Self::from_str(rule)? {
                        ands.push(filter.condition);
                    }
                }
            }
        }
        let and = if ands.is_empty() {
            return Ok(None);
        } else if ands.len() == 1 {
            // Same unwrapping for a single AND member.
            ands.pop().unwrap()
        } else {
            FilterCondition::And(ands)
        };

        // Reject pathologically deep trees before they can blow the stack
        // during recursive evaluation.
        if let Some(token) = and.token_at_depth(MAX_FILTER_DEPTH) {
            return Err(token.as_external_error(FilterError::TooDeep).into());
        }

        Ok(Some(Self { condition: and }))
    }

    /// Parses a single filter expression string.
    ///
    /// Returns `Ok(None)` for expressions containing no condition (e.g. only
    /// whitespace) and an `InvalidFilter` user error on syntax errors. Not the
    /// `FromStr` trait because the result borrows from `expression`.
    #[allow(clippy::should_implement_trait)]
    pub fn from_str(expression: &'a str) -> Result<Option<Self>> {
        let condition = match FilterCondition::parse(expression) {
            Ok(Some(fc)) => Ok(fc),
            Ok(None) => return Ok(None),
            Err(e) => Err(Error::UserError(UserError::InvalidFilter(e.to_string()))),
        }?;

        // Same depth guard as `from_array`.
        if let Some(token) = condition.token_at_depth(MAX_FILTER_DEPTH) {
            return Err(token.as_external_error(FilterError::TooDeep).into());
        }

        Ok(Some(Self { condition }))
    }
}
|
||
|
||
impl<'a> Filter<'a> {
    /// Evaluates the filter against the index and returns the ids of the
    /// matching documents, with soft-deleted documents removed.
    pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
        // to avoid doing this for each recursive call we're going to do it ONCE ahead of time
        let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
        let filterable_fields = index.filterable_fields(rtxn)?;

        // and finally we delete all the soft_deleted_documents, again, only once at the very end
        self.inner_evaluate(rtxn, index, &filterable_fields)
            .map(|result| result - soft_deleted_documents)
    }

    /// Resolves a single leaf `Condition` on one field to a set of document ids.
    ///
    /// Comparison operators (`>`, `>=`, `<`, `<=`, `TO`) are turned into an
    /// `f64` range query over the numbers facet database. `Null`, `Empty`,
    /// `Exists`, `Equal` and `NotEqual` return early with dedicated lookups.
    fn evaluate_operator(
        rtxn: &heed::RoTxn,
        index: &Index,
        field_id: FieldId,
        operator: &Condition<'a>,
    ) -> Result<RoaringBitmap> {
        let numbers_db = index.facet_id_f64_docids;
        let strings_db = index.facet_id_string_docids;

        // Make sure we always bound the ranges with the field id and the level,
        // as the facets values are all in the same database and prefixed by the
        // field id and the level.

        let (left, right) = match operator {
            Condition::GreaterThan(val) => {
                (Excluded(val.parse_finite_float()?), Included(f64::MAX))
            }
            Condition::GreaterThanOrEqual(val) => {
                (Included(val.parse_finite_float()?), Included(f64::MAX))
            }
            Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.parse_finite_float()?)),
            Condition::LowerThanOrEqual(val) => {
                (Included(f64::MIN), Included(val.parse_finite_float()?))
            }
            Condition::Between { from, to } => {
                (Included(from.parse_finite_float()?), Included(to.parse_finite_float()?))
            }
            Condition::Null => {
                let is_null = index.null_faceted_documents_ids(rtxn, field_id)?;
                return Ok(is_null);
            }
            Condition::Empty => {
                let is_empty = index.empty_faceted_documents_ids(rtxn, field_id)?;
                return Ok(is_empty);
            }
            Condition::Exists => {
                let exist = index.exists_faceted_documents_ids(rtxn, field_id)?;
                return Ok(exist);
            }
            Condition::Equal(val) => {
                // Equality matches the normalized string form and, when the
                // value also parses as a finite float, the numeric form; the
                // two id sets are unioned.
                let string_docids = strings_db
                    .get(
                        rtxn,
                        &FacetGroupKey {
                            field_id,
                            level: 0,
                            left_bound: &crate::normalize_facet(val.value()),
                        },
                    )?
                    .map(|v| v.bitmap)
                    .unwrap_or_default();
                let number = val.parse_finite_float().ok();
                let number_docids = match number {
                    Some(n) => numbers_db
                        .get(rtxn, &FacetGroupKey { field_id, level: 0, left_bound: n })?
                        .map(|v| v.bitmap)
                        .unwrap_or_default(),
                    None => RoaringBitmap::new(),
                };
                return Ok(string_docids | number_docids);
            }
            Condition::NotEqual(val) => {
                // `!=` is the complement of `=` over all documents; soft-deleted
                // ids that sneak in here are removed later by `evaluate`.
                let operator = Condition::Equal(val.clone());
                let docids = Self::evaluate_operator(rtxn, index, field_id, &operator)?;
                let all_ids = index.documents_ids(rtxn)?;
                return Ok(all_ids - docids);
            }
        };

        let mut output = RoaringBitmap::new();
        Self::explore_facet_number_levels(rtxn, numbers_db, field_id, left, right, &mut output)?;
        Ok(output)
    }

    /// Aggregates the documents ids that are part of the specified range automatically
    /// going deeper through the levels.
    fn explore_facet_number_levels(
        rtxn: &heed::RoTxn,
        db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
        field_id: FieldId,
        left: Bound<f64>,
        right: Bound<f64>,
        output: &mut RoaringBitmap,
    ) -> Result<()> {
        match (left, right) {
            // lower TO upper when lower > upper must return no result
            // (an `Excluded` endpoint also makes the equal case empty)
            (Included(l), Included(r)) if l > r => return Ok(()),
            (Included(l), Excluded(r)) if l >= r => return Ok(()),
            (Excluded(l), Excluded(r)) if l >= r => return Ok(()),
            (Excluded(l), Included(r)) if l >= r => return Ok(()),
            (_, _) => (),
        }
        facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
            rtxn, db, field_id, &left, &right, output,
        )?;

        Ok(())
    }

    /// Recursively evaluates the condition tree.
    ///
    /// `filterable_fields` is resolved once by `evaluate` and threaded through
    /// every recursive call; conditions on attributes outside this set are
    /// reported as `AttributeNotFilterable` errors.
    fn inner_evaluate(
        &self,
        rtxn: &heed::RoTxn,
        index: &Index,
        filterable_fields: &HashSet<String>,
    ) -> Result<RoaringBitmap> {
        match &self.condition {
            FilterCondition::Not(f) => {
                // NOT is the complement over every document in the index.
                let all_ids = index.documents_ids(rtxn)?;
                let selected = Self::inner_evaluate(
                    &(f.as_ref().clone()).into(),
                    rtxn,
                    index,
                    filterable_fields,
                )?;
                Ok(all_ids - selected)
            }
            FilterCondition::In { fid, els } => {
                if crate::is_faceted(fid.value(), filterable_fields) {
                    let field_ids_map = index.fields_ids_map(rtxn)?;

                    if let Some(fid) = field_ids_map.id(fid.value()) {
                        // `IN` is the union of an equality per listed element.
                        let mut bitmap = RoaringBitmap::new();

                        for el in els {
                            let op = Condition::Equal(el.clone());
                            let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
                            bitmap |= el_bitmap;
                        }
                        Ok(bitmap)
                    } else {
                        // The field is filterable but no document contains it.
                        Ok(RoaringBitmap::new())
                    }
                } else {
                    Err(fid.as_external_error(FilterError::AttributeNotFilterable {
                        attribute: fid.value(),
                        filterable_fields: filterable_fields.clone(),
                    }))?
                }
            }
            FilterCondition::Condition { fid, op } => {
                if crate::is_faceted(fid.value(), filterable_fields) {
                    let field_ids_map = index.fields_ids_map(rtxn)?;
                    if let Some(fid) = field_ids_map.id(fid.value()) {
                        Self::evaluate_operator(rtxn, index, fid, op)
                    } else {
                        // Filterable but absent from every document: matches nothing.
                        Ok(RoaringBitmap::new())
                    }
                } else {
                    Err(fid.as_external_error(FilterError::AttributeNotFilterable {
                        attribute: fid.value(),
                        filterable_fields: filterable_fields.clone(),
                    }))?
                }
            }
            FilterCondition::Or(subfilters) => {
                // OR: union of every subfilter's result.
                let mut bitmap = RoaringBitmap::new();
                for f in subfilters {
                    bitmap |=
                        Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
                }
                Ok(bitmap)
            }
            FilterCondition::And(subfilters) => {
                // AND: intersection, short-circuiting once the running result
                // is empty (it can only stay empty).
                let mut subfilters_iter = subfilters.iter();
                if let Some(first_subfilter) = subfilters_iter.next() {
                    let mut bitmap = Self::inner_evaluate(
                        &(first_subfilter.clone()).into(),
                        rtxn,
                        index,
                        filterable_fields,
                    )?;
                    for f in subfilters_iter {
                        if bitmap.is_empty() {
                            return Ok(bitmap);
                        }
                        bitmap &= Self::inner_evaluate(
                            &(f.clone()).into(),
                            rtxn,
                            index,
                            filterable_fields,
                        )?;
                    }
                    Ok(bitmap)
                } else {
                    // An empty AND matches nothing.
                    Ok(RoaringBitmap::new())
                }
            }
            FilterCondition::GeoLowerThan { point, radius } => {
                if filterable_fields.contains("_geo") {
                    // Validate the center coordinates before touching the r-tree.
                    let base_point: [f64; 2] =
                        [point[0].parse_finite_float()?, point[1].parse_finite_float()?];
                    if !(-90.0..=90.0).contains(&base_point[0]) {
                        return Err(point[0].as_external_error(BadGeoError::Lat(base_point[0])))?;
                    }
                    if !(-180.0..=180.0).contains(&base_point[1]) {
                        return Err(point[1].as_external_error(BadGeoError::Lng(base_point[1])))?;
                    }
                    // NOTE(review): the radius itself is not range-checked; a
                    // negative radius just matches nothing via `take_while`.
                    let radius = radius.parse_finite_float()?;
                    let rtree = match index.geo_rtree(rtxn)? {
                        Some(rtree) => rtree,
                        None => return Ok(RoaringBitmap::new()),
                    };

                    let xyz_base_point = lat_lng_to_xyz(&base_point);

                    // Walk neighbors outward and stop at the first one farther
                    // than the radius (epsilon keeps the boundary inclusive).
                    let result = rtree
                        .nearest_neighbor_iter(&xyz_base_point)
                        .take_while(|point| {
                            distance_between_two_points(&base_point, &point.data.1)
                                <= radius + f64::EPSILON
                        })
                        .map(|point| point.data.0)
                        .collect();

                    Ok(result)
                } else {
                    Err(point[0].as_external_error(FilterError::AttributeNotFilterable {
                        attribute: "_geo",
                        filterable_fields: filterable_fields.clone(),
                    }))?
                }
            }
            FilterCondition::GeoBoundingBox { top_right_point, bottom_left_point } => {
                if filterable_fields.contains("_geo") {
                    let top_right: [f64; 2] = [
                        top_right_point[0].parse_finite_float()?,
                        top_right_point[1].parse_finite_float()?,
                    ];
                    let bottom_left: [f64; 2] = [
                        bottom_left_point[0].parse_finite_float()?,
                        bottom_left_point[1].parse_finite_float()?,
                    ];
                    if !(-90.0..=90.0).contains(&top_right[0]) {
                        return Err(
                            top_right_point[0].as_external_error(BadGeoError::Lat(top_right[0]))
                        )?;
                    }
                    if !(-180.0..=180.0).contains(&top_right[1]) {
                        return Err(
                            top_right_point[1].as_external_error(BadGeoError::Lng(top_right[1]))
                        )?;
                    }
                    if !(-90.0..=90.0).contains(&bottom_left[0]) {
                        return Err(bottom_left_point[0]
                            .as_external_error(BadGeoError::Lat(bottom_left[0])))?;
                    }
                    if !(-180.0..=180.0).contains(&bottom_left[1]) {
                        return Err(bottom_left_point[1]
                            .as_external_error(BadGeoError::Lng(bottom_left[1])))?;
                    }
                    if top_right[0] < bottom_left[0] {
                        // NOTE(review): this latitude error is attached to the
                        // longitude token `bottom_left_point[1]` — verify the
                        // span choice is intentional.
                        return Err(bottom_left_point[1].as_external_error(
                            BadGeoError::BoundingBoxTopIsBelowBottom(top_right[0], bottom_left[0]),
                        ))?;
                    }

                    // Instead of writing a custom `GeoBoundingBox` filter we're simply going to re-use the range
                    // filter to create the following filter;
                    // `_geo.lat {top_right[0]} TO {bottom_left[0]} AND _geo.lng {top_right[1]} TO {bottom_left[1]}`
                    // As we can see, we need to use a bunch of tokens that don't exist in the original filter,
                    // thus we're going to create tokens that point to a random span but contain our text.

                    let geo_lat_token = Token::new(
                        top_right_point[0].original_span(),
                        Some("_geo.lat".to_string()),
                    );

                    let condition_lat = FilterCondition::Condition {
                        fid: geo_lat_token,
                        op: Condition::Between {
                            from: bottom_left_point[0].clone(),
                            to: top_right_point[0].clone(),
                        },
                    };

                    let selected_lat = Filter { condition: condition_lat }.inner_evaluate(
                        rtxn,
                        index,
                        filterable_fields,
                    )?;

                    let geo_lng_token = Token::new(
                        top_right_point[1].original_span(),
                        Some("_geo.lng".to_string()),
                    );
                    let selected_lng = if top_right[1] < bottom_left[1] {
                        // In this case the bounding box is wrapping around the earth (going from 180 to -180).
                        // We need to update the lng part of the filter from;
                        // `_geo.lng {top_right[1]} TO {bottom_left[1]}` to
                        // `_geo.lng {bottom_left[1]} TO 180 AND _geo.lng -180 TO {top_right[1]}`

                        let min_lng_token = Token::new(
                            top_right_point[1].original_span(),
                            Some("-180.0".to_string()),
                        );
                        let max_lng_token = Token::new(
                            top_right_point[1].original_span(),
                            Some("180.0".to_string()),
                        );

                        let condition_left = FilterCondition::Condition {
                            fid: geo_lng_token.clone(),
                            op: Condition::Between {
                                from: bottom_left_point[1].clone(),
                                to: max_lng_token,
                            },
                        };
                        let left = Filter { condition: condition_left }.inner_evaluate(
                            rtxn,
                            index,
                            filterable_fields,
                        )?;

                        let condition_right = FilterCondition::Condition {
                            fid: geo_lng_token,
                            op: Condition::Between {
                                from: min_lng_token,
                                to: top_right_point[1].clone(),
                            },
                        };
                        let right = Filter { condition: condition_right }.inner_evaluate(
                            rtxn,
                            index,
                            filterable_fields,
                        )?;

                        left | right
                    } else {
                        let condition_lng = FilterCondition::Condition {
                            fid: geo_lng_token,
                            op: Condition::Between {
                                from: bottom_left_point[1].clone(),
                                to: top_right_point[1].clone(),
                            },
                        };
                        Filter { condition: condition_lng }.inner_evaluate(
                            rtxn,
                            index,
                            filterable_fields,
                        )?
                    };

                    // A document is inside the box iff it matches both ranges.
                    Ok(selected_lat & selected_lng)
                } else {
                    Err(top_right_point[0].as_external_error(
                        FilterError::AttributeNotFilterable {
                            attribute: "_geo",
                            filterable_fields: filterable_fields.clone(),
                        },
                    ))?
                }
            }
        }
    }
}
|
||
|
||
/// Wraps a raw condition tree in a [`Filter`]; note this path performs no
/// `MAX_FILTER_DEPTH` check, unlike `from_str`/`from_array`.
impl<'a> From<FilterCondition<'a>> for Filter<'a> {
    fn from(fc: FilterCondition<'a>) -> Self {
        Self { condition: fc }
    }
}
|
||
|
||
#[cfg(test)]
|
||
mod tests {
|
||
use std::fmt::Write;
|
||
use std::iter::FromIterator;
|
||
|
||
use big_s::S;
|
||
use either::Either;
|
||
use maplit::hashset;
|
||
use roaring::RoaringBitmap;
|
||
|
||
use crate::index::tests::TempIndex;
|
||
use crate::Filter;
|
||
|
||
#[test]
fn empty_db() {
    let index = TempIndex::new();
    // Set the filterable fields to be `PrIcE`.
    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset! { S("PrIcE") });
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // Filtering an empty index must match no document, whatever the condition.
    let filter = Filter::from_str("PrIcE < 1000").unwrap().unwrap();
    let bitmap = filter.evaluate(&rtxn, &index).unwrap();
    assert!(bitmap.is_empty());

    let filter = Filter::from_str("NOT PrIcE >= 1000").unwrap().unwrap();
    let bitmap = filter.evaluate(&rtxn, &index).unwrap();
    assert!(bitmap.is_empty());
}
|
||
|
||
#[test]
fn from_array() {
    // `Filter::from_array` must build the same AST as the equivalent string filter.

    // Simple array with Left
    let condition = Filter::from_array(vec![Either::Left(["channel = mv"])]).unwrap().unwrap();
    let expected = Filter::from_str("channel = mv").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Simple array with Right
    let condition = Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = mv")])
        .unwrap()
        .unwrap();
    let expected = Filter::from_str("channel = mv").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Array with Left and escaped quote
    let condition =
        Filter::from_array(vec![Either::Left(["channel = \"Mister Mv\""])]).unwrap().unwrap();
    let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Array with Right and escaped quote
    let condition =
        Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = \"Mister Mv\"")])
            .unwrap()
            .unwrap();
    let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Array with Left and escaped simple quote
    let condition =
        Filter::from_array(vec![Either::Left(["channel = 'Mister Mv'"])]).unwrap().unwrap();
    let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Array with Right and escaped simple quote
    let condition =
        Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = 'Mister Mv'")])
            .unwrap()
            .unwrap();
    let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Simple with parenthesis
    let condition =
        Filter::from_array(vec![Either::Left(["(channel = mv)"])]).unwrap().unwrap();
    let expected = Filter::from_str("(channel = mv)").unwrap().unwrap();
    assert_eq!(condition, expected);

    // Test that the facet condition is correctly generated.
    let condition = Filter::from_array(vec![
        Either::Right("channel = gotaga"),
        Either::Left(vec!["timestamp = 44", "channel != ponce"]),
    ])
    .unwrap()
    .unwrap();
    let expected =
        Filter::from_str("channel = gotaga AND (timestamp = 44 OR channel != ponce)")
            .unwrap()
            .unwrap();
    assert_eq!(condition, expected);
}
|
||
|
||
#[test]
fn not_filterable() {
    // Filtering on a non-filterable attribute must fail with a message that
    // lists the configured filterable attributes (or says there are none).
    let index = TempIndex::new();

    let rtxn = index.read_txn().unwrap();
    let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
    ));

    let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `_geo` is not filterable. This index does not have configured filterable attributes."
    ));

    let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `dog` is not filterable. This index does not have configured filterable attributes."
    ));
    drop(rtxn);

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("title")]);
            settings.set_filterable_fields(hashset! { S("title") });
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
    ));

    let filter = Filter::from_str("_geoBoundingBox([42, 150], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
    ));

    let filter = Filter::from_str("name = 12").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().starts_with(
        "Attribute `name` is not filterable. Available filterable attributes are: `title`."
    ));
}
|
||
|
||
#[test]
fn escaped_quote_in_filter_value_2380() {
    // Regression test for issue #2380: escaped quotes inside filter values.
    let index = TempIndex::new();

    index
        .add_documents(documents!([
            {
                "id": "test_1",
                "monitor_diagonal": "27' to 30'"
            },
            {
                "id": "test_2",
                "monitor_diagonal": "27\" to 30\""
            },
            {
                "id": "test_3",
                "monitor_diagonal": "27\" to 30'"
            },
        ]))
        .unwrap();

    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset!(S("monitor_diagonal")));
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let mut search = crate::Search::new(&rtxn, &index);
    // this filter is copy pasted from #2380 with the exact same escape sequence
    search.filter(Filter::from_str("monitor_diagonal = '27\" to 30\\''").unwrap().unwrap());
    let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
    assert_eq!(documents_ids, vec![2]);

    search.filter(Filter::from_str(r#"monitor_diagonal = "27' to 30'" "#).unwrap().unwrap());
    let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
    assert_eq!(documents_ids, vec![0]);

    search.filter(Filter::from_str(r#"monitor_diagonal = "27\" to 30\"" "#).unwrap().unwrap());
    let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
    assert_eq!(documents_ids, vec![1]);

    search.filter(Filter::from_str(r#"monitor_diagonal = "27\" to 30'" "#).unwrap().unwrap());
    let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
    assert_eq!(documents_ids, vec![2]);
}
|
||
|
||
#[test]
fn zero_radius() {
    // A `_geoRadius` of 0 must still match the document sitting exactly at
    // the given coordinates.
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_filterable_fields(hashset! { S("_geo") });
        })
        .unwrap();

    index
        .add_documents(documents!([
            {
                "id": 1,
                "name": "Nàpiz' Milano",
                "address": "Viale Vittorio Veneto, 30, 20124, Milan, Italy",
                "type": "pizza",
                "rating": 9,
                "_geo": {
                    "lat": 45.4777599,
                    "lng": 9.1967508
                }
            },
            {
                "id": 2,
                "name": "Artico Gelateria Tradizionale",
                "address": "Via Dogana, 1, 20123 Milan, Italy",
                "type": "ice cream",
                "rating": 10,
                "_geo": {
                    "lat": 45.4632046,
                    "lng": 9.1719421
                }
            },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let mut search = crate::Search::new(&rtxn, &index);

    search.filter(Filter::from_str("_geoRadius(45.4777599, 9.1967508, 0)").unwrap().unwrap());
    let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
    assert_eq!(documents_ids, vec![0]);
}
|
||
|
||
#[test]
fn geo_radius_error() {
    // Out-of-range `_geoRadius` coordinates must produce the `BadGeoError`
    // messages, including just past the ±90/±180 boundaries.
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order
            settings.set_filterable_fields(hashset! { S("_geo"), S("price") });
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // georadius have a bad latitude
    let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(
        error.to_string().starts_with(
            "Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees."
        ),
        "{}",
        error.to_string()
    );

    // georadius have a bad latitude
    let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
    ));

    // georadius have a bad longitude
    let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(
        error.to_string().contains(
            "Bad longitude `250`. Longitude must be contained between -180 and 180 degrees."
        ),
        "{}",
        error.to_string(),
    );

    // georadius have a bad longitude
    let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));
}
|
||
|
||
#[test]
fn geo_bounding_box_error() {
    // Out-of-range `_geoBoundingBox` corners must produce the `BadGeoError`
    // messages, for both corners and both coordinates.
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("_geo"), S("price")]); // to keep the fields order
            settings.set_filterable_fields(hashset! { S("_geo"), S("price") });
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // geoboundingbox top left coord have a bad latitude
    let filter =
        Filter::from_str("_geoBoundingBox([-90.0000001, 150], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(
        error.to_string().starts_with(
            "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
        ),
        "{}",
        error.to_string()
    );

    // geoboundingbox top left coord have a bad latitude
    let filter =
        Filter::from_str("_geoBoundingBox([90.0000001, 150], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(
        error.to_string().starts_with(
            "Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
        ),
        "{}",
        error.to_string()
    );

    // geoboundingbox bottom right coord have a bad latitude
    let filter =
        Filter::from_str("_geoBoundingBox([30, 10], [-90.0000001, 150])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
    ));

    // geoboundingbox bottom right coord have a bad latitude
    let filter =
        Filter::from_str("_geoBoundingBox([30, 10], [90.0000001, 150])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad latitude `90.0000001`. Latitude must be contained between -90 and 90 degrees."
    ));

    // geoboundingbox top left coord have a bad longitude
    let filter =
        Filter::from_str("_geoBoundingBox([-10, 180.000001], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));

    // geoboundingbox top left coord have a bad longitude
    let filter =
        Filter::from_str("_geoBoundingBox([-10, -180.000001], [30, 10])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));

    // geoboundingbox bottom right coord have a bad longitude
    let filter =
        Filter::from_str("_geoBoundingBox([30, 10], [-10, -180.000001])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `-180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));

    // geoboundingbox bottom right coord have a bad longitude
    let filter =
        Filter::from_str("_geoBoundingBox([30, 10], [-10, 180.000001])").unwrap().unwrap();
    let error = filter.evaluate(&rtxn, &index).unwrap_err();
    assert!(error.to_string().contains(
        "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
    ));
}
|
||
|
||
#[test]
fn filter_depth() {
    // generates a big (2 MiB) filter with a lot of ORs.
    let tipic_filter = "account_ids=14361 OR ";
    let mut filter_string = String::with_capacity(tipic_filter.len() * 14360);
    for i in 1..=14361 {
        let _ = write!(&mut filter_string, "account_ids={}", i);
        if i != 14361 {
            let _ = write!(&mut filter_string, " OR ");
        }
    }

    // Note: the filter used to be rejected for being too deep, but that is
    // no longer the case
    let filter = Filter::from_str(&filter_string).unwrap();
    assert!(filter.is_some());
}
|
||
|
||
#[test]
fn empty_filter() {
    // A whitespace-only filter string parses successfully to "no filter at all".
    let parsed = Filter::from_str(" ").unwrap();
    assert!(parsed.is_none());
}
|
||
|
||
#[test]
fn non_finite_float() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![S("price")]); // to keep the fields order
            settings.set_filterable_fields(hashset! { S("price") });
        })
        .unwrap();
    index
        .add_documents(documents!([
            { "id": "test_1", "price": "inf" },
            { "id": "test_2", "price": "2000" },
            { "id": "test_3", "price": "infinity" },
        ]))
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // Helper: parse + evaluate a filter expression against the index.
    let eval = |expr: &str| Filter::from_str(expr).unwrap().unwrap().evaluate(&rtxn, &index);

    // Equality against a non-finite literal is treated as a plain string
    // comparison: it matches documents whose value is exactly that literal.
    assert!(eval("price = inf").unwrap().contains(0));
    assert!(eval("price = NaN").unwrap().is_empty());
    assert!(eval("price = infinity").unwrap().contains(2));

    // Ordering comparisons against non-finite values must be rejected
    // with an `InvalidFilter` user error.
    for expr in ["price < inf", "price < NaN", "price < infinity"] {
        assert!(matches!(
            eval(expr),
            Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
        ));
    }
}
|
||
|
||
#[test]
fn filter_number() {
    let index = TempIndex::new();

    index
        .update_settings(|settings| {
            settings.set_primary_key("id".to_owned());
            settings.set_filterable_fields(hashset! { S("id"), S("one"), S("two") });
        })
        .unwrap();

    // 100 documents: `id` runs over 0..100 and `two` cycles through 0..10.
    // `one` is deliberately never set on any document.
    let docs: Vec<_> =
        (0..100).map(|i| serde_json::json!({ "id": i, "two": i % 10 })).collect();
    index.add_documents(documents!(docs)).unwrap();

    let rtxn = index.read_txn().unwrap();

    // Helper: parse + evaluate a filter expression, unwrapping the result.
    let eval =
        |expr: &str| Filter::from_str(expr).unwrap().unwrap().evaluate(&rtxn, &index).unwrap();

    // Every comparison operator on `id` yields exactly the expected id range.
    for i in 0..100 {
        assert_eq!(eval(&format!("id = {i}")), RoaringBitmap::from_iter([i]));
        assert_eq!(eval(&format!("id > {i}")), RoaringBitmap::from_iter((i + 1)..100));
        assert_eq!(eval(&format!("id < {i}")), RoaringBitmap::from_iter(0..i));
        assert_eq!(eval(&format!("id <= {i}")), RoaringBitmap::from_iter(0..=i));
        assert_eq!(eval(&format!("id >= {i}")), RoaringBitmap::from_iter(i..100));
    }
    // `TO` selects the inclusive range on both ends.
    for i in 0..100 {
        for j in i..100 {
            assert_eq!(eval(&format!("id {i} TO {j}")), RoaringBitmap::from_iter(i..=j));
        }
    }

    // `one` exists in the filterable settings but on no document,
    // so any condition on it selects nothing.
    assert_eq!(eval("one >= 0 OR one <= 0"), RoaringBitmap::default());
    assert_eq!(eval("one = 0"), RoaringBitmap::default());

    // `two` repeats every 10 documents; ranges on it select the matching residues.
    for i in 0..10 {
        for j in i..10 {
            assert_eq!(
                eval(&format!("two {i} TO {j}")),
                RoaringBitmap::from_iter((0..100).filter(|x| (i..=j).contains(&(x % 10))))
            );
        }
    }
    assert_eq!(
        eval("two != 0"),
        RoaringBitmap::from_iter((0..100).filter(|x| x % 10 != 0))
    );
}
|
||
}
|