2022-03-15 22:36:10 +01:00
use std ::collections ::HashSet ;
2021-11-04 17:24:55 +01:00
use std ::fmt ::{ Debug , Display } ;
2021-06-16 18:33:33 +02:00
use std ::ops ::Bound ::{ self , Excluded , Included } ;
2021-11-09 11:34:10 +01:00
use std ::ops ::Deref ;
2020-11-26 20:42:54 +01:00
2021-01-07 10:17:27 +01:00
use either ::Either ;
2021-11-04 15:02:36 +01:00
pub use filter_parser ::{ Condition , Error as FPError , FilterCondition , Span , Token } ;
2021-04-07 11:57:16 +02:00
use heed ::types ::DecodeIgnore ;
2020-11-26 20:42:54 +01:00
use log ::debug ;
use roaring ::RoaringBitmap ;
2021-06-23 10:29:00 +02:00
use super ::FacetNumberRange ;
2021-09-16 11:56:18 +02:00
use crate ::error ::{ Error , UserError } ;
2021-07-17 12:50:01 +02:00
use crate ::heed_codec ::facet ::{
FacetLevelValueF64Codec , FacetStringLevelZeroCodec , FacetStringLevelZeroValueCodec ,
} ;
2021-12-14 12:21:24 +01:00
use crate ::{
distance_between_two_points , lat_lng_to_xyz , CboRoaringBitmapCodec , FieldId , Index , Result ,
} ;
2020-11-26 20:42:54 +01:00
2021-12-07 16:32:48 +01:00
/// Depth limit handed to `token_at_depth` when a filter is built; a filter that
/// reaches it is rejected with a "too many filter conditions" error.
const MAX_FILTER_DEPTH: usize = 2_000;
2021-12-07 16:32:48 +01:00
2021-11-06 01:32:12 +01:00
#[ derive(Debug, Clone, PartialEq, Eq) ]
2021-10-22 14:33:18 +02:00
pub struct Filter < ' a > {
condition : FilterCondition < ' a > ,
2020-11-26 20:42:54 +01:00
}
2021-11-04 17:24:55 +01:00
/// Errors produced while building or evaluating a filter.
#[derive(Debug)]
enum FilterError<'a> {
    /// The attribute is not one of the index's filterable fields.
    AttributeNotFilterable {
        /// Name of the attribute the filter tried to use.
        attribute: &'a str,
        /// Fields that are filterable on this index.
        filterable_fields: HashSet<String>,
    },
    /// A geo-related reserved keyword was used as an attribute.
    BadGeo(&'a str),
    /// The latitude is outside of the -90..=90 range.
    BadGeoLat(f64),
    /// The longitude is outside of the -180..=180 range.
    BadGeoLng(f64),
    /// A reserved keyword was used as an attribute.
    Reserved(&'a str),
    /// The filter reaches `MAX_FILTER_DEPTH`.
    TooDeep,
}
impl < ' a > std ::error ::Error for FilterError < ' a > { }
impl < ' a > Display for FilterError < ' a > {
fn fmt ( & self , f : & mut std ::fmt ::Formatter < '_ > ) -> std ::fmt ::Result {
match self {
2022-03-15 22:36:10 +01:00
Self ::AttributeNotFilterable { attribute , filterable_fields } = > {
if filterable_fields . is_empty ( ) {
2022-03-15 22:12:51 +01:00
write! (
f ,
" Attribute `{}` is not filterable. This index does not have configured filterable attributes. " ,
attribute ,
)
} else {
2022-03-15 22:36:10 +01:00
let filterables_list = filterable_fields . iter ( ) . map ( AsRef ::as_ref ) . collect ::< Vec < _ > > ( ) . join ( " " ) ;
2022-03-15 22:12:51 +01:00
write! (
f ,
" Attribute `{}` is not filterable. Available filterable attributes are: `{}`. " ,
attribute ,
2022-03-15 22:36:10 +01:00
filterables_list ,
2022-03-15 22:12:51 +01:00
)
}
} ,
2021-12-07 16:32:48 +01:00
Self ::TooDeep = > write! ( f ,
" Too many filter conditions, can't process more than {} filters. " ,
MAX_FILTER_DEPTH
) ,
2021-11-04 17:24:55 +01:00
Self ::Reserved ( keyword ) = > write! (
f ,
" `{}` is a reserved keyword and thus can't be used as a filter expression. " ,
keyword
) ,
Self ::BadGeo ( keyword ) = > write! ( f , " `{}` is a reserved keyword and thus can't be used as a filter expression. Use the _geoRadius(latitude, longitude, distance) built-in rule to filter on _geo field coordinates. " , keyword ) ,
2021-11-04 17:42:06 +01:00
Self ::BadGeoLat ( lat ) = > write! ( f , " Bad latitude `{}`. Latitude must be contained between -90 and 90 degrees. " , lat ) ,
2021-11-06 01:32:12 +01:00
Self ::BadGeoLng ( lng ) = > write! ( f , " Bad longitude `{}`. Longitude must be contained between -180 and 180 degrees. " , lng ) ,
2021-11-04 17:24:55 +01:00
}
}
}
impl < ' a > From < FPError < ' a > > for Error {
fn from ( error : FPError < ' a > ) -> Self {
Self ::UserError ( UserError ::InvalidFilter ( error . to_string ( ) ) )
}
}
2021-11-04 15:52:22 +01:00
impl < ' a > From < Filter < ' a > > for FilterCondition < ' a > {
fn from ( f : Filter < ' a > ) -> Self {
f . condition
}
}
2021-10-22 14:33:18 +02:00
impl < ' a > Filter < ' a > {
2021-10-22 16:38:35 +02:00
pub fn from_array < I , J > ( array : I ) -> Result < Option < Self > >
2021-06-16 18:33:33 +02:00
where
2021-10-22 14:33:18 +02:00
I : IntoIterator < Item = Either < J , & ' a str > > ,
J : IntoIterator < Item = & ' a str > ,
2021-01-07 10:17:27 +01:00
{
2021-09-28 11:50:15 +02:00
let mut ands : Option < FilterCondition > = None ;
2021-01-07 10:17:27 +01:00
for either in array {
match either {
Either ::Left ( array ) = > {
let mut ors = None ;
for rule in array {
2021-12-09 11:13:12 +01:00
if let Some ( filter ) = Self ::from_str ( rule . as_ref ( ) ) ? {
let condition = filter . condition ;
ors = match ors . take ( ) {
Some ( ors ) = > {
Some ( FilterCondition ::Or ( Box ::new ( ors ) , Box ::new ( condition ) ) )
}
None = > Some ( condition ) ,
} ;
}
2021-01-07 10:17:27 +01:00
}
if let Some ( rule ) = ors {
ands = match ands . take ( ) {
2021-10-22 14:33:18 +02:00
Some ( ands ) = > {
Some ( FilterCondition ::And ( Box ::new ( ands ) , Box ::new ( rule ) ) )
}
2021-01-07 10:17:27 +01:00
None = > Some ( rule ) ,
} ;
}
2021-06-16 18:33:33 +02:00
}
2021-01-07 10:17:27 +01:00
Either ::Right ( rule ) = > {
2021-12-09 11:13:12 +01:00
if let Some ( filter ) = Self ::from_str ( rule . as_ref ( ) ) ? {
let condition = filter . condition ;
ands = match ands . take ( ) {
Some ( ands ) = > {
Some ( FilterCondition ::And ( Box ::new ( ands ) , Box ::new ( condition ) ) )
}
None = > Some ( condition ) ,
} ;
}
2021-01-07 10:17:27 +01:00
}
}
}
2021-12-07 16:32:48 +01:00
if let Some ( token ) = ands . as_ref ( ) . and_then ( | fc | fc . token_at_depth ( MAX_FILTER_DEPTH ) ) {
return Err ( token . as_external_error ( FilterError ::TooDeep ) . into ( ) ) ;
}
2021-10-22 16:38:35 +02:00
Ok ( ands . map ( | ands | Self { condition : ands } ) )
2021-01-07 10:17:27 +01:00
}
2021-10-14 15:37:59 +02:00
2021-12-09 11:13:12 +01:00
pub fn from_str ( expression : & ' a str ) -> Result < Option < Self > > {
2021-11-04 15:02:36 +01:00
let condition = match FilterCondition ::parse ( expression ) {
2021-12-09 11:13:12 +01:00
Ok ( Some ( fc ) ) = > Ok ( fc ) ,
Ok ( None ) = > return Ok ( None ) ,
2021-11-04 17:24:55 +01:00
Err ( e ) = > Err ( Error ::UserError ( UserError ::InvalidFilter ( e . to_string ( ) ) ) ) ,
2021-10-22 16:38:35 +02:00
} ? ;
2021-12-07 17:20:11 +01:00
if let Some ( token ) = condition . token_at_depth ( MAX_FILTER_DEPTH ) {
return Err ( token . as_external_error ( FilterError ::TooDeep ) . into ( ) ) ;
}
2021-12-09 11:13:12 +01:00
Ok ( Some ( Self { condition } ) )
2020-11-26 20:42:54 +01:00
}
}
2021-10-22 14:33:18 +02:00
impl < ' a > Filter < ' a > {
2020-11-26 20:42:54 +01:00
/// Aggregates the documents ids that are part of the specified range automatically
/// going deeper through the levels.
2021-04-07 11:57:16 +02:00
fn explore_facet_number_levels (
rtxn : & heed ::RoTxn ,
db : heed ::Database < FacetLevelValueF64Codec , CboRoaringBitmapCodec > ,
2020-11-26 20:42:54 +01:00
field_id : FieldId ,
level : u8 ,
2021-04-07 11:57:16 +02:00
left : Bound < f64 > ,
right : Bound < f64 > ,
2020-11-26 20:42:54 +01:00
output : & mut RoaringBitmap ,
2021-06-16 18:33:33 +02:00
) -> Result < ( ) > {
2020-11-26 20:42:54 +01:00
match ( left , right ) {
// If the request is an exact value we must go directly to the deepest level.
( Included ( l ) , Included ( r ) ) if l = = r & & level > 0 = > {
2021-06-16 18:33:33 +02:00
return Self ::explore_facet_number_levels (
rtxn , db , field_id , 0 , left , right , output ,
) ;
}
2020-11-26 20:42:54 +01:00
// lower TO upper when lower > upper must return no result
( Included ( l ) , Included ( r ) ) if l > r = > return Ok ( ( ) ) ,
( Included ( l ) , Excluded ( r ) ) if l > = r = > return Ok ( ( ) ) ,
( Excluded ( l ) , Excluded ( r ) ) if l > = r = > return Ok ( ( ) ) ,
( Excluded ( l ) , Included ( r ) ) if l > = r = > return Ok ( ( ) ) ,
( _ , _ ) = > ( ) ,
}
let mut left_found = None ;
let mut right_found = None ;
// We must create a custom iterator to be able to iterate over the
// requested range as the range iterator cannot express some conditions.
2021-06-23 10:29:00 +02:00
let iter = FacetNumberRange ::new ( rtxn , db , field_id , level , left , right ) ? ;
2020-11-26 20:42:54 +01:00
debug! ( " Iterating between {:?} and {:?} (level {}) " , left , right , level ) ;
for ( i , result ) in iter . enumerate ( ) {
let ( ( _fid , level , l , r ) , docids ) = result ? ;
debug! ( " {:?} to {:?} (level {}) found {} documents " , l , r , level , docids . len ( ) ) ;
2021-06-30 14:12:56 +02:00
* output | = docids ;
2020-11-26 20:42:54 +01:00
// We save the leftest and rightest bounds we actually found at this level.
2021-06-16 18:33:33 +02:00
if i = = 0 {
left_found = Some ( l ) ;
}
2020-11-26 20:42:54 +01:00
right_found = Some ( r ) ;
}
// Can we go deeper?
let deeper_level = match level . checked_sub ( 1 ) {
Some ( level ) = > level ,
None = > return Ok ( ( ) ) ,
} ;
// We must refine the left and right bounds of this range by retrieving the
// missing part in a deeper level.
match left_found . zip ( right_found ) {
Some ( ( left_found , right_found ) ) = > {
// If the bound is satisfied we avoid calling this function again.
if ! matches! ( left , Included ( l ) if l = = left_found ) {
let sub_right = Excluded ( left_found ) ;
2021-06-16 18:33:33 +02:00
debug! (
" calling left with {:?} to {:?} (level {}) " ,
left , sub_right , deeper_level
) ;
Self ::explore_facet_number_levels (
rtxn ,
db ,
field_id ,
deeper_level ,
left ,
sub_right ,
output ,
) ? ;
2020-11-26 20:42:54 +01:00
}
if ! matches! ( right , Included ( r ) if r = = right_found ) {
let sub_left = Excluded ( right_found ) ;
2021-06-16 18:33:33 +02:00
debug! (
" calling right with {:?} to {:?} (level {}) " ,
sub_left , right , deeper_level
) ;
Self ::explore_facet_number_levels (
rtxn ,
db ,
field_id ,
deeper_level ,
sub_left ,
right ,
output ,
) ? ;
2020-11-26 20:42:54 +01:00
}
2021-06-16 18:33:33 +02:00
}
2020-11-26 20:42:54 +01:00
None = > {
// If we found nothing at this level it means that we must find
// the same bounds but at a deeper, more precise level.
2021-06-16 18:33:33 +02:00
Self ::explore_facet_number_levels (
rtxn ,
db ,
field_id ,
deeper_level ,
left ,
right ,
output ,
) ? ;
}
2020-11-26 20:42:54 +01:00
}
Ok ( ( ) )
}
2021-05-03 11:45:45 +02:00
fn evaluate_operator (
2021-04-07 11:57:16 +02:00
rtxn : & heed ::RoTxn ,
2020-11-26 20:42:54 +01:00
index : & Index ,
2021-05-03 11:45:45 +02:00
numbers_db : heed ::Database < FacetLevelValueF64Codec , CboRoaringBitmapCodec > ,
2021-08-16 13:36:30 +02:00
strings_db : heed ::Database < FacetStringLevelZeroCodec , FacetStringLevelZeroValueCodec > ,
2020-11-26 20:42:54 +01:00
field_id : FieldId ,
2021-10-22 14:33:18 +02:00
operator : & Condition < ' a > ,
2021-06-16 18:33:33 +02:00
) -> Result < RoaringBitmap > {
2020-11-26 20:42:54 +01:00
// Make sure we always bound the ranges with the field id and the level,
// as the facets values are all in the same database and prefixed by the
// field id and the level.
2021-10-22 17:23:22 +02:00
2020-11-26 20:42:54 +01:00
let ( left , right ) = match operator {
2021-11-05 10:46:54 +01:00
Condition ::GreaterThan ( val ) = > ( Excluded ( val . parse ( ) ? ) , Included ( f64 ::MAX ) ) ,
Condition ::GreaterThanOrEqual ( val ) = > ( Included ( val . parse ( ) ? ) , Included ( f64 ::MAX ) ) ,
Condition ::LowerThan ( val ) = > ( Included ( f64 ::MIN ) , Excluded ( val . parse ( ) ? ) ) ,
Condition ::LowerThanOrEqual ( val ) = > ( Included ( f64 ::MIN ) , Included ( val . parse ( ) ? ) ) ,
Condition ::Between { from , to } = > ( Included ( from . parse ( ) ? ) , Included ( to . parse ( ) ? ) ) ,
2021-10-22 14:33:18 +02:00
Condition ::Equal ( val ) = > {
2021-11-09 11:34:10 +01:00
let ( _original_value , string_docids ) =
strings_db . get ( rtxn , & ( field_id , & val . to_lowercase ( ) ) ) ? . unwrap_or_default ( ) ;
let number = val . parse ::< f64 > ( ) . ok ( ) ;
2021-05-03 11:45:45 +02:00
let number_docids = match number {
Some ( n ) = > {
2021-10-22 14:33:18 +02:00
let n = Included ( n ) ;
2021-05-03 11:45:45 +02:00
let mut output = RoaringBitmap ::new ( ) ;
2021-06-16 18:33:33 +02:00
Self ::explore_facet_number_levels (
rtxn ,
numbers_db ,
field_id ,
0 ,
n ,
n ,
& mut output ,
) ? ;
2021-05-03 11:45:45 +02:00
output
2021-06-16 18:33:33 +02:00
}
2021-05-03 11:45:45 +02:00
None = > RoaringBitmap ::new ( ) ,
} ;
return Ok ( string_docids | number_docids ) ;
2021-06-16 18:33:33 +02:00
}
2021-10-22 14:33:18 +02:00
Condition ::NotEqual ( val ) = > {
2021-11-09 11:34:10 +01:00
let number = val . parse ::< f64 > ( ) . ok ( ) ;
2021-05-03 11:45:45 +02:00
let all_numbers_ids = if number . is_some ( ) {
index . number_faceted_documents_ids ( rtxn , field_id ) ?
} else {
RoaringBitmap ::new ( )
} ;
let all_strings_ids = index . string_faceted_documents_ids ( rtxn , field_id ) ? ;
2021-10-22 14:33:18 +02:00
let operator = Condition ::Equal ( val . clone ( ) ) ;
2021-06-16 18:33:33 +02:00
let docids = Self ::evaluate_operator (
rtxn , index , numbers_db , strings_db , field_id , & operator ,
) ? ;
2021-05-03 11:45:45 +02:00
return Ok ( ( all_numbers_ids | all_strings_ids ) - docids ) ;
2021-10-22 18:03:39 +02:00
}
2020-11-26 20:42:54 +01:00
} ;
// Ask for the biggest value that can exist for this specific field, if it exists
// that's fine if it don't, the value just before will be returned instead.
2021-05-03 11:45:45 +02:00
let biggest_level = numbers_db
2021-04-07 11:57:16 +02:00
. remap_data_type ::< DecodeIgnore > ( )
. get_lower_than_or_equal_to ( rtxn , & ( field_id , u8 ::MAX , f64 ::MAX , f64 ::MAX ) ) ?
2020-11-26 20:42:54 +01:00
. and_then ( | ( ( id , level , _ , _ ) , _ ) | if id = = field_id { Some ( level ) } else { None } ) ;
match biggest_level {
Some ( level ) = > {
let mut output = RoaringBitmap ::new ( ) ;
2021-06-16 18:33:33 +02:00
Self ::explore_facet_number_levels (
rtxn ,
numbers_db ,
field_id ,
level ,
left ,
right ,
& mut output ,
) ? ;
2020-11-26 20:42:54 +01:00
Ok ( output )
2021-06-16 18:33:33 +02:00
}
2020-11-26 20:42:54 +01:00
None = > Ok ( RoaringBitmap ::new ( ) ) ,
}
}
2021-06-16 18:33:33 +02:00
pub fn evaluate ( & self , rtxn : & heed ::RoTxn , index : & Index ) -> Result < RoaringBitmap > {
2021-05-03 11:45:45 +02:00
let numbers_db = index . facet_id_f64_docids ;
let strings_db = index . facet_id_string_docids ;
2021-10-22 14:33:18 +02:00
match & self . condition {
FilterCondition ::Condition { fid , op } = > {
2021-11-07 01:52:19 +01:00
let filterable_fields = index . filterable_fields ( rtxn ) ? ;
2022-01-17 13:51:46 +01:00
if filterable_fields . contains ( fid . value ( ) ) {
2021-11-07 01:52:19 +01:00
let field_ids_map = index . fields_ids_map ( rtxn ) ? ;
2022-01-17 13:51:46 +01:00
if let Some ( fid ) = field_ids_map . id ( fid . value ( ) ) {
2021-11-07 01:52:19 +01:00
Self ::evaluate_operator ( rtxn , index , numbers_db , strings_db , fid , & op )
} else {
2022-01-17 13:51:46 +01:00
return Ok ( RoaringBitmap ::new ( ) ) ;
2021-11-07 01:52:19 +01:00
}
2021-10-22 17:23:22 +02:00
} else {
2021-11-09 11:34:10 +01:00
match * fid . deref ( ) {
2021-11-04 17:24:55 +01:00
attribute @ " _geo " = > {
return Err ( fid . as_external_error ( FilterError ::BadGeo ( attribute ) ) ) ? ;
2021-10-22 19:00:33 +02:00
}
2021-11-04 17:24:55 +01:00
attribute if attribute . starts_with ( " _geoPoint( " ) = > {
return Err ( fid . as_external_error ( FilterError ::BadGeo ( " _geoPoint " ) ) ) ? ;
2021-10-22 19:00:33 +02:00
}
2021-11-04 17:24:55 +01:00
attribute @ " _geoDistance " = > {
return Err ( fid . as_external_error ( FilterError ::Reserved ( attribute ) ) ) ? ;
2021-10-22 19:00:33 +02:00
}
2021-11-04 17:24:55 +01:00
attribute = > {
return Err ( fid . as_external_error (
FilterError ::AttributeNotFilterable {
attribute ,
2022-03-15 22:36:10 +01:00
filterable_fields ,
2021-11-04 17:24:55 +01:00
} ,
) ) ? ;
2021-10-22 19:00:33 +02:00
}
}
2021-10-22 17:23:22 +02:00
}
2021-06-16 18:33:33 +02:00
}
2021-10-22 14:33:18 +02:00
FilterCondition ::Or ( lhs , rhs ) = > {
let lhs = Self ::evaluate ( & ( lhs . as_ref ( ) . clone ( ) ) . into ( ) , rtxn , index ) ? ;
let rhs = Self ::evaluate ( & ( rhs . as_ref ( ) . clone ( ) ) . into ( ) , rtxn , index ) ? ;
2020-11-26 20:42:54 +01:00
Ok ( lhs | rhs )
2021-06-16 18:33:33 +02:00
}
2021-10-22 14:33:18 +02:00
FilterCondition ::And ( lhs , rhs ) = > {
let lhs = Self ::evaluate ( & ( lhs . as_ref ( ) . clone ( ) ) . into ( ) , rtxn , index ) ? ;
let rhs = Self ::evaluate ( & ( rhs . as_ref ( ) . clone ( ) ) . into ( ) , rtxn , index ) ? ;
2020-11-26 20:42:54 +01:00
Ok ( lhs & rhs )
2021-06-16 18:33:33 +02:00
}
2021-10-22 18:03:39 +02:00
FilterCondition ::GeoLowerThan { point , radius } = > {
2021-11-07 01:52:19 +01:00
let filterable_fields = index . filterable_fields ( rtxn ) ? ;
if filterable_fields . contains ( " _geo " ) {
2021-11-05 10:46:54 +01:00
let base_point : [ f64 ; 2 ] = [ point [ 0 ] . parse ( ) ? , point [ 1 ] . parse ( ) ? ] ;
2021-11-04 17:42:06 +01:00
if ! ( - 90. 0 ..= 90.0 ) . contains ( & base_point [ 0 ] ) {
return Err (
point [ 0 ] . as_external_error ( FilterError ::BadGeoLat ( base_point [ 0 ] ) )
) ? ;
}
if ! ( - 180. 0 ..= 180.0 ) . contains ( & base_point [ 1 ] ) {
return Err (
point [ 1 ] . as_external_error ( FilterError ::BadGeoLng ( base_point [ 1 ] ) )
) ? ;
}
2021-11-05 10:46:54 +01:00
let radius = radius . parse ( ) ? ;
2021-10-22 18:08:18 +02:00
let rtree = match index . geo_rtree ( rtxn ) ? {
Some ( rtree ) = > rtree ,
None = > return Ok ( RoaringBitmap ::new ( ) ) ,
} ;
2021-10-22 18:03:39 +02:00
2021-12-14 12:21:24 +01:00
let xyz_base_point = lat_lng_to_xyz ( & base_point ) ;
2021-10-22 18:08:18 +02:00
let result = rtree
2021-12-14 12:21:24 +01:00
. nearest_neighbor_iter ( & xyz_base_point )
2021-10-22 18:08:18 +02:00
. take_while ( | point | {
2021-12-14 12:21:24 +01:00
distance_between_two_points ( & base_point , & point . data . 1 ) < radius
2021-10-22 18:08:18 +02:00
} )
2021-12-14 12:21:24 +01:00
. map ( | point | point . data . 0 )
2021-10-22 18:08:18 +02:00
. collect ( ) ;
2021-10-22 18:03:39 +02:00
2021-10-22 18:08:18 +02:00
Ok ( result )
} else {
2021-11-04 17:42:06 +01:00
return Err ( point [ 0 ] . as_external_error ( FilterError ::AttributeNotFilterable {
attribute : " _geo " ,
2022-03-15 22:36:10 +01:00
filterable_fields ,
2021-11-04 17:42:06 +01:00
} ) ) ? ;
2021-10-22 18:08:18 +02:00
}
2021-10-22 18:03:39 +02:00
}
FilterCondition ::GeoGreaterThan { point , radius } = > {
let result = Self ::evaluate (
& FilterCondition ::GeoLowerThan { point : point . clone ( ) , radius : radius . clone ( ) }
. into ( ) ,
rtxn ,
index ,
) ? ;
let geo_faceted_doc_ids = index . geo_faceted_documents_ids ( rtxn ) ? ;
2021-10-22 18:08:18 +02:00
Ok ( geo_faceted_doc_ids - result )
2021-10-22 18:03:39 +02:00
}
2020-11-26 20:42:54 +01:00
}
}
}
2021-10-22 14:33:18 +02:00
impl < ' a > From < FilterCondition < ' a > > for Filter < ' a > {
fn from ( fc : FilterCondition < ' a > ) -> Self {
Self { condition : fc }
}
}
#[ cfg(test) ]
mod tests {
2021-12-07 17:20:11 +01:00
use std ::fmt ::Write ;
2021-10-22 14:33:18 +02:00
use big_s ::S ;
use either ::Either ;
use heed ::EnvOpenOptions ;
use maplit ::hashset ;
use super ::* ;
2021-12-08 14:12:07 +01:00
use crate ::update ::{ IndexerConfig , Settings } ;
2021-10-22 14:33:18 +02:00
use crate ::Index ;
2022-01-18 11:40:30 +01:00
#[test]
fn empty_db() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // Declare a single searchable and filterable field, with no document indexed.
    let config = IndexerConfig::default();
    let mut wtxn = index.write_txn().unwrap();
    let mut builder = Settings::new(&mut wtxn, &index, &config);
    builder.set_searchable_fields(vec![S(" PrIcE ")]); // to keep the fields order
    builder.set_filterable_fields(hashset! { S(" PrIcE ") });
    builder.execute(|_| ()).unwrap();
    wtxn.commit().unwrap();

    let rtxn = index.read_txn().unwrap();

    // Both filters must evaluate to no document.
    for &expression in &[" PrIcE < 1000 ", " NOT PrIcE >= 1000 "] {
        let filter = Filter::from_str(expression).unwrap().unwrap();
        let bitmap = filter.evaluate(&rtxn, &index).unwrap();
        assert!(bitmap.is_empty());
    }
}
2021-10-22 14:33:18 +02:00
#[test]
fn from_array() {
    /// Parses `expression`, expecting it to be a valid, non-empty filter.
    fn parse(expression: &str) -> Filter<'_> {
        Filter::from_str(expression).unwrap().unwrap()
    }

    // Simple array with Left
    let built = Filter::from_array(vec![Either::Left([" channel = mv "])]).unwrap().unwrap();
    assert_eq!(built, parse(" channel = mv "));

    // Simple array with Right
    let built = Filter::from_array::<_, Option<&str>>(vec![Either::Right(" channel = mv ")])
        .unwrap()
        .unwrap();
    assert_eq!(built, parse(" channel = mv "));

    // Array with Left and escaped quote
    let built = Filter::from_array(vec![Either::Left([" channel = \" Mister Mv \" "])])
        .unwrap()
        .unwrap();
    assert_eq!(built, parse(" channel = \" Mister Mv \" "));

    // Array with Right and escaped quote
    let built = Filter::from_array::<_, Option<&str>>(vec![Either::Right(
        " channel = \" Mister Mv \" ",
    )])
    .unwrap()
    .unwrap();
    assert_eq!(built, parse(" channel = \" Mister Mv \" "));

    // Array with Left and escaped simple quote
    let built = Filter::from_array(vec![Either::Left([" channel = 'Mister Mv' "])])
        .unwrap()
        .unwrap();
    assert_eq!(built, parse(" channel = 'Mister Mv' "));

    // Array with Right and escaped simple quote
    let built = Filter::from_array::<_, Option<&str>>(vec![Either::Right(
        " channel = 'Mister Mv' ",
    )])
    .unwrap()
    .unwrap();
    assert_eq!(built, parse(" channel = 'Mister Mv' "));

    // Simple with parenthesis
    let built = Filter::from_array(vec![Either::Left([" (channel = mv) "])]).unwrap().unwrap();
    assert_eq!(built, parse(" (channel = mv) "));

    // Test that the facet condition is correctly generated.
    let condition = Filter::from_array(vec![
        Either::Right(" channel = gotaga "),
        Either::Left(vec![" timestamp = 44 ", " channel != ponce "]),
    ])
    .unwrap()
    .unwrap();
    let expected = parse(" channel = gotaga AND (timestamp = 44 OR channel != ponce) ");
    println!(" \n Expecting: {:#?} \n Got: {:#?} \n ", expected, condition);
    assert_eq!(condition, expected);
}
2021-11-07 01:52:19 +01:00
#[test]
fn not_filterable() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // First pass: the index has no filterable attribute at all.
    {
        let rtxn = index.read_txn().unwrap();

        let geo_filter = Filter::from_str(" _geoRadius(42, 150, 10) ").unwrap().unwrap();
        let message = geo_filter.evaluate(&rtxn, &index).unwrap_err().to_string();
        assert!(message.starts_with(
            " Attribute `_geo` is not filterable. This index does not have configured filterable attributes. "
        ));

        let dog_filter = Filter::from_str(" dog = \" bernese mountain \" ").unwrap().unwrap();
        let message = dog_filter.evaluate(&rtxn, &index).unwrap_err().to_string();
        assert!(message.starts_with(
            " Attribute `dog` is not filterable. This index does not have configured filterable attributes. "
        ));
    }

    // Second pass: only the title is searchable and filterable.
    let config = IndexerConfig::default();
    let mut wtxn = index.write_txn().unwrap();
    let mut builder = Settings::new(&mut wtxn, &index, &config);
    builder.set_searchable_fields(vec![S(" title ")]);
    builder.set_filterable_fields(hashset! { S(" title ") });
    builder.execute(|_| ()).unwrap();
    wtxn.commit().unwrap();

    let rtxn = index.read_txn().unwrap();

    let geo_filter = Filter::from_str(" _geoRadius(-100, 150, 10) ").unwrap().unwrap();
    let message = geo_filter.evaluate(&rtxn, &index).unwrap_err().to_string();
    assert!(message.starts_with(
        " Attribute `_geo` is not filterable. Available filterable attributes are: `title`. "
    ));

    let name_filter = Filter::from_str(" name = 12 ").unwrap().unwrap();
    let message = name_filter.evaluate(&rtxn, &index).unwrap_err().to_string();
    assert!(message.starts_with(
        " Attribute `name` is not filterable. Available filterable attributes are: `title`. "
    ));
}
2021-10-22 14:33:18 +02:00
#[test]
fn geo_radius_error() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    // Make `_geo` and `price` both searchable and filterable.
    let config = IndexerConfig::default();
    let mut wtxn = index.write_txn().unwrap();
    let mut builder = Settings::new(&mut wtxn, &index, &config);
    builder.set_searchable_fields(vec![S(" _geo "), S(" price ")]); // to keep the fields order
    builder.set_filterable_fields(hashset! { S(" _geo "), S(" price ") });
    builder.execute(|_| ()).unwrap();
    wtxn.commit().unwrap();

    let rtxn = index.read_txn().unwrap();

    // Evaluates `expression` and returns the message of the error it must produce.
    let evaluation_error = |expression: &str| {
        let filter = Filter::from_str(expression).unwrap().unwrap();
        filter.evaluate(&rtxn, &index).unwrap_err().to_string()
    };

    // The latitude is far out of range.
    let message = evaluation_error(" _geoRadius(-100, 150, 10) ");
    assert!(
        message.starts_with(
            " Bad latitude `-100`. Latitude must be contained between -90 and 90 degrees. "
        ),
        " {} ",
        message
    );

    // The latitude is barely out of range.
    let message = evaluation_error(" _geoRadius(-90.0000001, 150, 10) ");
    assert!(message.contains(
        " Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees. "
    ));

    // The longitude is far out of range.
    let message = evaluation_error(" _geoRadius(-10, 250, 10) ");
    assert!(
        message.contains(
            " Bad longitude `250`. Longitude must be contained between -180 and 180 degrees. "
        ),
        " {} ",
        message,
    );

    // The longitude is barely out of range.
    let message = evaluation_error(" _geoRadius(-10, 180.000001, 10) ");
    assert!(message.contains(
        " Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees. "
    ));
}
2021-12-07 17:20:11 +01:00
#[test]
fn filter_depth() {
    // Builds one long chain of 14361 conditions joined by ORs, which is more
    // than the filter is allowed to hold.
    let typical_filter = " account_ids=14361 OR ";
    let mut filter_string = String::with_capacity(typical_filter.len() * 14360);
    for account_id in 1..=14361 {
        let _ = write!(filter_string, " account_ids={} ", account_id);
        // No trailing OR after the last condition.
        if account_id != 14361 {
            filter_string.push_str(" OR ");
        }
    }

    let message = Filter::from_str(&filter_string).unwrap_err().to_string();
    assert!(message.starts_with(" Too many filter conditions "), " {} ", message);
}
2021-12-09 11:14:51 +01:00
#[test]
fn empty_filter() {
    // An expression without any condition parses to no filter at all.
    let parsed = Filter::from_str(" ").unwrap();
    assert_eq!(parsed, None);
}
2021-10-22 14:33:18 +02:00
}