From 258c3dd5637249d31568b83e6f8b490a41c56903 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 20 Jun 2022 18:46:57 +0200 Subject: [PATCH] Make AND+OR filters n-ary (store a vector of subfilters instead of 2) NOTE: The token_at_depth is method is a bit useless now, as the only cases where there would be a toke at depth 1000 are the cases where the parser already stack-overflowed earlier. Example: (((((... (x=1) ...))))) --- filter-parser/src/lib.rs | 117 +++++++++++++++++++------------ http-ui/src/main.rs | 7 +- milli/src/search/facet/filter.rs | 104 +++++++++++---------------- 3 files changed, 118 insertions(+), 110 deletions(-) diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs index 01be432d7..8da3da35f 100644 --- a/filter-parser/src/lib.rs +++ b/filter-parser/src/lib.rs @@ -113,8 +113,8 @@ impl<'a> From> for Token<'a> { #[derive(Debug, Clone, PartialEq, Eq)] pub enum FilterCondition<'a> { Condition { fid: Token<'a>, op: Condition<'a> }, - Or(Box, Box), - And(Box, Box), + Or(Vec), + And(Vec), GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, } @@ -124,13 +124,23 @@ impl<'a> FilterCondition<'a> { pub fn token_at_depth(&self, depth: usize) -> Option<&Token> { match self { FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid), - FilterCondition::Or(left, right) => { + FilterCondition::Or(subfilters) => { let depth = depth.saturating_sub(1); - right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) + for f in subfilters.iter() { + if let Some(t) = f.token_at_depth(depth) { + return Some(t); + } + } + None } - FilterCondition::And(left, right) => { + FilterCondition::And(subfilters) => { let depth = depth.saturating_sub(1); - right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) + for f in subfilters.iter() { + if let Some(t) = f.token_at_depth(depth) { + return Some(t); + } + } + None } FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point), FilterCondition::GeoGreaterThan { point: [point, _], .. } if depth == 0 => Some(point), @@ -144,13 +154,13 @@ impl<'a> FilterCondition<'a> { match self { Condition { fid, op } => match op.negate() { (op, None) => Condition { fid, op }, - (a, Some(b)) => Or( + (a, Some(b)) => Or(vec![ Condition { fid: fid.clone(), op: a }.into(), Condition { fid, op: b }.into(), - ), + ]), }, - Or(a, b) => And(a.negate().into(), b.negate().into()), - And(a, b) => Or(a.negate().into(), b.negate().into()), + Or(subfilters) => And(subfilters.into_iter().map(|x| x.negate().into()).collect()), + And(subfilters) => Or(subfilters.into_iter().map(|x| x.negate().into()).collect()), GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, } @@ -172,26 +182,36 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult) -> impl FnMut(Span<'a>) /// or = and ("OR" WS+ and)* fn parse_or(input: Span) -> IResult { - let (input, lhs) = parse_and(input)?; + let (input, first_filter) = parse_and(input)?; // if we found a `OR` then we MUST find something next - let (input, ors) = many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?; + let (input, mut ors) = + many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?; - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); - Ok((input, expr)) + let filter = if ors.is_empty() { + first_filter + } else { + ors.insert(0, first_filter); + FilterCondition::Or(ors) + }; + + Ok((input, filter)) } /// and = not ("AND" not)* fn parse_and(input: Span) -> IResult { - let (input, lhs) = parse_not(input)?; + let (input, first_filter) = parse_not(input)?; // if we found a `AND` then we MUST find something next - let (input, ors) = + let (input, mut ands) = many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?; - let expr = ors - .into_iter() - .fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); - Ok((input, expr)) + + let filter = if ands.is_empty() { + first_filter + } else { + ands.insert(0, first_filter); + FilterCondition::And(ands) + }; + + Ok((input, filter)) } /// not = ("NOT" WS+ not) | primary @@ -477,7 +497,7 @@ pub mod tests { ( "NOT subscribers 100 TO 1000", Fc::Or( - Fc::Condition { + vec![Fc::Condition { fid: rtok("NOT ", "subscribers"), op: Condition::LowerThan(rtok("NOT subscribers ", "100")), } @@ -486,7 +506,7 @@ pub mod tests { fid: rtok("NOT ", "subscribers"), op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), } - .into(), + .into()], ), ), ( @@ -506,7 +526,7 @@ pub mod tests { // test simple `or` and `and` ( "channel = ponce AND 'dog race' != 'bernese mountain'", - Fc::And( + Fc::And(vec![ Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")), @@ -520,11 +540,11 @@ pub mod tests { )), } .into(), - ), + ]), ), ( "channel = ponce OR 'dog race' != 'bernese mountain'", - Fc::Or( + Fc::Or(vec![ Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")), @@ -538,12 +558,12 @@ pub mod tests { )), } .into(), - ), + ]), ), ( "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", - Fc::Or( - Fc::And( + Fc::Or(vec![ + Fc::And(vec![ Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")), @@ -557,7 +577,7 @@ pub mod tests { )), } .into(), - ) + ]) .into(), Fc::Condition { fid: rtok( @@ -570,30 +590,30 @@ pub mod tests { )), } .into(), - ), + ]), ), // test parenthesis ( "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", - Fc::And( + Fc::And(vec![ Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(), - Fc::Or( - Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), - Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), - ).into()), + Fc::Or(vec![ + Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), + Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),] + ).into()]), ), ( "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)", - Fc::And( - Fc::Or( - Fc::And( + Fc::And(vec![ + Fc::Or(vec![ + Fc::And(vec![ Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(), Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), - ).into(), + ]).into(), Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), - ).into(), + ]).into(), Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() - ) + ]) ) ]; @@ -657,6 +677,15 @@ pub mod tests { #[test] fn depth() { let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap().unwrap(); - assert!(filter.token_at_depth(5).is_some()); + assert!(filter.token_at_depth(1).is_some()); + assert!(filter.token_at_depth(2).is_none()); + + let filter = FilterCondition::parse("(account_ids=1 OR (account_ids=2 AND account_ids=3) OR (account_ids=4 AND account_ids=5) OR account_ids=6)").unwrap().unwrap(); + assert!(filter.token_at_depth(2).is_some()); + assert!(filter.token_at_depth(3).is_none()); + + let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 AND account_ids=3 OR account_ids=4 AND account_ids=5 OR account_ids=6").unwrap().unwrap(); + assert!(filter.token_at_depth(2).is_some()); + assert!(filter.token_at_depth(3).is_none()); } } diff --git a/http-ui/src/main.rs b/http-ui/src/main.rs index 83fce9a9c..da5595cc0 100644 --- a/http-ui/src/main.rs +++ b/http-ui/src/main.rs @@ -745,10 +745,9 @@ async fn main() -> anyhow::Result<()> { }; let condition = match (filters, facet_filters) { - (Some(filters), Some(facet_filters)) => Some(FilterCondition::And( - Box::new(filters.into()), - Box::new(facet_filters.into()), - )), + (Some(filters), Some(facet_filters)) => { + Some(FilterCondition::And(vec![filters.into(), facet_filters.into()])) + } (Some(condition), None) | (None, Some(condition)) => Some(condition.into()), _otherwise => None, }; diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 90aab826a..d14b33f80 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -89,52 +89,44 @@ impl<'a> Filter<'a> { I: IntoIterator>, J: IntoIterator, { - let mut ands: Option = None; + let mut ands = vec![]; for either in array { match either { Either::Left(array) => { - let mut ors = None; + let mut ors = vec![]; for rule in array { if let Some(filter) = Self::from_str(rule.as_ref())? { - let condition = filter.condition; - ors = match ors.take() { - Some(ors) => { - Some(FilterCondition::Or(Box::new(ors), Box::new(condition))) - } - None => Some(condition), - }; + ors.push(filter.condition); } } - if let Some(rule) = ors { - ands = match ands.take() { - Some(ands) => { - Some(FilterCondition::And(Box::new(ands), Box::new(rule))) - } - None => Some(rule), - }; + if ors.len() > 1 { + ands.push(FilterCondition::Or(ors)); + } else if ors.len() == 1 { + ands.push(ors[0].clone()); } } Either::Right(rule) => { if let Some(filter) = Self::from_str(rule.as_ref())? { - let condition = filter.condition; - ands = match ands.take() { - Some(ands) => { - Some(FilterCondition::And(Box::new(ands), Box::new(condition))) - } - None => Some(condition), - }; + ands.push(filter.condition); } } } } + let and = if ands.is_empty() { + return Ok(None); + } else if ands.len() == 1 { + ands[0].clone() + } else { + FilterCondition::And(ands) + }; - if let Some(token) = ands.as_ref().and_then(|fc| fc.token_at_depth(MAX_FILTER_DEPTH)) { + if let Some(token) = and.token_at_depth(MAX_FILTER_DEPTH) { return Err(token.as_external_error(FilterError::TooDeep).into()); } - Ok(ands.map(|ands| Self { condition: ands })) + Ok(Some(Self { condition: and })) } pub fn from_str(expression: &'a str) -> Result> { @@ -397,38 +389,28 @@ impl<'a> Filter<'a> { } } } - FilterCondition::Or(lhs, rhs) => { - let lhs = Self::inner_evaluate( - &(lhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - let rhs = Self::inner_evaluate( - &(rhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - Ok(lhs | rhs) - } - FilterCondition::And(lhs, rhs) => { - let lhs = Self::inner_evaluate( - &(lhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - if lhs.is_empty() { - return Ok(lhs); + FilterCondition::Or(subfilters) => { + let mut bitmap = RoaringBitmap::new(); + for f in subfilters { + bitmap |= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; + } + Ok(bitmap) + } + FilterCondition::And(subfilters) => { + let mut subfilters_iter = subfilters.iter(); + if let Some(first_subfilter) = subfilters_iter.next() { + let mut bitmap = + Self::inner_evaluate(&(first_subfilter.clone()).into(), rtxn, index, filterable_fields)?; + for f in subfilters_iter { + if bitmap.is_empty() { + return Ok(bitmap); + } + bitmap &= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?; + } + Ok(bitmap) + } else { + Ok(RoaringBitmap::new()) } - let rhs = Self::inner_evaluate( - &(rhs.as_ref().clone()).into(), - rtxn, - index, - filterable_fields, - )?; - Ok(lhs & rhs) } FilterCondition::GeoLowerThan { point, radius } => { if filterable_fields.contains("_geo") { @@ -732,12 +714,10 @@ mod tests { } } - let error = Filter::from_str(&filter_string).unwrap_err(); - assert!( - error.to_string().starts_with("Too many filter conditions"), - "{}", - error.to_string() - ); + // Note: the filter used to be rejected for being too deep, but that is + // no longer the case + let filter = Filter::from_str(&filter_string).unwrap(); + assert!(filter.is_some()); } #[test]