Make AND+OR filters n-ary (store a vector of subfilters instead of 2)

NOTE: The token_at_depth method is a bit useless now, as the only
cases where there would be a token at depth 1000 are the cases where
the parser already stack-overflowed earlier.

Example: (((((... (x=1) ...)))))
This commit is contained in:
Loïc Lecrenier 2022-06-20 18:46:57 +02:00
parent f55034ed54
commit 258c3dd563
3 changed files with 118 additions and 110 deletions

View File

@ -113,8 +113,8 @@ impl<'a> From<Span<'a>> for Token<'a> {
#[derive(Debug, Clone, PartialEq, Eq)] #[derive(Debug, Clone, PartialEq, Eq)]
pub enum FilterCondition<'a> { pub enum FilterCondition<'a> {
Condition { fid: Token<'a>, op: Condition<'a> }, Condition { fid: Token<'a>, op: Condition<'a> },
Or(Box<Self>, Box<Self>), Or(Vec<Self>),
And(Box<Self>, Box<Self>), And(Vec<Self>),
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
} }
@ -124,13 +124,23 @@ impl<'a> FilterCondition<'a> {
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> { pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
match self { match self {
FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid), FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid),
FilterCondition::Or(left, right) => { FilterCondition::Or(subfilters) => {
let depth = depth.saturating_sub(1); let depth = depth.saturating_sub(1);
right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) for f in subfilters.iter() {
if let Some(t) = f.token_at_depth(depth) {
return Some(t);
}
}
None
} }
FilterCondition::And(left, right) => { FilterCondition::And(subfilters) => {
let depth = depth.saturating_sub(1); let depth = depth.saturating_sub(1);
right.token_at_depth(depth).or_else(|| left.token_at_depth(depth)) for f in subfilters.iter() {
if let Some(t) = f.token_at_depth(depth) {
return Some(t);
}
}
None
} }
FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point), FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point),
FilterCondition::GeoGreaterThan { point: [point, _], .. } if depth == 0 => Some(point), FilterCondition::GeoGreaterThan { point: [point, _], .. } if depth == 0 => Some(point),
@ -144,13 +154,13 @@ impl<'a> FilterCondition<'a> {
match self { match self {
Condition { fid, op } => match op.negate() { Condition { fid, op } => match op.negate() {
(op, None) => Condition { fid, op }, (op, None) => Condition { fid, op },
(a, Some(b)) => Or( (a, Some(b)) => Or(vec![
Condition { fid: fid.clone(), op: a }.into(), Condition { fid: fid.clone(), op: a }.into(),
Condition { fid, op: b }.into(), Condition { fid, op: b }.into(),
), ]),
}, },
Or(a, b) => And(a.negate().into(), b.negate().into()), Or(subfilters) => And(subfilters.into_iter().map(|x| x.negate().into()).collect()),
And(a, b) => Or(a.negate().into(), b.negate().into()), And(subfilters) => Or(subfilters.into_iter().map(|x| x.negate().into()).collect()),
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
} }
@ -172,26 +182,36 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>)
/// or = and ("OR" WS+ and)* /// or = and ("OR" WS+ and)*
fn parse_or(input: Span) -> IResult<FilterCondition> { fn parse_or(input: Span) -> IResult<FilterCondition> {
let (input, lhs) = parse_and(input)?; let (input, first_filter) = parse_and(input)?;
// if we found a `OR` then we MUST find something next // if we found a `OR` then we MUST find something next
let (input, ors) = many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?; let (input, mut ors) =
many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?;
let expr = ors let filter = if ors.is_empty() {
.into_iter() first_filter
.fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch))); } else {
Ok((input, expr)) ors.insert(0, first_filter);
FilterCondition::Or(ors)
};
Ok((input, filter))
} }
/// and = not ("AND" not)* /// and = not ("AND" not)*
fn parse_and(input: Span) -> IResult<FilterCondition> { fn parse_and(input: Span) -> IResult<FilterCondition> {
let (input, lhs) = parse_not(input)?; let (input, first_filter) = parse_not(input)?;
// if we found a `AND` then we MUST find something next // if we found a `AND` then we MUST find something next
let (input, ors) = let (input, mut ands) =
many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?; many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?;
let expr = ors
.into_iter() let filter = if ands.is_empty() {
.fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch))); first_filter
Ok((input, expr)) } else {
ands.insert(0, first_filter);
FilterCondition::And(ands)
};
Ok((input, filter))
} }
/// not = ("NOT" WS+ not) | primary /// not = ("NOT" WS+ not) | primary
@ -477,7 +497,7 @@ pub mod tests {
( (
"NOT subscribers 100 TO 1000", "NOT subscribers 100 TO 1000",
Fc::Or( Fc::Or(
Fc::Condition { vec![Fc::Condition {
fid: rtok("NOT ", "subscribers"), fid: rtok("NOT ", "subscribers"),
op: Condition::LowerThan(rtok("NOT subscribers ", "100")), op: Condition::LowerThan(rtok("NOT subscribers ", "100")),
} }
@ -486,7 +506,7 @@ pub mod tests {
fid: rtok("NOT ", "subscribers"), fid: rtok("NOT ", "subscribers"),
op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")), op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")),
} }
.into(), .into()],
), ),
), ),
( (
@ -506,7 +526,7 @@ pub mod tests {
// test simple `or` and `and` // test simple `or` and `and`
( (
"channel = ponce AND 'dog race' != 'bernese mountain'", "channel = ponce AND 'dog race' != 'bernese mountain'",
Fc::And( Fc::And(vec![
Fc::Condition { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Condition::Equal(rtok("channel = ", "ponce")), op: Condition::Equal(rtok("channel = ", "ponce")),
@ -520,11 +540,11 @@ pub mod tests {
)), )),
} }
.into(), .into(),
), ]),
), ),
( (
"channel = ponce OR 'dog race' != 'bernese mountain'", "channel = ponce OR 'dog race' != 'bernese mountain'",
Fc::Or( Fc::Or(vec![
Fc::Condition { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Condition::Equal(rtok("channel = ", "ponce")), op: Condition::Equal(rtok("channel = ", "ponce")),
@ -538,12 +558,12 @@ pub mod tests {
)), )),
} }
.into(), .into(),
), ]),
), ),
( (
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000", "channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000",
Fc::Or( Fc::Or(vec![
Fc::And( Fc::And(vec![
Fc::Condition { Fc::Condition {
fid: rtok("", "channel"), fid: rtok("", "channel"),
op: Condition::Equal(rtok("channel = ", "ponce")), op: Condition::Equal(rtok("channel = ", "ponce")),
@ -557,7 +577,7 @@ pub mod tests {
)), )),
} }
.into(), .into(),
) ])
.into(), .into(),
Fc::Condition { Fc::Condition {
fid: rtok( fid: rtok(
@ -570,30 +590,30 @@ pub mod tests {
)), )),
} }
.into(), .into(),
), ]),
), ),
// test parenthesis // test parenthesis
( (
"channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )", "channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )",
Fc::And( Fc::And(vec![
Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(), Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(),
Fc::Or( Fc::Or(vec![
Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(), Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(),
Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),]
).into()), ).into()]),
), ),
( (
"(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)", "(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)",
Fc::And( Fc::And(vec![
Fc::Or( Fc::Or(vec![
Fc::And( Fc::And(vec![
Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(), Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(),
Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(), Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(),
).into(), ]).into(),
Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(), Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
).into(), ]).into(),
Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into() Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into()
) ])
) )
]; ];
@ -657,6 +677,15 @@ pub mod tests {
#[test] #[test]
fn depth() { fn depth() {
let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap().unwrap(); let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap().unwrap();
assert!(filter.token_at_depth(5).is_some()); assert!(filter.token_at_depth(1).is_some());
assert!(filter.token_at_depth(2).is_none());
let filter = FilterCondition::parse("(account_ids=1 OR (account_ids=2 AND account_ids=3) OR (account_ids=4 AND account_ids=5) OR account_ids=6)").unwrap().unwrap();
assert!(filter.token_at_depth(2).is_some());
assert!(filter.token_at_depth(3).is_none());
let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 AND account_ids=3 OR account_ids=4 AND account_ids=5 OR account_ids=6").unwrap().unwrap();
assert!(filter.token_at_depth(2).is_some());
assert!(filter.token_at_depth(3).is_none());
} }
} }

View File

@ -745,10 +745,9 @@ async fn main() -> anyhow::Result<()> {
}; };
let condition = match (filters, facet_filters) { let condition = match (filters, facet_filters) {
(Some(filters), Some(facet_filters)) => Some(FilterCondition::And( (Some(filters), Some(facet_filters)) => {
Box::new(filters.into()), Some(FilterCondition::And(vec![filters.into(), facet_filters.into()]))
Box::new(facet_filters.into()), }
)),
(Some(condition), None) | (None, Some(condition)) => Some(condition.into()), (Some(condition), None) | (None, Some(condition)) => Some(condition.into()),
_otherwise => None, _otherwise => None,
}; };

View File

@ -89,52 +89,44 @@ impl<'a> Filter<'a> {
I: IntoIterator<Item = Either<J, &'a str>>, I: IntoIterator<Item = Either<J, &'a str>>,
J: IntoIterator<Item = &'a str>, J: IntoIterator<Item = &'a str>,
{ {
let mut ands: Option<FilterCondition> = None; let mut ands = vec![];
for either in array { for either in array {
match either { match either {
Either::Left(array) => { Either::Left(array) => {
let mut ors = None; let mut ors = vec![];
for rule in array { for rule in array {
if let Some(filter) = Self::from_str(rule.as_ref())? { if let Some(filter) = Self::from_str(rule.as_ref())? {
let condition = filter.condition; ors.push(filter.condition);
ors = match ors.take() {
Some(ors) => {
Some(FilterCondition::Or(Box::new(ors), Box::new(condition)))
}
None => Some(condition),
};
} }
} }
if let Some(rule) = ors { if ors.len() > 1 {
ands = match ands.take() { ands.push(FilterCondition::Or(ors));
Some(ands) => { } else if ors.len() == 1 {
Some(FilterCondition::And(Box::new(ands), Box::new(rule))) ands.push(ors[0].clone());
}
None => Some(rule),
};
} }
} }
Either::Right(rule) => { Either::Right(rule) => {
if let Some(filter) = Self::from_str(rule.as_ref())? { if let Some(filter) = Self::from_str(rule.as_ref())? {
let condition = filter.condition; ands.push(filter.condition);
ands = match ands.take() {
Some(ands) => {
Some(FilterCondition::And(Box::new(ands), Box::new(condition)))
}
None => Some(condition),
};
} }
} }
} }
} }
let and = if ands.is_empty() {
return Ok(None);
} else if ands.len() == 1 {
ands[0].clone()
} else {
FilterCondition::And(ands)
};
if let Some(token) = ands.as_ref().and_then(|fc| fc.token_at_depth(MAX_FILTER_DEPTH)) { if let Some(token) = and.token_at_depth(MAX_FILTER_DEPTH) {
return Err(token.as_external_error(FilterError::TooDeep).into()); return Err(token.as_external_error(FilterError::TooDeep).into());
} }
Ok(ands.map(|ands| Self { condition: ands })) Ok(Some(Self { condition: and }))
} }
pub fn from_str(expression: &'a str) -> Result<Option<Self>> { pub fn from_str(expression: &'a str) -> Result<Option<Self>> {
@ -397,38 +389,28 @@ impl<'a> Filter<'a> {
} }
} }
} }
FilterCondition::Or(lhs, rhs) => { FilterCondition::Or(subfilters) => {
let lhs = Self::inner_evaluate( let mut bitmap = RoaringBitmap::new();
&(lhs.as_ref().clone()).into(), for f in subfilters {
rtxn, bitmap |= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
index, }
filterable_fields, Ok(bitmap)
)?; }
let rhs = Self::inner_evaluate( FilterCondition::And(subfilters) => {
&(rhs.as_ref().clone()).into(), let mut subfilters_iter = subfilters.iter();
rtxn, if let Some(first_subfilter) = subfilters_iter.next() {
index, let mut bitmap =
filterable_fields, Self::inner_evaluate(&(first_subfilter.clone()).into(), rtxn, index, filterable_fields)?;
)?; for f in subfilters_iter {
Ok(lhs | rhs) if bitmap.is_empty() {
} return Ok(bitmap);
FilterCondition::And(lhs, rhs) => { }
let lhs = Self::inner_evaluate( bitmap &= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
&(lhs.as_ref().clone()).into(), }
rtxn, Ok(bitmap)
index, } else {
filterable_fields, Ok(RoaringBitmap::new())
)?;
if lhs.is_empty() {
return Ok(lhs);
} }
let rhs = Self::inner_evaluate(
&(rhs.as_ref().clone()).into(),
rtxn,
index,
filterable_fields,
)?;
Ok(lhs & rhs)
} }
FilterCondition::GeoLowerThan { point, radius } => { FilterCondition::GeoLowerThan { point, radius } => {
if filterable_fields.contains("_geo") { if filterable_fields.contains("_geo") {
@ -732,12 +714,10 @@ mod tests {
} }
} }
let error = Filter::from_str(&filter_string).unwrap_err(); // Note: the filter used to be rejected for being too deep, but that is
assert!( // no longer the case
error.to_string().starts_with("Too many filter conditions"), let filter = Filter::from_str(&filter_string).unwrap();
"{}", assert!(filter.is_some());
error.to_string()
);
} }
#[test] #[test]