mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Make AND+OR filters n-ary (store a vector of subfilters instead of 2)
NOTE: The token_at_depth is method is a bit useless now, as the only cases where there would be a toke at depth 1000 are the cases where the parser already stack-overflowed earlier. Example: (((((... (x=1) ...)))))
This commit is contained in:
parent
f55034ed54
commit
258c3dd563
@ -113,8 +113,8 @@ impl<'a> From<Span<'a>> for Token<'a> {
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub enum FilterCondition<'a> {
|
||||
Condition { fid: Token<'a>, op: Condition<'a> },
|
||||
Or(Box<Self>, Box<Self>),
|
||||
And(Box<Self>, Box<Self>),
|
||||
Or(Vec<Self>),
|
||||
And(Vec<Self>),
|
||||
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
|
||||
}
|
||||
@ -124,13 +124,23 @@ impl<'a> FilterCondition<'a> {
|
||||
pub fn token_at_depth(&self, depth: usize) -> Option<&Token> {
|
||||
match self {
|
||||
FilterCondition::Condition { fid, .. } if depth == 0 => Some(fid),
|
||||
FilterCondition::Or(left, right) => {
|
||||
FilterCondition::Or(subfilters) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
right.token_at_depth(depth).or_else(|| left.token_at_depth(depth))
|
||||
for f in subfilters.iter() {
|
||||
if let Some(t) = f.token_at_depth(depth) {
|
||||
return Some(t);
|
||||
}
|
||||
FilterCondition::And(left, right) => {
|
||||
}
|
||||
None
|
||||
}
|
||||
FilterCondition::And(subfilters) => {
|
||||
let depth = depth.saturating_sub(1);
|
||||
right.token_at_depth(depth).or_else(|| left.token_at_depth(depth))
|
||||
for f in subfilters.iter() {
|
||||
if let Some(t) = f.token_at_depth(depth) {
|
||||
return Some(t);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point: [point, _], .. } if depth == 0 => Some(point),
|
||||
FilterCondition::GeoGreaterThan { point: [point, _], .. } if depth == 0 => Some(point),
|
||||
@ -144,13 +154,13 @@ impl<'a> FilterCondition<'a> {
|
||||
match self {
|
||||
Condition { fid, op } => match op.negate() {
|
||||
(op, None) => Condition { fid, op },
|
||||
(a, Some(b)) => Or(
|
||||
(a, Some(b)) => Or(vec![
|
||||
Condition { fid: fid.clone(), op: a }.into(),
|
||||
Condition { fid, op: b }.into(),
|
||||
),
|
||||
]),
|
||||
},
|
||||
Or(a, b) => And(a.negate().into(), b.negate().into()),
|
||||
And(a, b) => Or(a.negate().into(), b.negate().into()),
|
||||
Or(subfilters) => And(subfilters.into_iter().map(|x| x.negate().into()).collect()),
|
||||
And(subfilters) => Or(subfilters.into_iter().map(|x| x.negate().into()).collect()),
|
||||
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
|
||||
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
|
||||
}
|
||||
@ -172,26 +182,36 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>)
|
||||
|
||||
/// or = and ("OR" WS+ and)*
|
||||
fn parse_or(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, lhs) = parse_and(input)?;
|
||||
let (input, first_filter) = parse_and(input)?;
|
||||
// if we found a `OR` then we MUST find something next
|
||||
let (input, ors) = many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?;
|
||||
let (input, mut ors) =
|
||||
many0(preceded(ws(tuple((tag("OR"), multispace1))), cut(parse_and)))(input)?;
|
||||
|
||||
let expr = ors
|
||||
.into_iter()
|
||||
.fold(lhs, |acc, branch| FilterCondition::Or(Box::new(acc), Box::new(branch)));
|
||||
Ok((input, expr))
|
||||
let filter = if ors.is_empty() {
|
||||
first_filter
|
||||
} else {
|
||||
ors.insert(0, first_filter);
|
||||
FilterCondition::Or(ors)
|
||||
};
|
||||
|
||||
Ok((input, filter))
|
||||
}
|
||||
|
||||
/// and = not ("AND" not)*
|
||||
fn parse_and(input: Span) -> IResult<FilterCondition> {
|
||||
let (input, lhs) = parse_not(input)?;
|
||||
let (input, first_filter) = parse_not(input)?;
|
||||
// if we found a `AND` then we MUST find something next
|
||||
let (input, ors) =
|
||||
let (input, mut ands) =
|
||||
many0(preceded(ws(tuple((tag("AND"), multispace1))), cut(parse_not)))(input)?;
|
||||
let expr = ors
|
||||
.into_iter()
|
||||
.fold(lhs, |acc, branch| FilterCondition::And(Box::new(acc), Box::new(branch)));
|
||||
Ok((input, expr))
|
||||
|
||||
let filter = if ands.is_empty() {
|
||||
first_filter
|
||||
} else {
|
||||
ands.insert(0, first_filter);
|
||||
FilterCondition::And(ands)
|
||||
};
|
||||
|
||||
Ok((input, filter))
|
||||
}
|
||||
|
||||
/// not = ("NOT" WS+ not) | primary
|
||||
@ -477,7 +497,7 @@ pub mod tests {
|
||||
(
|
||||
"NOT subscribers 100 TO 1000",
|
||||
Fc::Or(
|
||||
Fc::Condition {
|
||||
vec![Fc::Condition {
|
||||
fid: rtok("NOT ", "subscribers"),
|
||||
op: Condition::LowerThan(rtok("NOT subscribers ", "100")),
|
||||
}
|
||||
@ -486,7 +506,7 @@ pub mod tests {
|
||||
fid: rtok("NOT ", "subscribers"),
|
||||
op: Condition::GreaterThan(rtok("NOT subscribers 100 TO ", "1000")),
|
||||
}
|
||||
.into(),
|
||||
.into()],
|
||||
),
|
||||
),
|
||||
(
|
||||
@ -506,7 +526,7 @@ pub mod tests {
|
||||
// test simple `or` and `and`
|
||||
(
|
||||
"channel = ponce AND 'dog race' != 'bernese mountain'",
|
||||
Fc::And(
|
||||
Fc::And(vec![
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::Equal(rtok("channel = ", "ponce")),
|
||||
@ -520,11 +540,11 @@ pub mod tests {
|
||||
)),
|
||||
}
|
||||
.into(),
|
||||
),
|
||||
]),
|
||||
),
|
||||
(
|
||||
"channel = ponce OR 'dog race' != 'bernese mountain'",
|
||||
Fc::Or(
|
||||
Fc::Or(vec![
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::Equal(rtok("channel = ", "ponce")),
|
||||
@ -538,12 +558,12 @@ pub mod tests {
|
||||
)),
|
||||
}
|
||||
.into(),
|
||||
),
|
||||
]),
|
||||
),
|
||||
(
|
||||
"channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000",
|
||||
Fc::Or(
|
||||
Fc::And(
|
||||
Fc::Or(vec![
|
||||
Fc::And(vec![
|
||||
Fc::Condition {
|
||||
fid: rtok("", "channel"),
|
||||
op: Condition::Equal(rtok("channel = ", "ponce")),
|
||||
@ -557,7 +577,7 @@ pub mod tests {
|
||||
)),
|
||||
}
|
||||
.into(),
|
||||
)
|
||||
])
|
||||
.into(),
|
||||
Fc::Condition {
|
||||
fid: rtok(
|
||||
@ -570,30 +590,30 @@ pub mod tests {
|
||||
)),
|
||||
}
|
||||
.into(),
|
||||
),
|
||||
]),
|
||||
),
|
||||
// test parenthesis
|
||||
(
|
||||
"channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > 1000 )",
|
||||
Fc::And(
|
||||
Fc::And(vec![
|
||||
Fc::Condition { fid: rtok("", "channel"), op: Condition::Equal(rtok("channel = ", "ponce")) }.into(),
|
||||
Fc::Or(
|
||||
Fc::Or(vec![
|
||||
Fc::Condition { fid: rtok("channel = ponce AND ( '", "dog race"), op: Condition::NotEqual(rtok("channel = ponce AND ( 'dog race' != '", "bernese mountain"))}.into(),
|
||||
Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
|
||||
).into()),
|
||||
Fc::Condition { fid: rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("channel = ponce AND ( 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),]
|
||||
).into()]),
|
||||
),
|
||||
(
|
||||
"(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, 14)",
|
||||
Fc::And(
|
||||
Fc::Or(
|
||||
Fc::And(
|
||||
Fc::And(vec![
|
||||
Fc::Or(vec![
|
||||
Fc::And(vec![
|
||||
Fc::Condition { fid: rtok("(", "channel"), op: Condition::Equal(rtok("(channel = ", "ponce")) }.into(),
|
||||
Fc::Condition { fid: rtok("(channel = ponce AND '", "dog race"), op: Condition::NotEqual(rtok("(channel = ponce AND 'dog race' != '", "bernese mountain")) }.into(),
|
||||
).into(),
|
||||
]).into(),
|
||||
Fc::Condition { fid: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR ", "subscribers"), op: Condition::GreaterThan(rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > ", "1000")) }.into(),
|
||||
).into(),
|
||||
]).into(),
|
||||
Fc::GeoLowerThan { point: [rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(", "12"), rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, ", "13")], radius: rtok("(channel = ponce AND 'dog race' != 'bernese mountain' OR subscribers > 1000) AND _geoRadius(12, 13, ", "14") }.into()
|
||||
)
|
||||
])
|
||||
)
|
||||
];
|
||||
|
||||
@ -657,6 +677,15 @@ pub mod tests {
|
||||
#[test]
|
||||
fn depth() {
|
||||
let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap().unwrap();
|
||||
assert!(filter.token_at_depth(5).is_some());
|
||||
assert!(filter.token_at_depth(1).is_some());
|
||||
assert!(filter.token_at_depth(2).is_none());
|
||||
|
||||
let filter = FilterCondition::parse("(account_ids=1 OR (account_ids=2 AND account_ids=3) OR (account_ids=4 AND account_ids=5) OR account_ids=6)").unwrap().unwrap();
|
||||
assert!(filter.token_at_depth(2).is_some());
|
||||
assert!(filter.token_at_depth(3).is_none());
|
||||
|
||||
let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 AND account_ids=3 OR account_ids=4 AND account_ids=5 OR account_ids=6").unwrap().unwrap();
|
||||
assert!(filter.token_at_depth(2).is_some());
|
||||
assert!(filter.token_at_depth(3).is_none());
|
||||
}
|
||||
}
|
||||
|
@ -745,10 +745,9 @@ async fn main() -> anyhow::Result<()> {
|
||||
};
|
||||
|
||||
let condition = match (filters, facet_filters) {
|
||||
(Some(filters), Some(facet_filters)) => Some(FilterCondition::And(
|
||||
Box::new(filters.into()),
|
||||
Box::new(facet_filters.into()),
|
||||
)),
|
||||
(Some(filters), Some(facet_filters)) => {
|
||||
Some(FilterCondition::And(vec![filters.into(), facet_filters.into()]))
|
||||
}
|
||||
(Some(condition), None) | (None, Some(condition)) => Some(condition.into()),
|
||||
_otherwise => None,
|
||||
};
|
||||
|
@ -89,52 +89,44 @@ impl<'a> Filter<'a> {
|
||||
I: IntoIterator<Item = Either<J, &'a str>>,
|
||||
J: IntoIterator<Item = &'a str>,
|
||||
{
|
||||
let mut ands: Option<FilterCondition> = None;
|
||||
let mut ands = vec![];
|
||||
|
||||
for either in array {
|
||||
match either {
|
||||
Either::Left(array) => {
|
||||
let mut ors = None;
|
||||
let mut ors = vec![];
|
||||
for rule in array {
|
||||
if let Some(filter) = Self::from_str(rule.as_ref())? {
|
||||
let condition = filter.condition;
|
||||
ors = match ors.take() {
|
||||
Some(ors) => {
|
||||
Some(FilterCondition::Or(Box::new(ors), Box::new(condition)))
|
||||
}
|
||||
None => Some(condition),
|
||||
};
|
||||
ors.push(filter.condition);
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(rule) = ors {
|
||||
ands = match ands.take() {
|
||||
Some(ands) => {
|
||||
Some(FilterCondition::And(Box::new(ands), Box::new(rule)))
|
||||
}
|
||||
None => Some(rule),
|
||||
};
|
||||
if ors.len() > 1 {
|
||||
ands.push(FilterCondition::Or(ors));
|
||||
} else if ors.len() == 1 {
|
||||
ands.push(ors[0].clone());
|
||||
}
|
||||
}
|
||||
Either::Right(rule) => {
|
||||
if let Some(filter) = Self::from_str(rule.as_ref())? {
|
||||
let condition = filter.condition;
|
||||
ands = match ands.take() {
|
||||
Some(ands) => {
|
||||
Some(FilterCondition::And(Box::new(ands), Box::new(condition)))
|
||||
ands.push(filter.condition);
|
||||
}
|
||||
None => Some(condition),
|
||||
}
|
||||
}
|
||||
}
|
||||
let and = if ands.is_empty() {
|
||||
return Ok(None);
|
||||
} else if ands.len() == 1 {
|
||||
ands[0].clone()
|
||||
} else {
|
||||
FilterCondition::And(ands)
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(token) = ands.as_ref().and_then(|fc| fc.token_at_depth(MAX_FILTER_DEPTH)) {
|
||||
if let Some(token) = and.token_at_depth(MAX_FILTER_DEPTH) {
|
||||
return Err(token.as_external_error(FilterError::TooDeep).into());
|
||||
}
|
||||
|
||||
Ok(ands.map(|ands| Self { condition: ands }))
|
||||
Ok(Some(Self { condition: and }))
|
||||
}
|
||||
|
||||
pub fn from_str(expression: &'a str) -> Result<Option<Self>> {
|
||||
@ -397,38 +389,28 @@ impl<'a> Filter<'a> {
|
||||
}
|
||||
}
|
||||
}
|
||||
FilterCondition::Or(lhs, rhs) => {
|
||||
let lhs = Self::inner_evaluate(
|
||||
&(lhs.as_ref().clone()).into(),
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
)?;
|
||||
let rhs = Self::inner_evaluate(
|
||||
&(rhs.as_ref().clone()).into(),
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
)?;
|
||||
Ok(lhs | rhs)
|
||||
FilterCondition::Or(subfilters) => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
for f in subfilters {
|
||||
bitmap |= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
|
||||
}
|
||||
FilterCondition::And(lhs, rhs) => {
|
||||
let lhs = Self::inner_evaluate(
|
||||
&(lhs.as_ref().clone()).into(),
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
)?;
|
||||
if lhs.is_empty() {
|
||||
return Ok(lhs);
|
||||
Ok(bitmap)
|
||||
}
|
||||
FilterCondition::And(subfilters) => {
|
||||
let mut subfilters_iter = subfilters.iter();
|
||||
if let Some(first_subfilter) = subfilters_iter.next() {
|
||||
let mut bitmap =
|
||||
Self::inner_evaluate(&(first_subfilter.clone()).into(), rtxn, index, filterable_fields)?;
|
||||
for f in subfilters_iter {
|
||||
if bitmap.is_empty() {
|
||||
return Ok(bitmap);
|
||||
}
|
||||
bitmap &= Self::inner_evaluate(&(f.clone()).into(), rtxn, index, filterable_fields)?;
|
||||
}
|
||||
Ok(bitmap)
|
||||
} else {
|
||||
Ok(RoaringBitmap::new())
|
||||
}
|
||||
let rhs = Self::inner_evaluate(
|
||||
&(rhs.as_ref().clone()).into(),
|
||||
rtxn,
|
||||
index,
|
||||
filterable_fields,
|
||||
)?;
|
||||
Ok(lhs & rhs)
|
||||
}
|
||||
FilterCondition::GeoLowerThan { point, radius } => {
|
||||
if filterable_fields.contains("_geo") {
|
||||
@ -732,12 +714,10 @@ mod tests {
|
||||
}
|
||||
}
|
||||
|
||||
let error = Filter::from_str(&filter_string).unwrap_err();
|
||||
assert!(
|
||||
error.to_string().starts_with("Too many filter conditions"),
|
||||
"{}",
|
||||
error.to_string()
|
||||
);
|
||||
// Note: the filter used to be rejected for being too deep, but that is
|
||||
// no longer the case
|
||||
let filter = Filter::from_str(&filter_string).unwrap();
|
||||
assert!(filter.is_some());
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
Loading…
Reference in New Issue
Block a user