422: Prefer returning `None` instead of using an `FilterCondition::Empty` state r=Kerollmops a=Kerollmops

This PR is related to the issue comment https://github.com/meilisearch/MeiliSearch/issues/1338#issuecomment-989322889 which exhibits the fact that when a filter is known to be empty no results are returned which is wrong, the filter should not apply as no restriction is done on the documents set.

The filter system on the milli side has introduced an Empty state which was used in this kind of situation but I found out that it is not needed and that when we parse a filter and that it is empty we can simply return `None` as the `Filter::from_array` constructor does. So I removed it and added tests!

On the MeiliSearch side, we just need to match on a `None` and completely ignore the filter in such a case.

Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2021-12-09 15:03:04 +00:00 committed by GitHub
commit 11a056d116
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 58 additions and 60 deletions

View File

@ -117,7 +117,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
let mut search = index.search(&rtxn);
search.query(query).optional_words(conf.optional_words);
if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap();
let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter);
}
if let Some(sort) = &conf.sort {

View File

@ -250,8 +250,9 @@ impl Search {
}
if let Some(ref filter) = self.filter {
let condition = milli::Filter::from_str(filter)?;
search.filter(condition);
if let Some(condition) = milli::Filter::from_str(filter)? {
search.filter(condition);
}
}
if let Some(offset) = self.offset {

View File

@ -109,7 +109,6 @@ pub enum FilterCondition<'a> {
And(Box<Self>, Box<Self>),
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
Empty,
}
impl<'a> FilterCondition<'a> {
@ -144,18 +143,17 @@ impl<'a> FilterCondition<'a> {
},
Or(a, b) => And(a.negate().into(), b.negate().into()),
And(a, b) => Or(a.negate().into(), b.negate().into()),
Empty => Empty,
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
}
}
pub fn parse(input: &'a str) -> Result<Self, Error> {
pub fn parse(input: &'a str) -> Result<Option<Self>, Error> {
if input.trim().is_empty() {
return Ok(Self::Empty);
return Ok(None);
}
let span = Span::new_extra(input, input);
parse_filter(span).finish().map(|(_rem, output)| output)
parse_filter(span).finish().map(|(_rem, output)| Some(output))
}
}
@ -560,7 +558,7 @@ pub mod tests {
result.unwrap_err()
);
let filter = result.unwrap();
assert_eq!(filter, expected, "Filter `{}` failed.", input);
assert_eq!(filter, Some(expected), "Filter `{}` failed.", input);
}
}
@ -605,7 +603,7 @@ pub mod tests {
#[test]
fn depth() {
let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap();
let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap().unwrap();
assert!(filter.token_at_depth(5).is_some());
}
}

View File

@ -738,7 +738,7 @@ async fn main() -> anyhow::Result<()> {
let filters = match query.filters.as_ref() {
Some(condition) if !condition.trim().is_empty() => {
Some(MilliFilter::from_str(condition).unwrap())
MilliFilter::from_str(condition).unwrap()
}
_otherwise => None,
};

View File

@ -86,13 +86,15 @@ impl<'a> Filter<'a> {
Either::Left(array) => {
let mut ors = None;
for rule in array {
let condition = Self::from_str(rule.as_ref())?.condition;
ors = match ors.take() {
Some(ors) => {
Some(FilterCondition::Or(Box::new(ors), Box::new(condition)))
}
None => Some(condition),
};
if let Some(filter) = Self::from_str(rule.as_ref())? {
let condition = filter.condition;
ors = match ors.take() {
Some(ors) => {
Some(FilterCondition::Or(Box::new(ors), Box::new(condition)))
}
None => Some(condition),
};
}
}
if let Some(rule) = ors {
@ -105,13 +107,15 @@ impl<'a> Filter<'a> {
}
}
Either::Right(rule) => {
let condition = Self::from_str(rule.as_ref())?.condition;
ands = match ands.take() {
Some(ands) => {
Some(FilterCondition::And(Box::new(ands), Box::new(condition)))
}
None => Some(condition),
};
if let Some(filter) = Self::from_str(rule.as_ref())? {
let condition = filter.condition;
ands = match ands.take() {
Some(ands) => {
Some(FilterCondition::And(Box::new(ands), Box::new(condition)))
}
None => Some(condition),
};
}
}
}
}
@ -123,9 +127,10 @@ impl<'a> Filter<'a> {
Ok(ands.map(|ands| Self { condition: ands }))
}
pub fn from_str(expression: &'a str) -> Result<Self> {
pub fn from_str(expression: &'a str) -> Result<Option<Self>> {
let condition = match FilterCondition::parse(expression) {
Ok(fc) => Ok(fc),
Ok(Some(fc)) => Ok(fc),
Ok(None) => return Ok(None),
Err(e) => Err(Error::UserError(UserError::InvalidFilter(e.to_string()))),
}?;
@ -133,7 +138,7 @@ impl<'a> Filter<'a> {
return Err(token.as_external_error(FilterError::TooDeep).into());
}
Ok(Self { condition })
Ok(Some(Self { condition }))
}
}
@ -377,7 +382,6 @@ impl<'a> Filter<'a> {
let rhs = Self::evaluate(&(rhs.as_ref().clone()).into(), rtxn, index)?;
Ok(lhs & rhs)
}
FilterCondition::Empty => Ok(RoaringBitmap::new()),
FilterCondition::GeoLowerThan { point, radius } => {
let filterable_fields = index.filterable_fields(rtxn)?;
if filterable_fields.contains("_geo") {
@ -451,20 +455,20 @@ mod tests {
fn from_array() {
// Simple array with Left
let condition = Filter::from_array(vec![Either::Left(["channel = mv"])]).unwrap().unwrap();
let expected = Filter::from_str("channel = mv").unwrap();
let expected = Filter::from_str("channel = mv").unwrap().unwrap();
assert_eq!(condition, expected);
// Simple array with Right
let condition = Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = mv")])
.unwrap()
.unwrap();
let expected = Filter::from_str("channel = mv").unwrap();
let expected = Filter::from_str("channel = mv").unwrap().unwrap();
assert_eq!(condition, expected);
// Array with Left and escaped quote
let condition =
Filter::from_array(vec![Either::Left(["channel = \"Mister Mv\""])]).unwrap().unwrap();
let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap();
let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap();
assert_eq!(condition, expected);
// Array with Right and escaped quote
@ -472,13 +476,13 @@ mod tests {
Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = \"Mister Mv\"")])
.unwrap()
.unwrap();
let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap();
let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap();
assert_eq!(condition, expected);
// Array with Left and escaped simple quote
let condition =
Filter::from_array(vec![Either::Left(["channel = 'Mister Mv'"])]).unwrap().unwrap();
let expected = Filter::from_str("channel = 'Mister Mv'").unwrap();
let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap();
assert_eq!(condition, expected);
// Array with Right and escaped simple quote
@ -486,13 +490,13 @@ mod tests {
Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = 'Mister Mv'")])
.unwrap()
.unwrap();
let expected = Filter::from_str("channel = 'Mister Mv'").unwrap();
let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap();
assert_eq!(condition, expected);
// Simple with parenthesis
let condition =
Filter::from_array(vec![Either::Left(["(channel = mv)"])]).unwrap().unwrap();
let expected = Filter::from_str("(channel = mv)").unwrap();
let expected = Filter::from_str("(channel = mv)").unwrap().unwrap();
assert_eq!(condition, expected);
// Test that the facet condition is correctly generated.
@ -503,7 +507,9 @@ mod tests {
.unwrap()
.unwrap();
let expected =
Filter::from_str("channel = gotaga AND (timestamp = 44 OR channel != ponce)").unwrap();
Filter::from_str("channel = gotaga AND (timestamp = 44 OR channel != ponce)")
.unwrap()
.unwrap();
println!("\nExpecting: {:#?}\nGot: {:#?}\n", expected, condition);
assert_eq!(condition, expected);
}
@ -516,13 +522,13 @@ mod tests {
let index = Index::new(options, &path).unwrap();
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap();
let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. Available filterable attributes are: ``."
));
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap();
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `dog` is not filterable. Available filterable attributes are: ``."
@ -539,13 +545,13 @@ mod tests {
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap();
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
));
let filter = Filter::from_str("name = 12").unwrap();
let filter = Filter::from_str("name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`."
@ -570,7 +576,7 @@ mod tests {
let rtxn = index.read_txn().unwrap();
// georadius have a bad latitude
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap();
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().starts_with(
@ -581,14 +587,14 @@ mod tests {
);
// georadius have a bad latitude
let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap();
let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
));
// georadius have a bad longitude
let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap();
let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(
error.to_string().contains(
@ -599,7 +605,7 @@ mod tests {
);
// georadius have a bad longitude
let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap();
let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
@ -608,19 +614,6 @@ mod tests {
#[test]
fn filter_depth() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
// Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index);
builder.set_searchable_fields(vec![S("account_ids")]);
builder.set_filterable_fields(hashset! { S("account_ids") });
builder.execute(|_| ()).unwrap();
wtxn.commit().unwrap();
// generates a big (2 MiB) filter with too much of ORs.
let tipic_filter = "account_ids=14361 OR ";
let mut filter_string = String::with_capacity(tipic_filter.len() * 14360);
@ -638,4 +631,10 @@ mod tests {
error.to_string()
);
}
#[test]
fn empty_filter() {
let option = Filter::from_str(" ").unwrap();
assert_eq!(option, None);
}
}

View File

@ -681,7 +681,7 @@ mod tests {
builder.delete_external_id("1_4");
builder.execute().unwrap();
let filter = Filter::from_str("label = sign").unwrap();
let filter = Filter::from_str("label = sign").unwrap().unwrap();
let results = index.search(&wtxn).filter(filter).execute().unwrap();
assert!(results.documents_ids.is_empty());

View File

@ -1060,7 +1060,7 @@ mod tests {
wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("toto = 32").unwrap();
let filter = Filter::from_str("toto = 32").unwrap().unwrap();
let _ = filter.evaluate(&rtxn, &index).unwrap_err();
}