422: Prefer returning `None` instead of using an `FilterCondition::Empty` state r=Kerollmops a=Kerollmops

This PR is related to the issue comment https://github.com/meilisearch/MeiliSearch/issues/1338#issuecomment-989322889 which exhibits the fact that when a filter is known to be empty no results are returned which is wrong, the filter should not apply as no restriction is done on the documents set.

The filter system on the milli side has introduced an Empty state which was used in this kind of situation but I found out that it is not needed and that when we parse a filter and that it is empty we can simply return `None` as the `Filter::from_array` constructor does. So I removed it and added tests!

On the MeiliSearch side, we just need to match on a `None` and completely ignore the filter in such a case.

Co-authored-by: Clément Renault <clement@meilisearch.com>
This commit is contained in:
bors[bot] 2021-12-09 15:03:04 +00:00 committed by GitHub
commit 11a056d116
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 58 additions and 60 deletions

View File

@ -117,7 +117,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
let mut search = index.search(&rtxn); let mut search = index.search(&rtxn);
search.query(query).optional_words(conf.optional_words); search.query(query).optional_words(conf.optional_words);
if let Some(filter) = conf.filter { if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap(); let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter); search.filter(filter);
} }
if let Some(sort) = &conf.sort { if let Some(sort) = &conf.sort {

View File

@ -250,9 +250,10 @@ impl Search {
} }
if let Some(ref filter) = self.filter { if let Some(ref filter) = self.filter {
let condition = milli::Filter::from_str(filter)?; if let Some(condition) = milli::Filter::from_str(filter)? {
search.filter(condition); search.filter(condition);
} }
}
if let Some(offset) = self.offset { if let Some(offset) = self.offset {
search.offset(offset); search.offset(offset);

View File

@ -109,7 +109,6 @@ pub enum FilterCondition<'a> {
And(Box<Self>, Box<Self>), And(Box<Self>, Box<Self>),
GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> }, GeoGreaterThan { point: [Token<'a>; 2], radius: Token<'a> },
Empty,
} }
impl<'a> FilterCondition<'a> { impl<'a> FilterCondition<'a> {
@ -144,18 +143,17 @@ impl<'a> FilterCondition<'a> {
}, },
Or(a, b) => And(a.negate().into(), b.negate().into()), Or(a, b) => And(a.negate().into(), b.negate().into()),
And(a, b) => Or(a.negate().into(), b.negate().into()), And(a, b) => Or(a.negate().into(), b.negate().into()),
Empty => Empty,
GeoLowerThan { point, radius } => GeoGreaterThan { point, radius }, GeoLowerThan { point, radius } => GeoGreaterThan { point, radius },
GeoGreaterThan { point, radius } => GeoLowerThan { point, radius }, GeoGreaterThan { point, radius } => GeoLowerThan { point, radius },
} }
} }
pub fn parse(input: &'a str) -> Result<Self, Error> { pub fn parse(input: &'a str) -> Result<Option<Self>, Error> {
if input.trim().is_empty() { if input.trim().is_empty() {
return Ok(Self::Empty); return Ok(None);
} }
let span = Span::new_extra(input, input); let span = Span::new_extra(input, input);
parse_filter(span).finish().map(|(_rem, output)| output) parse_filter(span).finish().map(|(_rem, output)| Some(output))
} }
} }
@ -560,7 +558,7 @@ pub mod tests {
result.unwrap_err() result.unwrap_err()
); );
let filter = result.unwrap(); let filter = result.unwrap();
assert_eq!(filter, expected, "Filter `{}` failed.", input); assert_eq!(filter, Some(expected), "Filter `{}` failed.", input);
} }
} }
@ -605,7 +603,7 @@ pub mod tests {
#[test] #[test]
fn depth() { fn depth() {
let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap(); let filter = FilterCondition::parse("account_ids=1 OR account_ids=2 OR account_ids=3 OR account_ids=4 OR account_ids=5 OR account_ids=6").unwrap().unwrap();
assert!(filter.token_at_depth(5).is_some()); assert!(filter.token_at_depth(5).is_some());
} }
} }

View File

@ -738,7 +738,7 @@ async fn main() -> anyhow::Result<()> {
let filters = match query.filters.as_ref() { let filters = match query.filters.as_ref() {
Some(condition) if !condition.trim().is_empty() => { Some(condition) if !condition.trim().is_empty() => {
Some(MilliFilter::from_str(condition).unwrap()) MilliFilter::from_str(condition).unwrap()
} }
_otherwise => None, _otherwise => None,
}; };

View File

@ -86,7 +86,8 @@ impl<'a> Filter<'a> {
Either::Left(array) => { Either::Left(array) => {
let mut ors = None; let mut ors = None;
for rule in array { for rule in array {
let condition = Self::from_str(rule.as_ref())?.condition; if let Some(filter) = Self::from_str(rule.as_ref())? {
let condition = filter.condition;
ors = match ors.take() { ors = match ors.take() {
Some(ors) => { Some(ors) => {
Some(FilterCondition::Or(Box::new(ors), Box::new(condition))) Some(FilterCondition::Or(Box::new(ors), Box::new(condition)))
@ -94,6 +95,7 @@ impl<'a> Filter<'a> {
None => Some(condition), None => Some(condition),
}; };
} }
}
if let Some(rule) = ors { if let Some(rule) = ors {
ands = match ands.take() { ands = match ands.take() {
@ -105,7 +107,8 @@ impl<'a> Filter<'a> {
} }
} }
Either::Right(rule) => { Either::Right(rule) => {
let condition = Self::from_str(rule.as_ref())?.condition; if let Some(filter) = Self::from_str(rule.as_ref())? {
let condition = filter.condition;
ands = match ands.take() { ands = match ands.take() {
Some(ands) => { Some(ands) => {
Some(FilterCondition::And(Box::new(ands), Box::new(condition))) Some(FilterCondition::And(Box::new(ands), Box::new(condition)))
@ -115,6 +118,7 @@ impl<'a> Filter<'a> {
} }
} }
} }
}
if let Some(token) = ands.as_ref().and_then(|fc| fc.token_at_depth(MAX_FILTER_DEPTH)) { if let Some(token) = ands.as_ref().and_then(|fc| fc.token_at_depth(MAX_FILTER_DEPTH)) {
return Err(token.as_external_error(FilterError::TooDeep).into()); return Err(token.as_external_error(FilterError::TooDeep).into());
@ -123,9 +127,10 @@ impl<'a> Filter<'a> {
Ok(ands.map(|ands| Self { condition: ands })) Ok(ands.map(|ands| Self { condition: ands }))
} }
pub fn from_str(expression: &'a str) -> Result<Self> { pub fn from_str(expression: &'a str) -> Result<Option<Self>> {
let condition = match FilterCondition::parse(expression) { let condition = match FilterCondition::parse(expression) {
Ok(fc) => Ok(fc), Ok(Some(fc)) => Ok(fc),
Ok(None) => return Ok(None),
Err(e) => Err(Error::UserError(UserError::InvalidFilter(e.to_string()))), Err(e) => Err(Error::UserError(UserError::InvalidFilter(e.to_string()))),
}?; }?;
@ -133,7 +138,7 @@ impl<'a> Filter<'a> {
return Err(token.as_external_error(FilterError::TooDeep).into()); return Err(token.as_external_error(FilterError::TooDeep).into());
} }
Ok(Self { condition }) Ok(Some(Self { condition }))
} }
} }
@ -377,7 +382,6 @@ impl<'a> Filter<'a> {
let rhs = Self::evaluate(&(rhs.as_ref().clone()).into(), rtxn, index)?; let rhs = Self::evaluate(&(rhs.as_ref().clone()).into(), rtxn, index)?;
Ok(lhs & rhs) Ok(lhs & rhs)
} }
FilterCondition::Empty => Ok(RoaringBitmap::new()),
FilterCondition::GeoLowerThan { point, radius } => { FilterCondition::GeoLowerThan { point, radius } => {
let filterable_fields = index.filterable_fields(rtxn)?; let filterable_fields = index.filterable_fields(rtxn)?;
if filterable_fields.contains("_geo") { if filterable_fields.contains("_geo") {
@ -451,20 +455,20 @@ mod tests {
fn from_array() { fn from_array() {
// Simple array with Left // Simple array with Left
let condition = Filter::from_array(vec![Either::Left(["channel = mv"])]).unwrap().unwrap(); let condition = Filter::from_array(vec![Either::Left(["channel = mv"])]).unwrap().unwrap();
let expected = Filter::from_str("channel = mv").unwrap(); let expected = Filter::from_str("channel = mv").unwrap().unwrap();
assert_eq!(condition, expected); assert_eq!(condition, expected);
// Simple array with Right // Simple array with Right
let condition = Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = mv")]) let condition = Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = mv")])
.unwrap() .unwrap()
.unwrap(); .unwrap();
let expected = Filter::from_str("channel = mv").unwrap(); let expected = Filter::from_str("channel = mv").unwrap().unwrap();
assert_eq!(condition, expected); assert_eq!(condition, expected);
// Array with Left and escaped quote // Array with Left and escaped quote
let condition = let condition =
Filter::from_array(vec![Either::Left(["channel = \"Mister Mv\""])]).unwrap().unwrap(); Filter::from_array(vec![Either::Left(["channel = \"Mister Mv\""])]).unwrap().unwrap();
let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap(); let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap();
assert_eq!(condition, expected); assert_eq!(condition, expected);
// Array with Right and escaped quote // Array with Right and escaped quote
@ -472,13 +476,13 @@ mod tests {
Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = \"Mister Mv\"")]) Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = \"Mister Mv\"")])
.unwrap() .unwrap()
.unwrap(); .unwrap();
let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap(); let expected = Filter::from_str("channel = \"Mister Mv\"").unwrap().unwrap();
assert_eq!(condition, expected); assert_eq!(condition, expected);
// Array with Left and escaped simple quote // Array with Left and escaped simple quote
let condition = let condition =
Filter::from_array(vec![Either::Left(["channel = 'Mister Mv'"])]).unwrap().unwrap(); Filter::from_array(vec![Either::Left(["channel = 'Mister Mv'"])]).unwrap().unwrap();
let expected = Filter::from_str("channel = 'Mister Mv'").unwrap(); let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap();
assert_eq!(condition, expected); assert_eq!(condition, expected);
// Array with Right and escaped simple quote // Array with Right and escaped simple quote
@ -486,13 +490,13 @@ mod tests {
Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = 'Mister Mv'")]) Filter::from_array::<_, Option<&str>>(vec![Either::Right("channel = 'Mister Mv'")])
.unwrap() .unwrap()
.unwrap(); .unwrap();
let expected = Filter::from_str("channel = 'Mister Mv'").unwrap(); let expected = Filter::from_str("channel = 'Mister Mv'").unwrap().unwrap();
assert_eq!(condition, expected); assert_eq!(condition, expected);
// Simple with parenthesis // Simple with parenthesis
let condition = let condition =
Filter::from_array(vec![Either::Left(["(channel = mv)"])]).unwrap().unwrap(); Filter::from_array(vec![Either::Left(["(channel = mv)"])]).unwrap().unwrap();
let expected = Filter::from_str("(channel = mv)").unwrap(); let expected = Filter::from_str("(channel = mv)").unwrap().unwrap();
assert_eq!(condition, expected); assert_eq!(condition, expected);
// Test that the facet condition is correctly generated. // Test that the facet condition is correctly generated.
@ -503,7 +507,9 @@ mod tests {
.unwrap() .unwrap()
.unwrap(); .unwrap();
let expected = let expected =
Filter::from_str("channel = gotaga AND (timestamp = 44 OR channel != ponce)").unwrap(); Filter::from_str("channel = gotaga AND (timestamp = 44 OR channel != ponce)")
.unwrap()
.unwrap();
println!("\nExpecting: {:#?}\nGot: {:#?}\n", expected, condition); println!("\nExpecting: {:#?}\nGot: {:#?}\n", expected, condition);
assert_eq!(condition, expected); assert_eq!(condition, expected);
} }
@ -516,13 +522,13 @@ mod tests {
let index = Index::new(options, &path).unwrap(); let index = Index::new(options, &path).unwrap();
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap(); let filter = Filter::from_str("_geoRadius(42, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err(); let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with( assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. Available filterable attributes are: ``." "Attribute `_geo` is not filterable. Available filterable attributes are: ``."
)); ));
let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap(); let filter = Filter::from_str("dog = \"bernese mountain\"").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err(); let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with( assert!(error.to_string().starts_with(
"Attribute `dog` is not filterable. Available filterable attributes are: ``." "Attribute `dog` is not filterable. Available filterable attributes are: ``."
@ -539,13 +545,13 @@ mod tests {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap(); let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err(); let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with( assert!(error.to_string().starts_with(
"Attribute `_geo` is not filterable. Available filterable attributes are: `title`." "Attribute `_geo` is not filterable. Available filterable attributes are: `title`."
)); ));
let filter = Filter::from_str("name = 12").unwrap(); let filter = Filter::from_str("name = 12").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err(); let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().starts_with( assert!(error.to_string().starts_with(
"Attribute `name` is not filterable. Available filterable attributes are: `title`." "Attribute `name` is not filterable. Available filterable attributes are: `title`."
@ -570,7 +576,7 @@ mod tests {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
// georadius have a bad latitude // georadius have a bad latitude
let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap(); let filter = Filter::from_str("_geoRadius(-100, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err(); let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!( assert!(
error.to_string().starts_with( error.to_string().starts_with(
@ -581,14 +587,14 @@ mod tests {
); );
// georadius have a bad latitude // georadius have a bad latitude
let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap(); let filter = Filter::from_str("_geoRadius(-90.0000001, 150, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err(); let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains( assert!(error.to_string().contains(
"Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees." "Bad latitude `-90.0000001`. Latitude must be contained between -90 and 90 degrees."
)); ));
// georadius have a bad longitude // georadius have a bad longitude
let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap(); let filter = Filter::from_str("_geoRadius(-10, 250, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err(); let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!( assert!(
error.to_string().contains( error.to_string().contains(
@ -599,7 +605,7 @@ mod tests {
); );
// georadius have a bad longitude // georadius have a bad longitude
let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap(); let filter = Filter::from_str("_geoRadius(-10, 180.000001, 10)").unwrap().unwrap();
let error = filter.evaluate(&rtxn, &index).unwrap_err(); let error = filter.evaluate(&rtxn, &index).unwrap_err();
assert!(error.to_string().contains( assert!(error.to_string().contains(
"Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees." "Bad longitude `180.000001`. Longitude must be contained between -180 and 180 degrees."
@ -608,19 +614,6 @@ mod tests {
#[test] #[test]
fn filter_depth() { fn filter_depth() {
let path = tempfile::tempdir().unwrap();
let mut options = EnvOpenOptions::new();
options.map_size(10 * 1024 * 1024); // 10 MB
let index = Index::new(options, &path).unwrap();
// Set the filterable fields to be the channel.
let mut wtxn = index.write_txn().unwrap();
let mut builder = Settings::new(&mut wtxn, &index);
builder.set_searchable_fields(vec![S("account_ids")]);
builder.set_filterable_fields(hashset! { S("account_ids") });
builder.execute(|_| ()).unwrap();
wtxn.commit().unwrap();
// generates a big (2 MiB) filter with too much of ORs. // generates a big (2 MiB) filter with too much of ORs.
let tipic_filter = "account_ids=14361 OR "; let tipic_filter = "account_ids=14361 OR ";
let mut filter_string = String::with_capacity(tipic_filter.len() * 14360); let mut filter_string = String::with_capacity(tipic_filter.len() * 14360);
@ -638,4 +631,10 @@ mod tests {
error.to_string() error.to_string()
); );
} }
#[test]
fn empty_filter() {
let option = Filter::from_str(" ").unwrap();
assert_eq!(option, None);
}
} }

View File

@ -681,7 +681,7 @@ mod tests {
builder.delete_external_id("1_4"); builder.delete_external_id("1_4");
builder.execute().unwrap(); builder.execute().unwrap();
let filter = Filter::from_str("label = sign").unwrap(); let filter = Filter::from_str("label = sign").unwrap().unwrap();
let results = index.search(&wtxn).filter(filter).execute().unwrap(); let results = index.search(&wtxn).filter(filter).execute().unwrap();
assert!(results.documents_ids.is_empty()); assert!(results.documents_ids.is_empty());

View File

@ -1060,7 +1060,7 @@ mod tests {
wtxn.commit().unwrap(); wtxn.commit().unwrap();
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let filter = Filter::from_str("toto = 32").unwrap(); let filter = Filter::from_str("toto = 32").unwrap().unwrap();
let _ = filter.evaluate(&rtxn, &index).unwrap_err(); let _ = filter.evaluate(&rtxn, &index).unwrap_err();
} }