From ca97cb0eda3fb18f20928e6526389d2a09be07ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?=
Date: Mon, 30 May 2022 13:58:11 +0200
Subject: [PATCH] Implement the IN filter operator

---
 filter-parser/src/lib.rs         | 78 ++++++++++++++++++++++++++++++--
 milli/src/search/facet/filter.rs | 26 +++++++++++
 2 files changed, 102 insertions(+), 2 deletions(-)

diff --git a/filter-parser/src/lib.rs b/filter-parser/src/lib.rs
index c5eeb84a9..bfb02d63c 100644
--- a/filter-parser/src/lib.rs
+++ b/filter-parser/src/lib.rs
@@ -6,12 +6,14 @@
 //! or             = and ("OR" WS+ and)*
 //! and            = not ("AND" WS+ not)*
 //! not            = ("NOT" WS+ not) | primary
-//! primary        = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | condition | exists | not_exists | to
+//! primary        = (WS* "(" WS* expression WS* ")" WS*) | geoRadius | in | condition | exists | not_exists | to
+//! in             = value "IN" WS* "[" value_list "]"
 //! condition      = value ("=" | "!=" | ">" | ">=" | "<" | "<=") value
 //! exists         = value "EXISTS"
 //! not_exists     = value "NOT" WS+ "EXISTS"
 //! to             = value value "TO" WS+ value
 //! value          = WS* ( word | singleQuoted | doubleQuoted) WS+
+//! value_list     = (value ("," value)* ","?)?
 //! singleQuoted   = "'" .* all but quotes "'"
 //! doubleQuoted   = "\"" .* all but double quotes "\""
 //! word           = (alphanumeric | _ | - | .)+
@@ -51,7 +53,7 @@ pub use error::{Error, ErrorKind};
 use nom::branch::alt;
 use nom::bytes::complete::tag;
 use nom::character::complete::{char, multispace0, multispace1};
-use nom::combinator::{cut, eof, map};
+use nom::combinator::{cut, eof, map, opt};
 use nom::multi::{many0, separated_list1};
 use nom::number::complete::recognize_float;
 use nom::sequence::{delimited, preceded, terminated, tuple};
@@ -114,6 +116,7 @@ impl<'a> From<Span<'a>> for Token<'a> {
 pub enum FilterCondition<'a> {
     Not(Box<Self>),
     Condition { fid: Token<'a>, op: Condition<'a> },
+    In { fid: Token<'a>, els: Vec<Token<'a>> },
     Or(Vec<Self>),
     And(Vec<Self>),
     GeoLowerThan { point: [Token<'a>; 2], radius: Token<'a> },
@@ -161,7 +164,36 @@ fn ws<'a, O>(inner: impl FnMut(Span<'a>) -> IResult<O>) -> impl FnMut(Span<'a>)
     delimited(multispace0, inner, multispace0)
 }
 
+
+/// value_list = (value ("," value)* ","?)?
+fn parse_value_list<'a>(input: Span<'a>) -> IResult<Vec<Token<'a>>> {
+    // An empty list is valid: `IN []` yields no elements.
+    let (input, first_value) = opt(parse_value)(input)?;
+    if let Some(first_value) = first_value {
+        let value_list_el_parser = preceded(ws(tag(",")), parse_value);
+        let (input, mut values) = many0(value_list_el_parser)(input)?;
+        // A single trailing comma is accepted after the last value.
+        let (input, _) = opt(ws(tag(",")))(input)?;
+        values.insert(0, first_value);
+        Ok((input, values))
+    } else {
+        Ok((input, vec![]))
+    }
+}
+
+/// in = value "IN" WS* "[" value_list "]"
+fn parse_in(input: Span) -> IResult<FilterCondition> {
+    let (input, value) = parse_value(input)?;
+    // `ws` also consumes the whitespace between "IN" and the opening bracket.
+    let (input, _) = ws(tag("IN"))(input)?;
+
+    let mut els_parser = delimited(tag("["), parse_value_list, tag("]"));
+    let (input, content) = els_parser(input)?;
+    let filter = FilterCondition::In { fid: value, els: content };
+    Ok((input, filter))
+}
+
 /// or = and ("OR" WS+ and)*
 fn parse_or(input: Span) -> IResult<FilterCondition> {
     let (input, first_filter) = parse_and(input)?;
     // if we found a `OR` then we MUST find something next
@@ -257,6 +289,7 @@ fn parse_primary(input: Span) -> IResult<FilterCondition> {
             }),
         ),
         parse_geo_radius,
+        parse_in,
         parse_condition,
         parse_exists,
         parse_not_exists,
@@ -297,6 +330,47 @@ pub mod tests {
 
         let test_case = [
             // simple test
+            (
+                "colour IN[]",
+                Fc::In {
+                    fid: rtok("", "colour"),
+                    els: vec![]
+                }
+            ),
+            (
+                "colour IN[green]",
+                Fc::In {
+                    fid: rtok("", "colour"),
+                    els: vec![rtok("colour IN[", "green")]
+                }
+            ),
+            (
+                "colour IN[green,]",
+                Fc::In {
+                    fid: rtok("", "colour"),
+                    els: vec![rtok("colour IN[", "green")]
+                }
+            ),
+            (
+                "colour IN[green,blue]",
+                Fc::In {
+                    fid: rtok("", "colour"),
+                    els: vec![
+                        rtok("colour IN[", "green"),
+                        rtok("colour IN[green,", "blue"),
+                    ]
+                }
+            ),
+            (
+                " colour IN [ green , blue , ]",
+                Fc::In {
+                    fid: rtok(" ", "colour"),
+                    els: vec![
+                        rtok(" colour IN [ ", "green"),
+                        rtok(" colour IN [ green , ", "blue"),
+                    ]
+                }
+            ),
             (
                 "channel = Ponce",
                 Fc::Condition {
diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs
index ac3215dea..25ffe1842 100644
--- a/milli/src/search/facet/filter.rs
+++ b/milli/src/search/facet/filter.rs
@@ -362,6 +362,32 @@ impl<'a> Filter<'a> {
                 )?;
                 return Ok(all_ids - selected);
             }
+            FilterCondition::In { fid, els } => {
+                if crate::is_faceted(fid.value(), filterable_fields) {
+                    let field_ids_map = index.fields_ids_map(rtxn)?;
+
+                    if let Some(fid) = field_ids_map.id(fid.value()) {
+                        // TODO: this could be optimised
+                        // `IN` is evaluated as the union of one `Equal` condition per element.
+                        let mut bitmap = RoaringBitmap::new();
+
+                        for el in els {
+                            let op = Condition::Equal(el.clone());
+                            let el_bitmap = Self::evaluate_operator(rtxn, index, fid, &op)?;
+                            bitmap |= el_bitmap;
+                        }
+                        Ok(bitmap)
+                    } else {
+                        // The fields ids map has no id for this field: return an empty bitmap.
+                        Ok(RoaringBitmap::new())
+                    }
+                } else {
+                    return Err(fid.as_external_error(FilterError::AttributeNotFilterable {
+                        attribute: fid.value(),
+                        filterable_fields: filterable_fields.clone(),
+                    }))?;
+                }
+            }
             FilterCondition::Condition { fid, op } => {
                 if crate::is_faceted(fid.value(), filterable_fields) {
                     let field_ids_map = index.fields_ids_map(rtxn)?;