From dc2b63abdf00b2d52c797028fe0b08f24d235f44 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 27 Jul 2021 16:24:21 +0200 Subject: [PATCH] Introduce an empty FilterCondition variant to support unknown fields --- milli/src/search/facet/filter_condition.rs | 111 ++++++++++++--------- milli/src/update/index_documents/mod.rs | 7 +- milli/src/update/settings.rs | 5 +- 3 files changed, 71 insertions(+), 52 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index cc108f855..5ca9f7e5a 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -6,6 +6,7 @@ use std::str::FromStr; use either::Either; use heed::types::DecodeIgnore; +use itertools::Itertools; use log::debug; use pest::error::{Error as PestError, ErrorVariant}; use pest::iterators::{Pair, Pairs}; @@ -54,6 +55,7 @@ pub enum FilterCondition { Operator(FieldId, Operator), Or(Box, Box), And(Box, Box), + Empty, } impl FilterCondition { @@ -108,7 +110,7 @@ impl FilterCondition { expression: &str, ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; - let filterable_fields = index.filterable_fields_ids(rtxn)?; + let filterable_fields = index.filterable_fields(rtxn)?; let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?; FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) @@ -116,7 +118,7 @@ impl FilterCondition { fn from_pairs( fim: &FieldsIdsMap, - ff: &HashSet, + ff: &HashSet, expression: Pairs, ) -> Result { PREC_CLIMBER.climb( @@ -150,17 +152,22 @@ impl FilterCondition { }, Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())), And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())), + Empty => Empty, } } fn between( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let (lresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap()); @@ -173,12 +180,16 @@ impl FilterCondition { fn equal( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, svalue) = pest_parse(value); @@ -189,12 +200,16 @@ impl FilterCondition { fn greater_than( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -205,12 +220,16 @@ impl FilterCondition { fn greater_than_or_equal( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -221,12 +240,16 @@ impl FilterCondition { fn lower_than( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -237,12 +260,16 @@ impl FilterCondition { fn lower_than_or_equal( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -461,57 +488,41 @@ impl FilterCondition { let rhs = rhs.evaluate(rtxn, index)?; Ok(lhs & rhs) } + Empty => Ok(RoaringBitmap::new()), } } } -/// Retrieve the field id base on the pest value, returns an error is -/// the field does not exist or is not filterable. +/// Retrieve the field id base on the pest value. +/// +/// Returns an error if the given value is not filterable. +/// +/// Returns Ok(None) if the given value is filterable, but is not yet ascociated to a field_id. /// /// The pest pair is simply a string associated with a span, a location to highlight in /// the error message. fn field_id( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, items: &mut Pairs, -) -> StdResult> { +) -> StdResult, PestError> { // lexing ensures that we at least have a key let key = items.next().unwrap(); - let field_id = match fields_ids_map.id(key.as_str()) { - Some(field_id) => field_id, - None => { - return Err(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!( - "attribute `{}` not found, available attributes are: {}", - key.as_str(), - fields_ids_map.iter().map(|(_, n)| n).collect::>().join(", "), - ), - }, - key.as_span(), - )) - } - }; - - if !filterable_fields.contains(&field_id) { + if !filterable_fields.contains(key.as_str()) { return Err(PestError::new_from_span( ErrorVariant::CustomError { message: format!( "attribute `{}` is not filterable, available filterable attributes are: {}", key.as_str(), - filterable_fields - .iter() - .flat_map(|id| { fields_ids_map.name(*id) }) - .collect::>() - .join(", "), + filterable_fields.iter().join(", "), ), }, key.as_span(), )); } - Ok(field_id) + Ok(fields_ids_map.id(key.as_str())) } /// Tries to parse the pest pair into the type `T` specified, always returns @@ -552,6 +563,9 @@ mod tests { // Set the filterable fields to be the channel. let mut wtxn = index.write_txn().unwrap(); + let mut map = index.fields_ids_map(&wtxn).unwrap(); + map.insert("channel"); + index.put_fields_ids_map(&mut wtxn, &map).unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_filterable_fields(hashset! { S("channel") }); builder.execute(|_, _| ()).unwrap(); @@ -581,6 +595,9 @@ mod tests { // Set the filterable fields to be the channel. let mut wtxn = index.write_txn().unwrap(); + let mut map = index.fields_ids_map(&wtxn).unwrap(); + map.insert("timestamp"); + index.put_fields_ids_map(&mut wtxn, &map).unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_filterable_fields(hashset! { "timestamp".into() }); builder.execute(|_, _| ()).unwrap(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index efe16def7..aae1e4eb4 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -391,6 +391,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { documents_file, } = output; + // The fields_ids_map is put back to the store now so the rest of the transaction sees an + // up to date field map. + self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; + // We delete the documents that this document addition replaces. This way we are // able to simply insert all the documents even if they already exist in the database. if !replaced_documents_ids.is_empty() { @@ -596,9 +600,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { debug!("Writing using the write method: {:?}", write_method); - // We write the fields ids map into the main database - self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; - // We write the field distribution into the main database self.index.put_field_distribution(self.wtxn, &field_distribution)?; diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 743483613..07bdfd6fa 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -674,7 +674,8 @@ mod tests { let count = index .facet_id_f64_docids .remap_key_type::() - .prefix_iter(&rtxn, &[0, 0]) + // The faceted field id is 2u16 + .prefix_iter(&rtxn, &[0, 2, 0]) .unwrap() .count(); assert_eq!(count, 3); @@ -700,7 +701,7 @@ mod tests { let count = index .facet_id_f64_docids .remap_key_type::() - .prefix_iter(&rtxn, &[0, 0]) + .prefix_iter(&rtxn, &[0, 2, 0]) .unwrap() .count(); assert_eq!(count, 4);