From aa02a7fdd8ca73f2474dcfd2b48fbc4e1447b95e Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 22 Jul 2021 17:04:38 +0200 Subject: [PATCH 1/4] Add a test to check that we indeed impact the relevancy --- milli/src/update/settings.rs | 49 ++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index e4adbccb9..a78cc9da8 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1066,4 +1066,53 @@ mod tests { builder.execute(|_, _| ()).unwrap(); wtxn.commit().unwrap(); } + + #[test] + fn setting_impact_relevancy() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // Set the genres setting + let mut wtxn = index.write_txn().unwrap(); + let mut builder = Settings::new(&mut wtxn, &index, 0); + builder.set_filterable_fields(hashset! { S("genres") }); + builder.execute(|_, _| ()).unwrap(); + + let content = &br#"[ + { + "id": 11, + "title": "Star Wars", + "overview": + "Princess Leia is captured and held hostage by the evil Imperial forces in their effort to take over the galactic Empire. Venturesome Luke Skywalker and dashing captain Han Solo team together with the loveable robot duo R2-D2 and C-3PO to rescue the beautiful princess and restore peace and justice in the Empire.", + "genres": ["Adventure", "Action", "Science Fiction"], + "poster": "https://image.tmdb.org/t/p/w500/6FfCtAuVAW8XJjZ7eWeLibRLWTw.jpg", + "release_date": 233366400 + }, + { + "id": 30, + "title": "Magnetic Rose", + "overview": "", + "genres": ["Animation", "Science Fiction"], + "poster": "https://image.tmdb.org/t/p/w500/gSuHDeWemA1menrwfMRChnSmMVN.jpg", + "release_date": 819676800 + } + ]"#[..]; + let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); + builder.update_format(UpdateFormat::Json); + builder.execute(content, |_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + // We now try to reset the primary key + let rtxn = index.read_txn().unwrap(); + let SearchResult { documents_ids, .. } = index.search(&rtxn).query("S").execute().unwrap(); + let first_id = documents_ids[0]; + let documents = index.documents(&rtxn, documents_ids).unwrap(); + let (_, content) = documents.iter().find(|(id, _)| *id == first_id).unwrap(); + + let fid = index.fields_ids_map(&rtxn).unwrap().id("title").unwrap(); + let line = std::str::from_utf8(content.get(fid).unwrap()).unwrap(); + assert_eq!(line, r#""Star Wars""#); + } } From 7aa6cc9b04ab78ca5374faf2e1118d7fa6b269da Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 22 Jul 2021 17:11:17 +0200 Subject: [PATCH 2/4] Do not insert fields in the map when changing the settings --- milli/src/index.rs | 50 ++++---------------- milli/src/search/criteria/asc_desc.rs | 32 +++++++------ milli/src/search/facet/facet_distribution.rs | 13 ++--- milli/src/search/mod.rs | 15 +++--- milli/src/update/delete_documents.rs | 14 +++--- milli/src/update/settings.rs | 18 ------- 6 files changed, 46 insertions(+), 96 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index f26643de7..63da6b1e8 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -8,7 +8,7 @@ use heed::types::*; use heed::{Database, PolyDatabase, RoTxn, RwTxn}; use roaring::RoaringBitmap; -use crate::error::{FieldIdMapMissingEntry, InternalError, UserError}; +use crate::error::{InternalError, UserError}; use crate::fields_ids_map::FieldsIdsMap; use crate::heed_codec::facet::{ FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, @@ -353,15 +353,8 @@ impl Index { let fields_ids_map = self.fields_ids_map(rtxn)?; let mut fields_ids = Vec::new(); for name in fields.into_iter() { - match fields_ids_map.id(name) { - Some(field_id) => fields_ids.push(field_id), - None => { - return Err(FieldIdMapMissingEntry::FieldName { - field_name: name.to_string(), - process: "Index::displayed_fields_ids", - } - .into()) - } + if let Some(field_id) = fields_ids_map.id(name) { + fields_ids.push(field_id); } } Ok(Some(fields_ids)) @@ -403,15 +396,8 @@ impl Index { let fields_ids_map = self.fields_ids_map(rtxn)?; let mut fields_ids = Vec::new(); for name in fields { - match fields_ids_map.id(name) { - Some(field_id) => fields_ids.push(field_id), - None => { - return Err(FieldIdMapMissingEntry::FieldName { - field_name: name.to_string(), - process: "Index::searchable_fields_ids", - } - .into()) - } + if let Some(field_id) = fields_ids_map.id(name) { + fields_ids.push(field_id); } } Ok(Some(fields_ids)) @@ -451,17 +437,8 @@ impl Index { let mut fields_ids = HashSet::new(); for name in fields { - match fields_ids_map.id(&name) { - Some(field_id) => { - fields_ids.insert(field_id); - } - None => { - return Err(FieldIdMapMissingEntry::FieldName { - field_name: name, - process: "Index::filterable_fields_ids", - } - .into()) - } + if let Some(field_id) = fields_ids_map.id(&name) { + fields_ids.insert(field_id); } } @@ -498,17 +475,8 @@ impl Index { let mut fields_ids = HashSet::new(); for name in fields.into_iter() { - match fields_ids_map.id(&name) { - Some(field_id) => { - fields_ids.insert(field_id); - } - None => { - return Err(FieldIdMapMissingEntry::FieldName { - field_name: name, - process: "Index::faceted_fields_ids", - } - .into()) - } + if let Some(field_id) = fields_ids_map.id(&name) { + fields_ids.insert(field_id); } } diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index 99d63c90d..4a664d042 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -6,7 +6,6 @@ use ordered_float::OrderedFloat; use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; -use crate::error::FieldIdMapMissingEntry; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::facet::FacetNumberIter; use crate::search::query_tree::Operation; @@ -20,7 +19,7 @@ pub struct AscDesc<'t> { index: &'t Index, rtxn: &'t heed::RoTxn<'t>, field_name: String, - field_id: FieldId, + field_id: Option, ascending: bool, query_tree: Option, candidates: Box> + 't>, @@ -57,11 +56,11 @@ impl<'t> AscDesc<'t> { ascending: bool, ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; - let field_id = - fields_ids_map.id(&field_name).ok_or_else(|| FieldIdMapMissingEntry::FieldName { - field_name: field_name.clone(), - process: "AscDesc::new", - })?; + let field_id = fields_ids_map.id(&field_name); + let faceted_candidates = match field_id { + Some(field_id) => index.number_faceted_documents_ids(rtxn, field_id)?, + None => RoaringBitmap::default(), + }; Ok(AscDesc { index, @@ -72,7 +71,7 @@ impl<'t> AscDesc<'t> { query_tree: None, candidates: Box::new(std::iter::empty()), allowed_candidates: RoaringBitmap::new(), - faceted_candidates: index.number_faceted_documents_ids(rtxn, field_id)?, + faceted_candidates, bucket_candidates: RoaringBitmap::new(), parent, }) @@ -132,13 +131,16 @@ impl<'t> Criterion for AscDesc<'t> { } self.allowed_candidates = &candidates - params.excluded_candidates; - self.candidates = facet_ordered( - self.index, - self.rtxn, - self.field_id, - self.ascending, - candidates & &self.faceted_candidates, - )?; + self.candidates = match self.field_id { + Some(field_id) => facet_ordered( + self.index, + self.rtxn, + field_id, + self.ascending, + candidates & &self.faceted_candidates, + )?, + None => Box::new(std::iter::empty()), + }; } None => return Ok(None), }, diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 94f875dfc..bfbea76c3 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -5,7 +5,7 @@ use std::{cmp, fmt, mem}; use heed::types::ByteSlice; use roaring::RoaringBitmap; -use crate::error::{FieldIdMapMissingEntry, UserError}; +use crate::error::UserError; use crate::facet::FacetType; use crate::heed_codec::facet::{ FacetStringLevelZeroCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, @@ -277,13 +277,10 @@ impl<'a> FacetDistribution<'a> { let mut distribution = BTreeMap::new(); for name in fields { - let fid = - fields_ids_map.id(&name).ok_or_else(|| FieldIdMapMissingEntry::FieldName { - field_name: name.clone(), - process: "FacetDistribution::execute", - })?; - let values = self.facet_values(fid)?; - distribution.insert(name, values); + if let Some(fid) = fields_ids_map.id(&name) { + let values = self.facet_values(fid)?; + distribution.insert(name, values); + } } Ok(distribution) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 574459547..871f464ef 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -18,7 +18,6 @@ pub(crate) use self::facet::ParserRule; pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition, Operator}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; -use crate::error::FieldIdMapMissingEntry; use crate::search::criteria::r#final::{Final, FinalResult}; use crate::{DocumentId, Index, Result}; @@ -142,13 +141,13 @@ impl<'a> Search<'a> { None => self.perform_sort(NoopDistinct, matching_words, criteria), Some(name) => { let field_ids_map = self.index.fields_ids_map(self.rtxn)?; - let id = - field_ids_map.id(name).ok_or_else(|| FieldIdMapMissingEntry::FieldName { - field_name: name.to_string(), - process: "distinct attribute", - })?; - let distinct = FacetDistinct::new(id, self.index, self.rtxn); - self.perform_sort(distinct, matching_words, criteria) + match field_ids_map.id(name) { + Some(fid) => { + let distinct = FacetDistinct::new(fid, self.index, self.rtxn); + self.perform_sort(distinct, matching_words, criteria) + } + None => Ok(SearchResult::default()), + } } } } diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index bcb7d7580..bd56688f6 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -8,7 +8,7 @@ use roaring::RoaringBitmap; use serde_json::Value; use super::ClearDocuments; -use crate::error::{FieldIdMapMissingEntry, InternalError, UserError}; +use crate::error::{InternalError, UserError}; use crate::heed_codec::facet::FacetStringLevelZeroValueCodec; use crate::heed_codec::CboRoaringBitmapCodec; use crate::index::{db_name, main_key}; @@ -82,11 +82,13 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { key: Some(main_key::PRIMARY_KEY_KEY), } })?; - let id_field = - fields_ids_map.id(primary_key).ok_or_else(|| FieldIdMapMissingEntry::FieldName { - field_name: primary_key.to_string(), - process: "DeleteDocuments::execute", - })?; + + // If we can't find the id of the primary key it means that the database + // is empty and it should be safe to return that we deleted 0 documents. + let id_field = match fields_ids_map.id(primary_key) { + Some(field) => field, + None => return Ok(0), + }; let Index { env: _env, diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index a78cc9da8..743483613 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -235,15 +235,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_displayed(&mut self) -> Result { match self.displayed_fields { Setting::Set(ref fields) => { - let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; // fields are deduplicated, only the first occurrence is taken into account let names: Vec<_> = fields.iter().unique().map(String::as_str).collect(); - - for name in names.iter() { - fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; - } self.index.put_displayed_fields(self.wtxn, &names)?; - self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; } Setting::Reset => { self.index.delete_displayed_fields(self.wtxn)?; @@ -256,11 +250,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_distinct_field(&mut self) -> Result { match self.distinct_field { Setting::Set(ref attr) => { - let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; - fields_ids_map.insert(attr).ok_or(UserError::AttributeLimitReached)?; - self.index.put_distinct_field(self.wtxn, &attr)?; - self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; } Setting::Reset => { self.index.delete_distinct_field(self.wtxn)?; @@ -388,14 +378,11 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_filterable(&mut self) -> Result<()> { match self.filterable_fields { Setting::Set(ref fields) => { - let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut new_facets = HashSet::new(); for name in fields { - fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; new_facets.insert(name.clone()); } self.index.put_filterable_fields(self.wtxn, &new_facets)?; - self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; } Setting::Reset => { self.index.delete_filterable_fields(self.wtxn)?; @@ -408,17 +395,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { fn update_criteria(&mut self) -> Result<()> { match self.criteria { Setting::Set(ref fields) => { - let mut fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let mut new_criteria = Vec::new(); for name in fields { let criterion: Criterion = name.parse()?; - if let Some(name) = criterion.field_name() { - fields_ids_map.insert(name).ok_or(UserError::AttributeLimitReached)?; - } new_criteria.push(criterion); } self.index.put_criteria(self.wtxn, &new_criteria)?; - self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; } Setting::Reset => { self.index.delete_criteria(self.wtxn)?; From b12738cfe9f74efea0d474ffba05c947c275dd28 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Thu, 22 Jul 2021 19:18:22 +0200 Subject: [PATCH 3/4] Use the right DB prefixes to store the faceted fields --- milli/src/index.rs | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 63da6b1e8..305a127ca 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -492,10 +492,10 @@ impl Index { field_id: FieldId, docids: &RoaringBitmap, ) -> heed::Result<()> { - let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 2]; - buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); - buffer[main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()..] + let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 2]; + buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + buffer[main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()..] .copy_from_slice(&field_id.to_be_bytes()); self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids) } @@ -506,10 +506,10 @@ impl Index { rtxn: &RoTxn, field_id: FieldId, ) -> heed::Result { - let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 2]; - buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); - buffer[main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()..] + let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 2]; + buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + buffer[main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()..] .copy_from_slice(&field_id.to_be_bytes()); match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? { Some(docids) => Ok(docids), @@ -524,10 +524,10 @@ impl Index { field_id: FieldId, docids: &RoaringBitmap, ) -> heed::Result<()> { - let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 2]; - buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); - buffer[main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()..] + let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 2]; + buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + buffer[main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()..] .copy_from_slice(&field_id.to_be_bytes()); self.main.put::<_, ByteSlice, RoaringBitmapCodec>(wtxn, &buffer, docids) } @@ -538,10 +538,10 @@ impl Index { rtxn: &RoTxn, field_id: FieldId, ) -> heed::Result { - let mut buffer = [0u8; main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; - buffer[..main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()] - .copy_from_slice(main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); - buffer[main_key::NUMBER_FACETED_DOCUMENTS_IDS_PREFIX.len()..] + let mut buffer = [0u8; main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len() + 1]; + buffer[..main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()] + .copy_from_slice(main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.as_bytes()); + buffer[main_key::STRING_FACETED_DOCUMENTS_IDS_PREFIX.len()..] .copy_from_slice(&field_id.to_be_bytes()); match self.main.get::<_, ByteSlice, RoaringBitmapCodec>(rtxn, &buffer)? { Some(docids) => Ok(docids), From dc2b63abdf00b2d52c797028fe0b08f24d235f44 Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Tue, 27 Jul 2021 16:24:21 +0200 Subject: [PATCH 4/4] Introduce an empty FilterCondition variant to support unknown fields --- milli/src/search/facet/filter_condition.rs | 111 ++++++++++++--------- milli/src/update/index_documents/mod.rs | 7 +- milli/src/update/settings.rs | 5 +- 3 files changed, 71 insertions(+), 52 deletions(-) diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index cc108f855..5ca9f7e5a 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -6,6 +6,7 @@ use std::str::FromStr; use either::Either; use heed::types::DecodeIgnore; +use itertools::Itertools; use log::debug; use pest::error::{Error as PestError, ErrorVariant}; use pest::iterators::{Pair, Pairs}; @@ -54,6 +55,7 @@ pub enum FilterCondition { Operator(FieldId, Operator), Or(Box, Box), And(Box, Box), + Empty, } impl FilterCondition { @@ -108,7 +110,7 @@ impl FilterCondition { expression: &str, ) -> Result { let fields_ids_map = index.fields_ids_map(rtxn)?; - let filterable_fields = index.filterable_fields_ids(rtxn)?; + let filterable_fields = index.filterable_fields(rtxn)?; let lexed = FilterParser::parse(Rule::prgm, expression).map_err(UserError::InvalidFilter)?; FilterCondition::from_pairs(&fields_ids_map, &filterable_fields, lexed) @@ -116,7 +118,7 @@ impl FilterCondition { fn from_pairs( fim: &FieldsIdsMap, - ff: &HashSet, + ff: &HashSet, expression: Pairs, ) -> Result { PREC_CLIMBER.climb( @@ -150,17 +152,22 @@ impl FilterCondition { }, Or(a, b) => And(Box::new(a.negate()), Box::new(b.negate())), And(a, b) => Or(Box::new(a.negate()), Box::new(b.negate())), + Empty => Empty, } } fn between( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let (lresult, _) = pest_parse(items.next().unwrap()); let (rresult, _) = pest_parse(items.next().unwrap()); @@ -173,12 +180,16 @@ impl FilterCondition { fn equal( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, svalue) = pest_parse(value); @@ -189,12 +200,16 @@ impl FilterCondition { fn greater_than( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -205,12 +220,16 @@ impl FilterCondition { fn greater_than_or_equal( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -221,12 +240,16 @@ impl FilterCondition { fn lower_than( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -237,12 +260,16 @@ impl FilterCondition { fn lower_than_or_equal( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, item: Pair, ) -> Result { let mut items = item.into_inner(); - let fid = field_id(fields_ids_map, filterable_fields, &mut items) - .map_err(UserError::InvalidFilterAttribute)?; + let fid = match field_id(fields_ids_map, filterable_fields, &mut items) + .map_err(UserError::InvalidFilterAttribute)? + { + Some(fid) => fid, + None => return Ok(Empty), + }; let value = items.next().unwrap(); let (result, _svalue) = pest_parse(value); @@ -461,57 +488,41 @@ impl FilterCondition { let rhs = rhs.evaluate(rtxn, index)?; Ok(lhs & rhs) } + Empty => Ok(RoaringBitmap::new()), } } } -/// Retrieve the field id base on the pest value, returns an error is -/// the field does not exist or is not filterable. +/// Retrieve the field id base on the pest value. +/// +/// Returns an error if the given value is not filterable. +/// +/// Returns Ok(None) if the given value is filterable, but is not yet ascociated to a field_id. /// /// The pest pair is simply a string associated with a span, a location to highlight in /// the error message. fn field_id( fields_ids_map: &FieldsIdsMap, - filterable_fields: &HashSet, + filterable_fields: &HashSet, items: &mut Pairs, -) -> StdResult> { +) -> StdResult, PestError> { // lexing ensures that we at least have a key let key = items.next().unwrap(); - let field_id = match fields_ids_map.id(key.as_str()) { - Some(field_id) => field_id, - None => { - return Err(PestError::new_from_span( - ErrorVariant::CustomError { - message: format!( - "attribute `{}` not found, available attributes are: {}", - key.as_str(), - fields_ids_map.iter().map(|(_, n)| n).collect::>().join(", "), - ), - }, - key.as_span(), - )) - } - }; - - if !filterable_fields.contains(&field_id) { + if !filterable_fields.contains(key.as_str()) { return Err(PestError::new_from_span( ErrorVariant::CustomError { message: format!( "attribute `{}` is not filterable, available filterable attributes are: {}", key.as_str(), - filterable_fields - .iter() - .flat_map(|id| { fields_ids_map.name(*id) }) - .collect::>() - .join(", "), + filterable_fields.iter().join(", "), ), }, key.as_span(), )); } - Ok(field_id) + Ok(fields_ids_map.id(key.as_str())) } /// Tries to parse the pest pair into the type `T` specified, always returns @@ -552,6 +563,9 @@ mod tests { // Set the filterable fields to be the channel. let mut wtxn = index.write_txn().unwrap(); + let mut map = index.fields_ids_map(&wtxn).unwrap(); + map.insert("channel"); + index.put_fields_ids_map(&mut wtxn, &map).unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_filterable_fields(hashset! { S("channel") }); builder.execute(|_, _| ()).unwrap(); @@ -581,6 +595,9 @@ mod tests { // Set the filterable fields to be the channel. let mut wtxn = index.write_txn().unwrap(); + let mut map = index.fields_ids_map(&wtxn).unwrap(); + map.insert("timestamp"); + index.put_fields_ids_map(&mut wtxn, &map).unwrap(); let mut builder = Settings::new(&mut wtxn, &index, 0); builder.set_filterable_fields(hashset! { "timestamp".into() }); builder.execute(|_, _| ()).unwrap(); diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index efe16def7..aae1e4eb4 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -391,6 +391,10 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { documents_file, } = output; + // The fields_ids_map is put back to the store now so the rest of the transaction sees an + // up to date field map. + self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; + // We delete the documents that this document addition replaces. This way we are // able to simply insert all the documents even if they already exist in the database. if !replaced_documents_ids.is_empty() { @@ -596,9 +600,6 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { debug!("Writing using the write method: {:?}", write_method); - // We write the fields ids map into the main database - self.index.put_fields_ids_map(self.wtxn, &fields_ids_map)?; - // We write the field distribution into the main database self.index.put_field_distribution(self.wtxn, &field_distribution)?; diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 743483613..07bdfd6fa 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -674,7 +674,8 @@ mod tests { let count = index .facet_id_f64_docids .remap_key_type::() - .prefix_iter(&rtxn, &[0, 0]) + // The faceted field id is 2u16 + .prefix_iter(&rtxn, &[0, 2, 0]) .unwrap() .count(); assert_eq!(count, 3); @@ -700,7 +701,7 @@ mod tests { let count = index .facet_id_f64_docids .remap_key_type::() - .prefix_iter(&rtxn, &[0, 0]) + .prefix_iter(&rtxn, &[0, 2, 0]) .unwrap() .count(); assert_eq!(count, 4);