diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index 32857b8d7..f57d6d54f 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -50,7 +50,6 @@ impl<'t> AscDesc<'t> { Self::new(index, rtxn, parent, field_name, false) } - fn new( index: &'t Index, rtxn: &'t heed::RoTxn, @@ -59,7 +58,6 @@ impl<'t> AscDesc<'t> { ascending: bool, ) -> anyhow::Result { let fields_ids_map = index.fields_ids_map(rtxn)?; - let faceted_fields = index.faceted_fields(rtxn)?; let field_id = fields_ids_map .id(&field_name) .with_context(|| format!("field {:?} isn't registered", field_name))?; diff --git a/milli/src/search/facet/facet_condition.rs b/milli/src/search/facet/facet_condition.rs index 899b99b71..b189f5f0f 100644 --- a/milli/src/search/facet/facet_condition.rs +++ b/milli/src/search/facet/facet_condition.rs @@ -55,27 +55,45 @@ pub enum FacetCondition { And(Box, Box), } -fn field_id<'a>( +fn field_id( fields_ids_map: &FieldsIdsMap, faceted_fields: &HashSet, - items: &mut Pairs<'a, Rule>, + items: &mut Pairs, ) -> Result> { // lexing ensures that we at least have a key let key = items.next().unwrap(); - match fields_ids_map.id(key.as_str()) { - Some(field_id) => Ok(field_id), - None => Err(PestError::new_from_span( + + let field_id = match fields_ids_map.id(key.as_str()) { + Some(field_id) => field_id, + None => return Err(PestError::new_from_span( ErrorVariant::CustomError { message: format!( "attribute `{}` not found, available attributes are: {}", key.as_str(), - fields_ids_map.iter().map(|(_, n)| n).collect::>().join(", ") + fields_ids_map.iter().map(|(_, n)| n).collect::>().join(", "), ), }, key.as_span(), )), + }; + + if !faceted_fields.contains(&field_id) { + return Err(PestError::new_from_span( + ErrorVariant::CustomError { + message: format!( + "attribute `{}` is not faceted, available faceted attributes are: {}", + key.as_str(), + faceted_fields.iter().flat_map(|id| { + fields_ids_map.name(*id) + }).collect::>().join(", "), + ), + }, + key.as_span(), + )); } + + Ok(field_id) } fn pest_parse(pair: Pair) -> (Result>, String) @@ -84,12 +102,10 @@ where T: FromStr, { let result = match pair.as_str().parse::() { Ok(value) => Ok(value), - Err(e) => { - Err(PestError::::new_from_span( - ErrorVariant::CustomError { message: e.to_string() }, - pair.as_span(), - )) - } + Err(e) => Err(PestError::::new_from_span( + ErrorVariant::CustomError { message: e.to_string() }, + pair.as_span(), + )), }; (result, pair.as_str().to_string()) @@ -106,8 +122,6 @@ impl FacetCondition { A: AsRef, B: AsRef, { - let fields_ids_map = index.fields_ids_map(rtxn)?; - let faceted_fields = index.faceted_fields(rtxn)?; let mut ands = None; for either in array { @@ -202,7 +216,6 @@ impl FacetCondition { item: Pair, ) -> anyhow::Result { - let item_span = item.as_span(); let mut items = item.into_inner(); let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; @@ -236,7 +249,6 @@ impl FacetCondition { item: Pair, ) -> anyhow::Result { - let item_span = item.as_span(); let mut items = item.into_inner(); let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; @@ -252,7 +264,6 @@ impl FacetCondition { item: Pair, ) -> anyhow::Result { - let item_span = item.as_span(); let mut items = item.into_inner(); let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; @@ -268,7 +279,6 @@ impl FacetCondition { item: Pair, ) -> anyhow::Result { - let item_span = item.as_span(); let mut items = item.into_inner(); let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; @@ -284,7 +294,6 @@ impl FacetCondition { item: Pair, ) -> anyhow::Result { - let item_span = item.as_span(); let mut items = item.into_inner(); let fid = field_id(fields_ids_map, faceted_fields, &mut items)?; diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 7fd2d385b..c6122cc77 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -3,12 +3,12 @@ use std::ops::Bound::Unbounded; use std::{cmp, fmt}; use anyhow::Context; -use heed::BytesDecode; +use heed::{Database, BytesDecode}; +use heed::types::{ByteSlice, Unit}; use roaring::RoaringBitmap; -use crate::facet::{FacetType, FacetValue}; -use crate::heed_codec::facet::{FacetValueStringCodec, FacetLevelValueF64Codec}; -use crate::heed_codec::facet::{FieldDocIdFacetStringCodec, FieldDocIdFacetF64Codec}; +use crate::facet::FacetType; +use crate::heed_codec::facet::FacetValueStringCodec; use crate::search::facet::{FacetIter, FacetRange}; use crate::{Index, FieldId, DocumentId}; @@ -60,86 +60,81 @@ impl<'a> FacetDistribution<'a> { /// There is a small amount of candidates OR we ask for facet string values so we /// decide to iterate over the facet values of each one of them, one by one. - fn facet_values_from_documents( + fn facet_distribution_from_documents( &self, field_id: FieldId, facet_type: FacetType, candidates: &RoaringBitmap, - ) -> heed::Result> + distribution: &mut BTreeMap, + ) -> heed::Result<()> { fn fetch_facet_values<'t, KC, K: 't>( - index: &Index, rtxn: &'t heed::RoTxn, + db: Database, field_id: FieldId, candidates: &RoaringBitmap, - ) -> heed::Result> + distribution: &mut BTreeMap, + ) -> heed::Result<()> where + K: fmt::Display, KC: BytesDecode<'t, DItem = (FieldId, DocumentId, K)>, - K: Into, { - let mut facet_values = BTreeMap::new(); let mut key_buffer = vec![field_id]; for docid in candidates.into_iter().take(CANDIDATES_THRESHOLD as usize) { key_buffer.truncate(1); key_buffer.extend_from_slice(&docid.to_be_bytes()); - let iter = index.field_id_docid_facet_values + let iter = db + .remap_key_type::() .prefix_iter(rtxn, &key_buffer)? .remap_key_type::(); for result in iter { let ((_, _, value), ()) = result?; - *facet_values.entry(value.into()).or_insert(0) += 1; + *distribution.entry(value.to_string()).or_insert(0) += 1; } } - Ok(facet_values) + Ok(()) } - let index = self.index; - let rtxn = self.rtxn; match facet_type { - FacetType::String => { - fetch_facet_values::(index, rtxn, field_id, candidates) - }, FacetType::Number => { - fetch_facet_values::(index, rtxn, field_id, candidates) + let db = self.index.field_id_docid_facet_f64s; + fetch_facet_values(self.rtxn, db, field_id, candidates, distribution) }, + FacetType::String => { + let db = self.index.field_id_docid_facet_strings; + fetch_facet_values(self.rtxn, db, field_id, candidates, distribution) + } } } /// There is too much documents, we use the facet levels to move throught /// the facet values, to find the candidates and values associated. - fn facet_values_from_facet_levels( + fn facet_numbers_distribution_from_facet_levels( &self, field_id: FieldId, - facet_type: FacetType, candidates: &RoaringBitmap, - ) -> heed::Result> + distribution: &mut BTreeMap, + ) -> heed::Result<()> { - let iter = match facet_type { - FacetType::String => unreachable!(), - FacetType::Number => { - let iter = FacetIter::new_non_reducing( - self.rtxn, self.index, field_id, candidates.clone(), - )?; - iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))) - }, - }; + let iter = FacetIter::new_non_reducing( + self.rtxn, self.index, field_id, candidates.clone(), + )?; - let mut facet_values = BTreeMap::new(); for result in iter { let (value, mut docids) = result?; docids.intersect_with(candidates); if !docids.is_empty() { - facet_values.insert(value, docids.len()); + distribution.insert(value.to_string(), docids.len()); } - if facet_values.len() == self.max_values_by_facet { + if distribution.len() == self.max_values_by_facet { break; } } - Ok(facet_values) + Ok(()) } /// Placeholder search, a.k.a. no candidates were specified. We iterate throught the @@ -147,80 +142,73 @@ impl<'a> FacetDistribution<'a> { fn facet_values_from_raw_facet_database( &self, field_id: FieldId, - facet_type: FacetType, - ) -> heed::Result> + ) -> heed::Result> { - let db = self.index.facet_field_id_value_docids; - let level = 0; - let iter = match facet_type { - FacetType::String => { - let iter = db - .prefix_iter(self.rtxn, &[field_id])? - .remap_key_type::() - .map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids))); - Box::new(iter) as Box::> - }, - FacetType::Number => { - let db = db.remap_key_type::(); - let range = FacetRange::new( - self.rtxn, db, field_id, level, Unbounded, Unbounded, - )?; - Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) - }, - }; + let mut distribution = BTreeMap::new(); - let mut facet_values = BTreeMap::new(); - for result in iter { - let (value, docids) = result?; - facet_values.insert(value, docids.len()); - if facet_values.len() == self.max_values_by_facet { + let db = self.index.facet_id_f64_docids; + let range = FacetRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?; + + for result in range { + let ((_, _, value, _), docids) = result?; + distribution.insert(value.to_string(), docids.len()); + if distribution.len() == self.max_values_by_facet { break; } } - Ok(facet_values) + let iter = self.index + .facet_id_string_docids + .remap_key_type::() + .prefix_iter(self.rtxn, &[field_id])? + .remap_key_type::(); + + for result in iter { + let ((_, value), docids) = result?; + distribution.insert(value.to_string(), docids.len()); + if distribution.len() == self.max_values_by_facet { + break; + } + } + + Ok(distribution) } - fn facet_values( - &self, - field_id: FieldId, - facet_type: FacetType, - ) -> heed::Result> - { + fn facet_values(&self, field_id: FieldId) -> heed::Result> { + use FacetType::{Number, String}; + if let Some(candidates) = self.candidates.as_ref() { // Classic search, candidates were specified, we must return facet values only related // to those candidates. We also enter here for facet strings for performance reasons. - if candidates.len() <= CANDIDATES_THRESHOLD || facet_type == FacetType::String { - self.facet_values_from_documents(field_id, facet_type, candidates) + let mut distribution = BTreeMap::new(); + if candidates.len() <= CANDIDATES_THRESHOLD { + self.facet_distribution_from_documents(field_id, Number, candidates, &mut distribution)?; + self.facet_distribution_from_documents(field_id, String, candidates, &mut distribution)?; } else { - self.facet_values_from_facet_levels(field_id, facet_type, candidates) + self.facet_numbers_distribution_from_facet_levels(field_id, candidates, &mut distribution)?; + self.facet_distribution_from_documents(field_id, String, candidates, &mut distribution)?; } + + Ok(distribution) } else { - self.facet_values_from_raw_facet_database(field_id, facet_type) + self.facet_values_from_raw_facet_database(field_id) } } - pub fn execute(&self) -> anyhow::Result>> { + pub fn execute(&self) -> anyhow::Result>> { let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let faceted_fields = self.index.faceted_fields(self.rtxn)?; - let fields_ids: Vec<_> = match &self.facets { - Some(names) => names - .iter() - .filter_map(|n| faceted_fields.get(n).map(|t| (n.to_string(), *t))) - .collect(), - None => faceted_fields.into_iter().collect(), - }; - let mut facets_values = BTreeMap::new(); - for (name, ftype) in fields_ids { + let mut distribution = BTreeMap::new(); + for name in faceted_fields { let fid = fields_ids_map.id(&name).with_context(|| { format!("missing field name {:?} from the fields id map", name) })?; - let values = self.facet_values(fid, ftype)?; - facets_values.insert(name, values); + let values = self.facet_values(fid)?; + distribution.insert(name, values); } - Ok(facets_values) + Ok(distribution) } } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index f2211ad78..623581706 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -141,15 +141,12 @@ impl<'a> Search<'a> { let field_ids_map = self.index.fields_ids_map(self.rtxn)?; let id = field_ids_map.id(name).expect("distinct not present in field map"); let faceted_fields = self.index.faceted_fields(self.rtxn)?; - match faceted_fields.get(name) { - Some(facet_type) => { - let distinct = FacetDistinct::new(id, self.index, self.rtxn); - self.perform_sort(distinct, matching_words, criteria) - } - None => { - let distinct = MapDistinct::new(id, self.index, self.rtxn); - self.perform_sort(distinct, matching_words, criteria) - } + if faceted_fields.contains(name) { + let distinct = FacetDistinct::new(id, self.index, self.rtxn); + self.perform_sort(distinct, matching_words, criteria) + } else { + let distinct = MapDistinct::new(id, self.index, self.rtxn); + self.perform_sort(distinct, matching_words, criteria) } } }