From b41bf586580f1cce26b30768f7c26bd7bb02f8a2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 27 Jan 2021 16:13:49 +0100 Subject: [PATCH] Split the FacetDistribution facet_values method into three --- src/search/facet/facet_distribution.rs | 278 ++++++++++++++----------- 1 file changed, 154 insertions(+), 124 deletions(-) diff --git a/src/search/facet/facet_distribution.rs b/src/search/facet/facet_distribution.rs index fc3c72853..4a650b9e6 100644 --- a/src/search/facet/facet_distribution.rs +++ b/src/search/facet/facet_distribution.rs @@ -57,6 +57,155 @@ impl<'a> FacetDistribution<'a> { self } + /// There is a small amount of candidates OR we ask for facet string values so we + /// decide to iterate over the facet values of each one of them, one by one. + fn facet_values_from_documents( + &self, + field_id: FieldId, + facet_type: FacetType, + candidates: &RoaringBitmap, + ) -> heed::Result> + { + let mut key_buffer = vec![field_id]; + match facet_type { + FacetType::String => { + let mut facet_values = BTreeMap::new(); + for docid in candidates.into_iter().take(CANDIDATES_THRESHOLD as usize) { + key_buffer.truncate(1); + key_buffer.extend_from_slice(&docid.to_be_bytes()); + let iter = self.index.field_id_docid_facet_values + .prefix_iter(self.rtxn, &key_buffer)? + .remap_key_type::(); + + for result in iter { + let ((_, _, value), ()) = result?; + *facet_values.entry(FacetValue::from(value)).or_insert(0) += 1; + } + } + Ok(facet_values) + }, + FacetType::Float => { + let mut facet_values = BTreeMap::new(); + for docid in candidates { + key_buffer.truncate(1); + key_buffer.extend_from_slice(&docid.to_be_bytes()); + let iter = self.index.field_id_docid_facet_values + .prefix_iter(self.rtxn, &key_buffer)? 
+ .remap_key_type::(); + + for result in iter { + let ((_, _, value), ()) = result?; + *facet_values.entry(FacetValue::from(value)).or_insert(0) += 1; + } + } + Ok(facet_values) + }, + FacetType::Integer => { + let mut facet_values = BTreeMap::new(); + for docid in candidates { + key_buffer.truncate(1); + key_buffer.extend_from_slice(&docid.to_be_bytes()); + let iter = self.index.field_id_docid_facet_values + .prefix_iter(self.rtxn, &key_buffer)? + .remap_key_type::(); + + for result in iter { + let ((_, _, value), ()) = result?; + *facet_values.entry(FacetValue::from(value)).or_insert(0) += 1; + } + } + Ok(facet_values) + }, + } + } + + /// There are too many documents, we use the facet levels to move through + /// the facet values, to find the candidates and values associated. + fn facet_values_from_facet_levels( + &self, + field_id: FieldId, + facet_type: FacetType, + candidates: &RoaringBitmap, + ) -> heed::Result> + { + let iter = match facet_type { + FacetType::String => unreachable!(), + FacetType::Float => { + let iter = FacetIter::::new_non_reducing( + self.rtxn, self.index, field_id, candidates.clone(), + )?; + let iter = iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))); + Box::new(iter) as Box::> + }, + FacetType::Integer => { + let iter = FacetIter::::new_non_reducing( + self.rtxn, self.index, field_id, candidates.clone(), + )?; + Box::new(iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids)))) + }, + }; + + let mut facet_values = BTreeMap::new(); + for result in iter { + let (value, mut docids) = result?; + docids.intersect_with(candidates); + if !docids.is_empty() { + facet_values.insert(value, docids.len()); + } + if facet_values.len() == self.max_values_by_facet { + break; + } + } + + Ok(facet_values) + } + + /// Placeholder search, a.k.a. no candidates were specified. We iterate through the + /// facet values one by one and iterate on the facet level 0 for numbers. 
+ fn facet_values_from_raw_facet_database( + &self, + field_id: FieldId, + facet_type: FacetType, + ) -> heed::Result> + { + let db = self.index.facet_field_id_value_docids; + let level = 0; + let iter = match facet_type { + FacetType::String => { + let iter = db + .prefix_iter(self.rtxn, &[field_id])? + .remap_key_type::() + .map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids))); + Box::new(iter) as Box::> + }, + FacetType::Float => { + let db = db.remap_key_type::(); + let range = FacetRange::::new( + self.rtxn, db, field_id, level, Unbounded, Unbounded, + )?; + Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) + }, + FacetType::Integer => { + let db = db.remap_key_type::(); + let range = FacetRange::::new( + self.rtxn, db, field_id, level, Unbounded, Unbounded, + )?; + Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) + }, + }; + + let mut facet_values = BTreeMap::new(); + for result in iter { + let (value, docids) = result?; + facet_values.insert(value, docids.len()); + if facet_values.len() == self.max_values_by_facet { + break; + } + } + + Ok(facet_values) + } + fn facet_values( &self, field_id: FieldId, @@ -64,134 +213,15 @@ impl<'a> FacetDistribution<'a> { ) -> heed::Result> { if let Some(candidates) = self.candidates.as_ref() { - // Classic search, candidates were specified, we must return - // facet values only related to those candidates. + // Classic search, candidates were specified, we must return facet values only related + // to those candidates. We also enter here for facet strings for performance reasons. if candidates.len() <= CANDIDATES_THRESHOLD || facet_type == FacetType::String { - // There is a small amount of candidates OR we ask for facet string values so we - // decide to iterate over the facet values of each one of them, one by one. 
- let mut key_buffer = vec![field_id]; - match facet_type { - FacetType::String => { - let mut facet_values = BTreeMap::new(); - for docid in candidates.into_iter().take(CANDIDATES_THRESHOLD as usize) { - key_buffer.truncate(1); - key_buffer.extend_from_slice(&docid.to_be_bytes()); - let iter = self.index.field_id_docid_facet_values - .prefix_iter(self.rtxn, &key_buffer)? - .remap_key_type::(); - - for result in iter { - let ((_, _, value), ()) = result?; - *facet_values.entry(FacetValue::from(value)).or_insert(0) += 1; - } - } - Ok(facet_values) - }, - FacetType::Float => { - let mut facet_values = BTreeMap::new(); - for docid in candidates { - key_buffer.truncate(1); - key_buffer.extend_from_slice(&docid.to_be_bytes()); - let iter = self.index.field_id_docid_facet_values - .prefix_iter(self.rtxn, &key_buffer)? - .remap_key_type::(); - - for result in iter { - let ((_, _, value), ()) = result?; - *facet_values.entry(FacetValue::from(value)).or_insert(0) += 1; - } - } - Ok(facet_values) - }, - FacetType::Integer => { - let mut facet_values = BTreeMap::new(); - for docid in candidates { - key_buffer.truncate(1); - key_buffer.extend_from_slice(&docid.to_be_bytes()); - let iter = self.index.field_id_docid_facet_values - .prefix_iter(self.rtxn, &key_buffer)? - .remap_key_type::(); - - for result in iter { - let ((_, _, value), ()) = result?; - *facet_values.entry(FacetValue::from(value)).or_insert(0) += 1; - } - } - Ok(facet_values) - }, - } + self.facet_values_from_documents(field_id, facet_type, candidates) } else { - // There is too much documents, we use the facet levels to move throught - // the facet values, to find the candidates and values associated. 
- let iter = match facet_type { - FacetType::String => unreachable!(), - FacetType::Float => { - let iter = FacetIter::::new_non_reducing( - self.rtxn, self.index, field_id, candidates.clone(), - )?; - let iter = iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids))); - Box::new(iter) as Box::> - }, - FacetType::Integer => { - let iter = FacetIter::::new_non_reducing( - self.rtxn, self.index, field_id, candidates.clone(), - )?; - Box::new(iter.map(|r| r.map(|(v, docids)| (FacetValue::from(v), docids)))) - }, - }; - - let mut facet_values = BTreeMap::new(); - for result in iter { - let (value, mut docids) = result?; - docids.intersect_with(candidates); - if !docids.is_empty() { - facet_values.insert(value, docids.len()); - } - if facet_values.len() == self.max_values_by_facet { - break; - } - } - - Ok(facet_values) + self.facet_values_from_facet_levels(field_id, facet_type, candidates) } } else { - // Placeholder search, a.k.a. no candidates were specified. We iterate throught the - // facet values one by one and iterate on the facet level 0 for numbers. - let db = self.index.facet_field_id_value_docids; - let iter = match facet_type { - FacetType::String => { - let iter = db - .prefix_iter(self.rtxn, &[field_id])? 
- .remap_key_type::() - .map(|r| r.map(|((_, v), docids)| (FacetValue::from(v), docids))); - Box::new(iter) as Box::> - }, - FacetType::Float => { - let db = db.remap_key_type::(); - let range = FacetRange::::new( - self.rtxn, db, field_id, 0, Unbounded, Unbounded, - )?; - Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) - }, - FacetType::Integer => { - let db = db.remap_key_type::(); - let range = FacetRange::::new( - self.rtxn, db, field_id, 0, Unbounded, Unbounded, - )?; - Box::new(range.map(|r| r.map(|((_, _, v, _), docids)| (FacetValue::from(v), docids)))) - }, - }; - - let mut facet_values = BTreeMap::new(); - for result in iter { - let (value, docids) = result?; - facet_values.insert(value, docids.len()); - if facet_values.len() == self.max_values_by_facet { - break; - } - } - - Ok(facet_values) + self.facet_values_from_raw_facet_database(field_id, facet_type) } }