Add min and max facet stats

This commit is contained in:
Louis Dureuil 2023-01-25 15:04:08 +01:00
parent 8ae441a4db
commit 74859ecd61
No known key found for this signature in database
4 changed files with 112 additions and 6 deletions

View File

@ -108,7 +108,7 @@ pub struct SearchHit {
pub matches_position: Option<MatchesPosition>, pub matches_position: Option<MatchesPosition>,
} }
#[derive(Serialize, Debug, Clone, PartialEq, Eq)] #[derive(Serialize, Debug, Clone, PartialEq)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct SearchResult { pub struct SearchResult {
pub hits: Vec<SearchHit>, pub hits: Vec<SearchHit>,
@ -118,6 +118,8 @@ pub struct SearchResult {
pub hits_info: HitsInfo, pub hits_info: HitsInfo,
#[serde(skip_serializing_if = "Option::is_none")] #[serde(skip_serializing_if = "Option::is_none")]
pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>, pub facet_distribution: Option<BTreeMap<String, BTreeMap<String, u64>>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub facet_stats: Option<BTreeMap<String, FacetStats>>,
} }
#[derive(Serialize, Debug, Clone, PartialEq, Eq)] #[derive(Serialize, Debug, Clone, PartialEq, Eq)]
@ -129,6 +131,12 @@ pub enum HitsInfo {
OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize }, OffsetLimit { limit: usize, offset: usize, estimated_total_hits: usize },
} }
#[derive(Serialize, Debug, Clone, PartialEq)]
pub struct FacetStats {
pub min: f64,
pub max: f64,
}
pub fn perform_search( pub fn perform_search(
index: &Index, index: &Index,
query: SearchQuery, query: SearchQuery,
@ -300,7 +308,7 @@ pub fn perform_search(
HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits } HitsInfo::OffsetLimit { limit: query.limit, offset, estimated_total_hits: number_of_hits }
}; };
let facet_distribution = match query.facets { let (facet_distribution, facet_stats) = match query.facets {
Some(ref fields) => { Some(ref fields) => {
let mut facet_distribution = index.facets_distribution(&rtxn); let mut facet_distribution = index.facets_distribution(&rtxn);
@ -314,18 +322,23 @@ pub fn perform_search(
facet_distribution.facets(fields); facet_distribution.facets(fields);
} }
let distribution = facet_distribution.candidates(candidates).execute()?; let distribution = facet_distribution.candidates(candidates).execute()?;
let stats = facet_distribution.compute_stats()?;
Some(distribution) (Some(distribution), Some(stats))
} }
None => None, None => (None, None),
}; };
let facet_stats = facet_stats.map(|stats| {
stats.into_iter().map(|(k, (min, max))| (k, FacetStats { min, max })).collect()
});
let result = SearchResult { let result = SearchResult {
hits: documents, hits: documents,
hits_info, hits_info,
query: query.q.clone().unwrap_or_default(), query: query.q.clone().unwrap_or_default(),
processing_time_ms: before_search.elapsed().as_millis(), processing_time_ms: before_search.elapsed().as_millis(),
facet_distribution, facet_distribution,
facet_stats,
}; };
Ok(result) Ok(result)
} }

View File

@ -1,5 +1,6 @@
use std::mem::take; use std::mem::take;
use heed::BytesDecode;
use itertools::Itertools; use itertools::Itertools;
use log::debug; use log::debug;
use ordered_float::OrderedFloat; use ordered_float::OrderedFloat;
@ -7,7 +8,7 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult}; use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::FacetGroupKeyCodec; use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
@ -196,6 +197,38 @@ fn facet_ordered_iterative<'t>(
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>) Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
} }
fn facet_extreme_value<'t>(
mut extreme_it: Box<dyn Iterator<Item = heed::Result<(RoaringBitmap, &[u8])>> + 't>,
) -> Result<Option<f64>> {
let extreme_value =
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
let (_, extreme_value) = extreme_value?;
Ok(OrderedF64Codec::bytes_decode(extreme_value))
}
pub fn facet_min_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
field_id: FieldId,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
pub fn facet_max_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn,
field_id: FieldId,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
fn facet_ordered_set_based<'t>( fn facet_ordered_set_based<'t>(
index: &'t Index, index: &'t Index,
rtxn: &'t heed::RoTxn, rtxn: &'t heed::RoTxn,

View File

@ -21,6 +21,7 @@ use crate::update::{MAX_LENGTH_FOR_PREFIX_PROXIMITY_DB, MAX_PROXIMITY_FOR_PREFIX
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result}; use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
mod asc_desc; mod asc_desc;
pub use asc_desc::{facet_max_value, facet_min_value};
mod attribute; mod attribute;
mod exactness; mod exactness;
pub mod r#final; pub mod r#final;

View File

@ -278,6 +278,65 @@ impl<'a> FacetDistribution<'a> {
} }
} }
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let candidates = if let Some(candidates) = self.candidates.clone() {
candidates
} else {
return Ok(Default::default());
};
let fields = match &self.facets {
Some(facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: filterable_fields.into_iter().collect(),
}
.into());
} else {
facets.clone()
}
}
None => filterable_fields,
};
let mut distribution = BTreeMap::new();
for (fid, name) in fields_ids_map.iter() {
if crate::is_faceted(name, &fields) {
let min_value = if let Some(min_value) = crate::search::criteria::facet_min_value(
self.index,
self.rtxn,
fid,
candidates.clone(),
)? {
min_value
} else {
continue;
};
let max_value = if let Some(max_value) = crate::search::criteria::facet_max_value(
self.index,
self.rtxn,
fid,
candidates.clone(),
)? {
max_value
} else {
continue;
};
distribution.insert(name.to_string(), (min_value, max_value));
}
}
Ok(distribution)
}
pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> { pub fn execute(&self) -> Result<BTreeMap<String, BTreeMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?; let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?; let filterable_fields = self.index.filterable_fields(self.rtxn)?;