enables facet count

This commit is contained in:
mpostma 2020-05-12 11:22:09 +02:00
parent effbb7f7f1
commit e5126af458
8 changed files with 88 additions and 58 deletions

View file

@ -17,7 +17,6 @@ use slice_group_by::{GroupBy, GroupByMut};
use crate::error::Error;
use crate::criterion::{Criteria, Context, ContextMut};
use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
use crate::facets::FacetKey;
use crate::raw_document::RawDocument;
use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
use crate::{store, Document, DocumentId, MResult};
@ -30,7 +29,8 @@ pub struct SortResult {
pub documents: Vec<Document>,
pub nb_hits: usize,
pub is_exhaustive: bool,
pub facets: Option<HashMap<FacetKey, usize>>,
pub facets: Option<HashMap<String, HashMap<String, usize>>>,
pub exhaustive_facet_count: Option<bool>,
}
pub fn bucket_sort<'c, FI>(
@ -38,7 +38,7 @@ pub fn bucket_sort<'c, FI>(
query: &str,
range: Range<usize>,
facets_docids: Option<SetBuf<DocumentId>>,
facet_count_docids: Option<HashMap<FacetKey, Cow<Set<DocumentId>>>>,
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
filter: Option<FI>,
criteria: Criteria<'c>,
searchable_attrs: Option<ReorderedAttrs>,
@ -120,15 +120,10 @@ where
docids = Cow::Owned(intersection);
}
if let Some(facet_count_docids) = facet_count_docids {
let mut facets = HashMap::new();
for (key, document_ids) in facet_count_docids {
let mut counter = Counter::new();
let op = OpBuilder::new(document_ids.as_ref(), document_ids.as_ref()).intersection();
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
facets.insert(key, counter.0);
}
result.facets = Some(facets);
if let Some(f) = facet_count_docids {
// hardcoded value, until approximation optimization
result.exhaustive_facet_count = Some(true);
result.facets = Some(facet_count(f, &docids));
}
let before = Instant::now();
@ -216,7 +211,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>(
query: &str,
range: Range<usize>,
facets_docids: Option<SetBuf<DocumentId>>,
facet_count_docids: Option<HashMap<FacetKey, Cow<Set<DocumentId>>>>,
facet_count_docids: Option<HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>>,
filter: Option<FI>,
distinct: FD,
distinct_size: usize,
@ -276,15 +271,10 @@ where
docids = Cow::Owned(intersection);
}
if let Some(facet_count_docids) = facet_count_docids {
let mut facets = HashMap::new();
for (key, document_ids) in facet_count_docids {
let mut counter = Counter::new();
let op = OpBuilder::new(document_ids.as_ref(), document_ids.as_ref()).intersection();
SetOperation::<DocumentId>::extend_collection(op, &mut counter);
facets.insert(key, counter.0);
}
result.facets = Some(facets);
if let Some(f) = facet_count_docids {
// hardcoded value, until approximation optimization
result.exhaustive_facet_count = Some(true);
result.facets = Some(facet_count(f, &docids));
}
let before = Instant::now();
@ -618,3 +608,22 @@ impl Deref for PostingsListView<'_> {
}
}
}
/// Computes per-facet hit counts restricted to a set of candidate documents.
///
/// `facet_docids` maps a field name to a map from facet value to the set of
/// document ids stored for that value. For every (field, value) pair, the
/// returned map holds the size of the intersection between that set and
/// `candidate_docids`, keeping the same two-level field -> value layout.
fn facet_count(
    facet_docids: HashMap<String, HashMap<String, Cow<Set<DocumentId>>>>,
    candidate_docids: &Set<DocumentId>,
) -> HashMap<String, HashMap<String, usize>> {
    facet_docids
        .into_iter()
        .map(|(field, values)| {
            let counts: HashMap<String, usize> = values
                .into_iter()
                .map(|(value, ids)| {
                    // The counter only tallies the ids produced by the
                    // intersection; no intermediate collection is built.
                    let mut hits = Counter::new();
                    let common = OpBuilder::new(ids.as_ref(), candidate_docids).intersection();
                    SetOperation::<DocumentId>::extend_collection(common, &mut hits);
                    (value, hits.0)
                })
                .collect();
            (field, counts)
        })
        .collect()
}

View file

@ -12,7 +12,7 @@ use crate::facets::FacetFilter;
use either::Either;
use sdset::SetOperation;
use meilisearch_schema::FieldId;
use meilisearch_schema::{Schema, FieldId};
pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> {
criteria: Criteria<'c>,
@ -21,8 +21,8 @@ pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> {
distinct: Option<(Box<dyn Fn(DocumentId) -> Option<u64> + 'd>, usize)>,
timeout: Option<Duration>,
index: &'i store::Index,
facet_fitlers: Option<&'q FacetFilter>,
facets: Option<&'q [FieldId]>,
facet_filter: Option<FacetFilter>,
facets: Option<Vec<(FieldId, String)>>,
}
impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
@ -34,8 +34,8 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
}
/// sets facet attributes to filter on
pub fn set_facet_filters(&mut self, facets: Option<&'q FacetFilter>) {
self.facet_fitlers = facets;
pub fn set_facet_filter(&mut self, facets: Option<FacetFilter>) {
self.facet_filter = facets;
}
/// sets facet attributes for which to return the count
@ -54,7 +54,7 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
distinct: None,
timeout: None,
index,
facet_fitlers: None,
facet_filter: None,
facets: None,
}
}
@ -87,8 +87,9 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
reader: &heed::RoTxn<MainT>,
query: &str,
range: Range<usize>,
schema: &Schema,
) -> MResult<SortResult> {
let facets_docids = match self.facet_fitlers {
let facets_docids = match self.facet_filter {
Some(facets) => {
let mut ands = Vec::with_capacity(facets.len());
let mut ors = Vec::new();
@ -120,14 +121,21 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> {
None => None
};
// for each field to retrieve the count for, create a HashMap associating the attribute
// value to a set of matching documents. The HashMaps are then collected in another
// HashMap, associating each HashMap to its field.
let facet_count_docids = match self.facets {
Some(field_ids) => {
let mut facet_count_map = HashMap::new();
for field_id in field_ids {
for pair in self.index.facets.field_document_ids(reader, *field_id)? {
let (facet_key, document_ids) = pair?;
let facet_key_string = facet_key.to_parts(schema)?;
facet_count_map.insert(facet_key, document_ids);
if let Some(field_name) = schema.name(*field_id) {
let mut key_map = HashMap::new();
for pair in self.index.facets.field_document_ids(reader, *field_id)? {
let (facet_key, document_ids) = pair?;
let value = facet_key.value();
key_map.insert(value.to_string(), document_ids);
}
facet_count_map.insert(field_name.to_string(), key_map);
}
}
Some(facet_count_map)

View file

@ -24,7 +24,7 @@ impl Facets {
}
pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, field_id: FieldId) -> ZResult<RoRange<'txn, FacetKey, CowSet<DocumentId>>> {
self.facets.prefix_iter(reader, &FacetKey::new(field_id, "".to_string()))
self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new()))
}
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<Cow<'txn, Set<DocumentId>>>> {

View file

@ -363,7 +363,7 @@ impl Index {
QueryBuilder::new(self)
}
pub fn query_builder_with_criteria<'c, 'f, 'd, 'fa, 'i, 'q>(
pub fn query_builder_with_criteria<'c, 'f, 'd, 'i>(
&'i self,
criteria: Criteria<'c>,
) -> QueryBuilder<'c, 'f, 'd, 'i, 'q> {