From e5126af458c4c90628183b12ab92428022e98272 Mon Sep 17 00:00:00 2001 From: mpostma Date: Tue, 12 May 2020 11:22:09 +0200 Subject: [PATCH] enables facet count --- Cargo.lock | 2 +- meilisearch-core/src/bucket_sort.rs | 53 ++++++++++++--------- meilisearch-core/src/query_builder.rs | 30 +++++++----- meilisearch-core/src/store/facets.rs | 2 +- meilisearch-core/src/store/mod.rs | 2 +- meilisearch-http/Cargo.toml | 2 +- meilisearch-http/src/helpers/meilisearch.rs | 5 +- meilisearch-http/src/routes/search.rs | 50 +++++++++++-------- 8 files changed, 88 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 61c7cd4f5..f56456fbc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1664,8 +1664,8 @@ dependencies = [ "mime", "pretty-bytes", "rand 0.7.3", - "sentry", "regex", + "sentry", "serde", "serde_json", "serde_qs", diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index 8f391d74c..337bd8cce 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -17,7 +17,6 @@ use slice_group_by::{GroupBy, GroupByMut}; use crate::error::Error; use crate::criterion::{Criteria, Context, ContextMut}; use crate::distinct_map::{BufferedDistinctMap, DistinctMap}; -use crate::facets::FacetKey; use crate::raw_document::RawDocument; use crate::{database::MainT, reordered_attrs::ReorderedAttrs}; use crate::{store, Document, DocumentId, MResult}; @@ -30,7 +29,8 @@ pub struct SortResult { pub documents: Vec, pub nb_hits: usize, pub is_exhaustive: bool, - pub facets: Option>, + pub facets: Option>>, + pub exhaustive_facet_count: Option, } pub fn bucket_sort<'c, FI>( @@ -38,7 +38,7 @@ pub fn bucket_sort<'c, FI>( query: &str, range: Range, facets_docids: Option>, - facet_count_docids: Option>>>, + facet_count_docids: Option>>>>, filter: Option, criteria: Criteria<'c>, searchable_attrs: Option, @@ -120,15 +120,10 @@ where docids = Cow::Owned(intersection); } - if let Some(facet_count_docids) = facet_count_docids { - let mut facets = HashMap::new(); - for (key, document_ids) in facet_count_docids { - let mut counter = Counter::new(); - let op = OpBuilder::new(document_ids.as_ref(), document_ids.as_ref()).intersection(); - SetOperation::::extend_collection(op, &mut counter); - facets.insert(key, counter.0); - } - result.facets = Some(facets); + if let Some(f) = facet_count_docids { + // hardcoded value, until approximation optimization + result.exhaustive_facet_count = Some(true); + result.facets = Some(facet_count(f, &docids)); } let before = Instant::now(); @@ -216,7 +211,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>( query: &str, range: Range, facets_docids: Option>, - facet_count_docids: Option>>>, + facet_count_docids: Option>>>>, filter: Option, distinct: FD, distinct_size: usize, @@ -276,15 +271,10 @@ where docids = Cow::Owned(intersection); } - if let Some(facet_count_docids) = facet_count_docids { - let mut facets = HashMap::new(); - for (key, document_ids) in facet_count_docids { - let mut counter = Counter::new(); - let op = OpBuilder::new(document_ids.as_ref(), document_ids.as_ref()).intersection(); - SetOperation::::extend_collection(op, &mut counter); - facets.insert(key, counter.0); - } - result.facets = Some(facets); + if let Some(f) = facet_count_docids { + // hardcoded value, until approximation optimization + result.exhaustive_facet_count = Some(true); + result.facets = Some(facet_count(f, &docids)); } let before = Instant::now(); @@ -618,3 +608,22 @@ impl Deref for PostingsListView<'_> { } } } + +/// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids. +fn facet_count( + facet_docids: HashMap>>>, + candidate_docids: &Set, +) -> HashMap> { + let mut facets_counts = HashMap::with_capacity(facet_docids.len()); + for (key, doc_map) in facet_docids { + let mut count_map = HashMap::with_capacity(doc_map.len()); + for (value, docids) in doc_map { + let mut counter = Counter::new(); + let op = OpBuilder::new(docids.as_ref(), candidate_docids).intersection(); + SetOperation::::extend_collection(op, &mut counter); + count_map.insert(value, counter.0); + } + facets_counts.insert(key, count_map); + } + facets_counts +} diff --git a/meilisearch-core/src/query_builder.rs b/meilisearch-core/src/query_builder.rs index 77c0421d6..5a995030e 100644 --- a/meilisearch-core/src/query_builder.rs +++ b/meilisearch-core/src/query_builder.rs @@ -12,7 +12,7 @@ use crate::facets::FacetFilter; use either::Either; use sdset::SetOperation; -use meilisearch_schema::FieldId; +use meilisearch_schema::{Schema, FieldId}; pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> { criteria: Criteria<'c>, @@ -21,8 +21,8 @@ pub struct QueryBuilder<'c, 'f, 'd, 'i, 'q> { distinct: Option<(Box Option + 'd>, usize)>, timeout: Option, index: &'i store::Index, - facet_fitlers: Option<&'q FacetFilter>, - facets: Option<&'q [FieldId]>, + facet_filter: Option, + facets: Option>, } impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> { @@ -34,8 +34,8 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> { } /// sets facet attributes to filter on - pub fn set_facet_filters(&mut self, facets: Option<&'q FacetFilter>) { - self.facet_fitlers = facets; + pub fn set_facet_filter(&mut self, facets: Option) { + self.facet_filter = facets; } /// sets facet attributes for which to return the count @@ -54,7 +54,7 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> { distinct: None, timeout: None, index, - facet_fitlers: None, + facet_filter: None, facets: None, } } @@ -87,8 +87,9 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> { reader: &heed::RoTxn, query: &str, range: Range, + schema: &Schema, ) -> MResult { - let facets_docids = match self.facet_fitlers { + let facets_docids = match self.facet_filter { Some(facets) => { let mut ands = Vec::with_capacity(facets.len()); let mut ors = Vec::new(); @@ -120,14 +121,21 @@ impl<'c, 'f, 'd, 'i, 'q> QueryBuilder<'c, 'f, 'd, 'i, 'q> { None => None }; + // for each field to retrieve the count for, create an HashMap associating the attribute + // value to a set of matching documents. The HashMaps are them collected in another + // HashMap, associating each HashMap to it's field. let facet_count_docids = match self.facets { Some(field_ids) => { let mut facet_count_map = HashMap::new(); for field_id in field_ids { - for pair in self.index.facets.field_document_ids(reader, *field_id)? { - let (facet_key, document_ids) = pair?; - let facet_key_string = facet_key.to_parts(schema)?; - facet_count_map.insert(facet_key, document_ids); + if let Some(field_name) = schema.name(*field_id) { + let mut key_map = HashMap::new(); + for pair in self.index.facets.field_document_ids(reader, *field_id)? { + let (facet_key, document_ids) = pair?; + let value = facet_key.value(); + key_map.insert(value.to_string(), document_ids); + } + facet_count_map.insert(field_name.to_string(), key_map); } } Some(facet_count_map) diff --git a/meilisearch-core/src/store/facets.rs b/meilisearch-core/src/store/facets.rs index 907ceae9b..216b423c9 100644 --- a/meilisearch-core/src/store/facets.rs +++ b/meilisearch-core/src/store/facets.rs @@ -24,7 +24,7 @@ impl Facets { } pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn, field_id: FieldId) -> ZResult>> { - self.facets.prefix_iter(reader, &FacetKey::new(field_id, "".to_string())) + self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new())) } pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn, facet_key: &FacetKey) -> ZResult>>> { diff --git a/meilisearch-core/src/store/mod.rs b/meilisearch-core/src/store/mod.rs index ddf6614f5..fb3d68849 100644 --- a/meilisearch-core/src/store/mod.rs +++ b/meilisearch-core/src/store/mod.rs @@ -363,7 +363,7 @@ impl Index { QueryBuilder::new(self) } - pub fn query_builder_with_criteria<'c, 'f, 'd, 'fa, 'i, 'q>( + pub fn query_builder_with_criteria<'c, 'f, 'd, 'i>( &'i self, criteria: Criteria<'c>, ) -> QueryBuilder<'c, 'f, 'd, 'i, 'q> { diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 8e4876397..6b4c95514 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -14,7 +14,7 @@ name = "meilisearch" path = "src/main.rs" [features] -default = ["sentry"] +#default = ["sentry"] [dependencies] actix-cors = "0.2.0" diff --git a/meilisearch-http/src/helpers/meilisearch.rs b/meilisearch-http/src/helpers/meilisearch.rs index 13fdea2d4..66ff58b61 100644 --- a/meilisearch-http/src/helpers/meilisearch.rs +++ b/meilisearch-http/src/helpers/meilisearch.rs @@ -157,7 +157,7 @@ impl<'a> SearchBuilder<'a> { query_builder.set_facets(self.facets.as_deref()); let start = Instant::now(); - let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit)); + let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit), &schema); let search_result = result.map_err(ResponseError::search_documents)?; let time_ms = start.elapsed().as_millis() as usize; @@ -247,7 +247,7 @@ impl<'a> SearchBuilder<'a> { exhaustive_nb_hits: search_result.is_exhaustive, processing_time_ms: time_ms, query: self.query.to_string(), - facets: search_result.facets + facets: search_result.facets, }; Ok(results) @@ -332,6 +332,7 @@ pub struct SearchResult { pub exhaustive_nb_hits: bool, pub processing_time_ms: usize, pub query: String, + pub facets: Option>>, } /// returns the start index and the length on the crop. diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index 0468784f5..731a6ec90 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -33,7 +33,7 @@ struct SearchQuery { filters: Option, matches: Option, facet_filters: Option, - facets: Option + facets: Option, } #[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")] @@ -94,9 +94,12 @@ async fn search_with_url_query( } } - if let Some(ref facets) = params.facets { + if let Some(facets) = ¶ms.facets { match index.main.attributes_for_faceting(&reader)? { - Some(ref attrs) => { search_builder.add_facets(prepare_facet_list(facets, &schema, attrs)?); }, + Some(ref attrs) => { + let field_ids = prepare_facet_list(&facets, &schema, attrs)?; + search_builder.add_facets(field_ids); + }, None => return Err(ResponseError::FacetExpression("can't return facets count, as no facet is set".to_string())) } @@ -162,26 +165,35 @@ async fn search_with_url_query( Ok(HttpResponse::Ok().json(search_builder.search(&reader)?)) } -fn prepare_facet_list<'fa>(facets: &str, schema: &Schema, facet_attrs: &'fa [FieldId]) -> Result, ResponseError> { - let facet_array = serde_json::from_str(facets).expect("do error handling"); // TODO - match facet_array { - Value::Array(facet_array) => { - let wild_card = Value::String("*".to_string()); - if facet_array.iter().any(|it| it == &wild_card) { - return Ok(Vec::from(facet_attrs)); // TODO can make cow? +/// Parses the incoming string into an array of attributes for which to return a count. It returns +/// a Vec of attribute names ascociated with their id. +/// +/// An error is returned if the array is malformed, or if it contains attributes that are +/// unexisting, or not set as facets. +fn prepare_facet_list(facets: &str, schema: &Schema, facet_attrs: &[FieldId]) -> Result, FacetCountError> { + let json_array = serde_json::from_str(facets)?; + match json_array { + Value::Array(vals) => { + let wildcard = Value::String("*".to_string()); + if vals.iter().any(|f| f == &wildcard) { + return Ok(Vec::from(facet_attrs)); } - let mut fields = Vec::with_capacity(facet_attrs.len()); - for v in facet_array { - match v { - Value::String(name) => { - let id = schema.id(&name).expect("not found error"); // TODO - fields.push(id); + let mut field_ids = Vec::new(); + for facet in vals { + match facet { + Value::String(facet) => { + if let Some(id) = schema.id(&facet) { + if !facet_attrs.contains(&id) { + return Err(ResponseError::FacetExpression("Only attributes set as facet can be counted".to_string())); // TODO make special error + } + field_ids.push(id); + } } - _ => todo!("expected string, found {}", v), + bad_val => return Err(ResponseError::FacetExpression(format!("expected String found {}", bad_val))) } } - return Ok(fields); + Ok(field_ids) } - _ => todo!("error, bad syntax, expected array") + bad_val => return Err(ResponseError::FacetExpression(format!("expected Array found {}", bad_val))) } }