diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index 23e7149a2..ec45f36d0 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -11,7 +11,7 @@ use std::fmt; use compact_arena::{SmallArena, Idx32, mk_arena}; use log::debug; use meilisearch_types::DocIndex; -use sdset::{Set, SetBuf, exponential_search}; +use sdset::{Set, SetBuf, exponential_search, SetOperation}; use slice_group_by::{GroupBy, GroupByMut}; use crate::error::Error; @@ -28,6 +28,7 @@ pub fn bucket_sort<'c, FI>( reader: &heed::RoTxn, query: &str, range: Range, + facets_docids: Option>, filter: Option, criteria: Criteria<'c>, searchable_attrs: Option, @@ -50,6 +51,7 @@ where reader, query, range, + facets_docids, filter, distinct, distinct_size, @@ -94,10 +96,17 @@ where let mut queries_kinds = HashMap::new(); recurs_operation(&mut queries_kinds, &operation); - let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation)?; + let QueryResult { mut docids, queries } = traverse_query_tree(reader, &context, &operation)?; debug!("found {} documents", docids.len()); debug!("number of postings {:?}", queries.len()); + if let Some(facets_docids) = facets_docids { + let intersection = sdset::duo::OpBuilder::new(docids.as_ref(), facets_docids.as_set()) + .intersection() + .into_set_buf(); + docids = Cow::Owned(intersection); + } + let before = Instant::now(); mk_arena!(arena); let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries); @@ -179,6 +188,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>( reader: &heed::RoTxn, query: &str, range: Range, + facets_docids: Option>, filter: Option, distinct: FD, distinct_size: usize, @@ -225,10 +235,17 @@ where let mut queries_kinds = HashMap::new(); recurs_operation(&mut queries_kinds, &operation); - let QueryResult { docids, queries } = traverse_query_tree(reader, &context, &operation)?; + let QueryResult { mut docids, queries } = 
traverse_query_tree(reader, &context, &operation)?; debug!("found {} documents", docids.len()); debug!("number of postings {:?}", queries.len()); + if let Some(facets_docids) = facets_docids { + let intersection = sdset::duo::OpBuilder::new(docids.as_ref(), facets_docids.as_set()) + .intersection() + .into_set_buf(); + docids = Cow::Owned(intersection); + } + let before = Instant::now(); mk_arena!(arena); let mut bare_matches = cleanup_bare_matches(&mut arena, &docids, queries); diff --git a/meilisearch-core/src/query_builder.rs b/meilisearch-core/src/query_builder.rs index d95c19713..b3bbf21bf 100644 --- a/meilisearch-core/src/query_builder.rs +++ b/meilisearch-core/src/query_builder.rs @@ -1,66 +1,50 @@ -use std::ops::Range; +use std::borrow::Cow; +use std::ops::{Range, Deref}; use std::time::Duration; use crate::database::MainT; use crate::bucket_sort::{bucket_sort, bucket_sort_with_distinct}; use crate::{criterion::Criteria, Document, DocumentId}; use crate::{reordered_attrs::ReorderedAttrs, store, MResult}; +use crate::facets::FacetFilter; -pub struct QueryBuilder<'c, 'f, 'd> { +use either::Either; +use sdset::SetOperation; + +pub struct QueryBuilder<'c, 'f, 'd, 'fa, 'i> { criteria: Criteria<'c>, searchable_attrs: Option, filter: Option bool + 'f>>, distinct: Option<(Box Option + 'd>, usize)>, timeout: Option, - main_store: store::Main, - postings_lists_store: store::PostingsLists, - documents_fields_counts_store: store::DocumentsFieldsCounts, - synonyms_store: store::Synonyms, - prefix_documents_cache_store: store::PrefixDocumentsCache, - prefix_postings_lists_cache_store: store::PrefixPostingsListsCache, + index: &'i store::Index, + facets: Option<&'fa FacetFilter>, } -impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> { - pub fn new( - main: store::Main, - postings_lists: store::PostingsLists, - documents_fields_counts: store::DocumentsFieldsCounts, - synonyms: store::Synonyms, - prefix_documents_cache: store::PrefixDocumentsCache, - prefix_postings_lists_cache: 
store::PrefixPostingsListsCache, - ) -> QueryBuilder<'c, 'f, 'd> { +impl<'c, 'f, 'd, 'fa, 'i> QueryBuilder<'c, 'f, 'd, 'fa, 'i> { + pub fn new(index: &'i store::Index) -> Self { QueryBuilder::with_criteria( - main, - postings_lists, - documents_fields_counts, - synonyms, - prefix_documents_cache, - prefix_postings_lists_cache, + index, Criteria::default(), ) } + pub fn set_facets(&mut self, facets: Option<&'fa FacetFilter>) { + self.facets = facets; + } + pub fn with_criteria( - main: store::Main, - postings_lists: store::PostingsLists, - documents_fields_counts: store::DocumentsFieldsCounts, - synonyms: store::Synonyms, - prefix_documents_cache: store::PrefixDocumentsCache, - prefix_postings_lists_cache: store::PrefixPostingsListsCache, + index: &'i store::Index, criteria: Criteria<'c>, - ) -> QueryBuilder<'c, 'f, 'd> { + ) -> Self { QueryBuilder { criteria, searchable_attrs: None, filter: None, distinct: None, timeout: None, - main_store: main, - postings_lists_store: postings_lists, - documents_fields_counts_store: documents_fields_counts, - synonyms_store: synonyms, - prefix_documents_cache_store: prefix_documents_cache, - prefix_postings_lists_cache_store: prefix_postings_lists_cache, + index, + facets: None, } } @@ -93,36 +77,70 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> { query: &str, range: Range, ) -> MResult<(Vec, usize)> { + let facets_docids = match self.facets { + Some(facets) => { + let mut ands = Vec::with_capacity(facets.len()); + let mut ors = Vec::new(); + for f in facets.deref() { + match f { + Either::Left(keys) => { + ors.reserve(keys.len()); + for key in keys { + let docids = self.index.facets.facet_document_ids(reader, &key)?.unwrap_or_default(); + ors.push(docids); + } + let sets: Vec<_> = ors.iter().map(Cow::deref).collect(); + let or_result = sdset::multi::OpBuilder::from_vec(sets).union().into_set_buf(); + ands.push(Cow::Owned(or_result)); + ors.clear(); + } + Either::Right(key) =>{ + match self.index.facets.facet_document_ids(reader, 
&key)? { + Some(docids) => ands.push(docids), + // no candidates for search, early return. + None => return Ok((vec![], 0)), + } + } + }; + } + let ands: Vec<_> = ands.iter().map(Cow::deref).collect(); + Some(sdset::multi::OpBuilder::from_vec(ands).intersection().into_set_buf()) + } + None => None + }; + match self.distinct { Some((distinct, distinct_size)) => bucket_sort_with_distinct( reader, query, range, + facets_docids, self.filter, distinct, distinct_size, self.criteria, self.searchable_attrs, - self.main_store, - self.postings_lists_store, - self.documents_fields_counts_store, - self.synonyms_store, - self.prefix_documents_cache_store, - self.prefix_postings_lists_cache_store, + self.index.main, + self.index.postings_lists, + self.index.documents_fields_counts, + self.index.synonyms, + self.index.prefix_documents_cache, + self.index.prefix_postings_lists_cache, ), None => bucket_sort( reader, query, range, + facets_docids, self.filter, self.criteria, self.searchable_attrs, - self.main_store, - self.postings_lists_store, - self.documents_fields_counts_store, - self.synonyms_store, - self.prefix_documents_cache_store, - self.prefix_postings_lists_cache_store, + self.index.main, + self.index.postings_lists, + self.index.documents_fields_counts, + self.index.synonyms, + self.index.prefix_documents_cache, + self.index.prefix_postings_lists_cache, ), } } diff --git a/meilisearch-http/src/helpers/meilisearch.rs b/meilisearch-http/src/helpers/meilisearch.rs index 6c8046826..94f496c07 100644 --- a/meilisearch-http/src/helpers/meilisearch.rs +++ b/meilisearch-http/src/helpers/meilisearch.rs @@ -6,6 +6,7 @@ use std::time::Instant; use indexmap::IndexMap; use log::error; use meilisearch_core::Filter; +use meilisearch_core::facets::FacetFilter; use meilisearch_core::criterion::*; use meilisearch_core::settings::RankingRule; use meilisearch_core::{Highlight, Index, MainT, RankedMap}; @@ -34,6 +35,7 @@ impl IndexSearchExt for Index { attributes_to_highlight: None, filters: 
None, matches: false, + facet_filters: None, } } } @@ -48,6 +50,7 @@ pub struct SearchBuilder<'a> { attributes_to_highlight: Option>, filters: Option, matches: bool, + facet_filters: Option, } impl<'a> SearchBuilder<'a> { @@ -82,6 +85,11 @@ impl<'a> SearchBuilder<'a> { self } + pub fn add_facet_filters(&mut self, filters: FacetFilter) -> &SearchBuilder { + self.facet_filters = Some(filters); + self + } + pub fn filters(&mut self, value: String) -> &SearchBuilder { self.filters = Some(value); self @@ -138,6 +146,8 @@ impl<'a> SearchBuilder<'a> { } } + query_builder.set_facets(self.facet_filters.as_ref()); + let start = Instant::now(); let result = query_builder.query(reader, &self.query, self.offset..(self.offset + self.limit)); let (docs, nb_hits) = result.map_err(ResponseError::search_documents)?; diff --git a/meilisearch-http/src/routes/search.rs b/meilisearch-http/src/routes/search.rs index 1189e79a8..846fc8904 100644 --- a/meilisearch-http/src/routes/search.rs +++ b/meilisearch-http/src/routes/search.rs @@ -12,6 +12,8 @@ use crate::helpers::Authentication; use crate::routes::IndexParam; use crate::Data; +use meilisearch_core::facets::FacetFilter; + pub fn services(cfg: &mut web::ServiceConfig) { cfg.service(search_with_url_query); } @@ -28,6 +30,7 @@ struct SearchQuery { attributes_to_highlight: Option, filters: Option, matches: Option, + facet_filters: Option, } #[get("/indexes/{index_uid}/search", wrap = "Authentication::Public")] @@ -81,6 +84,13 @@ async fn search_with_url_query( } } + if let Some(ref facet_filters) = params.facet_filters { + match index.main.attributes_for_faceting(&reader)? 
{ + Some(ref attrs) => { search_builder.add_facet_filters(FacetFilter::from_str(facet_filters, &schema, attrs)?); }, + None => return Err(ResponseError::FacetExpression("can't filter on facets, as no facet is set".to_string())) + } + } + if let Some(attributes_to_crop) = &params.attributes_to_crop { let default_length = params.crop_length.unwrap_or(200); let mut final_attributes: HashMap = HashMap::new();