From 18a50b4dacdc4764554c9bdeea87e16e60532bcc Mon Sep 17 00:00:00 2001 From: mpostma Date: Fri, 26 Jun 2020 12:59:20 +0200 Subject: [PATCH] fix facet distribution case --- meilisearch-core/src/bucket_sort.rs | 12 ++--- meilisearch-core/src/facets.rs | 14 +++--- meilisearch-core/src/query_builder.rs | 8 ++-- meilisearch-core/src/store/facets.rs | 63 +++++++++++++++++++++------ 4 files changed, 67 insertions(+), 30 deletions(-) diff --git a/meilisearch-core/src/bucket_sort.rs b/meilisearch-core/src/bucket_sort.rs index 01200008b..e2ad64b70 100644 --- a/meilisearch-core/src/bucket_sort.rs +++ b/meilisearch-core/src/bucket_sort.rs @@ -39,7 +39,7 @@ pub fn bucket_sort<'c, FI>( query: &str, range: Range, facets_docids: Option>, - facet_count_docids: Option>>>>, + facet_count_docids: Option>)>>>, filter: Option, criteria: Criteria<'c>, searchable_attrs: Option, @@ -199,7 +199,7 @@ pub fn bucket_sort_with_distinct<'c, FI, FD>( query: &str, range: Range, facets_docids: Option>, - facet_count_docids: Option>>>>, + facet_count_docids: Option>)>>>, filter: Option, distinct: FD, distinct_size: usize, @@ -636,18 +636,18 @@ pub fn placeholder_document_sort( } /// For each entry in facet_docids, calculates the number of documents in the intersection with candidate_docids. 
-pub fn facet_count( - facet_docids: HashMap>>>, +fn facet_count( + facet_docids: HashMap>)>>, candidate_docids: &Set, ) -> HashMap> { let mut facets_counts = HashMap::with_capacity(facet_docids.len()); for (key, doc_map) in facet_docids { let mut count_map = HashMap::with_capacity(doc_map.len()); - for (value, docids) in doc_map { + for (_, (value, docids)) in doc_map { let mut counter = Counter::new(); let op = OpBuilder::new(docids.as_ref(), candidate_docids).intersection(); SetOperation::::extend_collection(op, &mut counter); - count_map.insert(value, counter.0); + count_map.insert(value.to_string(), counter.0); } facets_counts.insert(key, count_map); } diff --git a/meilisearch-core/src/facets.rs b/meilisearch-core/src/facets.rs index 1295224b7..11135c179 100644 --- a/meilisearch-core/src/facets.rs +++ b/meilisearch-core/src/facets.rs @@ -164,7 +164,7 @@ impl<'a> heed::BytesDecode<'a> for FacetKey { } pub fn add_to_facet_map( - facet_map: &mut HashMap>, + facet_map: &mut HashMap)>, field_id: FieldId, value: Value, document_id: DocumentId, @@ -175,8 +175,8 @@ pub fn add_to_facet_map( Value::Null => return Ok(()), value => return Err(FacetError::InvalidDocumentAttribute(value.to_string())), }; - let key = FacetKey::new(field_id, value); - facet_map.entry(key).or_insert_with(Vec::new).push(document_id); + let key = FacetKey::new(field_id, value.clone()); + facet_map.entry(key).or_insert_with(|| (value, Vec::new())).1.push(document_id); Ok(()) } @@ -185,8 +185,10 @@ pub fn facet_map_from_docids( index: &crate::Index, document_ids: &[DocumentId], attributes_for_facetting: &[FieldId], -) -> MResult>> { - let mut facet_map = HashMap::new(); +) -> MResult)>> { + // A hashmap that associates a facet key with a pair containing the original facet attribute + // string with its case preserved, and a list of document ids for that facet attribute. 
+ let mut facet_map: HashMap)> = HashMap::new(); for document_id in document_ids { for result in index .documents_fields @@ -212,7 +214,7 @@ pub fn facet_map_from_docs( schema: &Schema, documents: &HashMap>, attributes_for_facetting: &[FieldId], -) -> MResult>> { +) -> MResult)>> { let mut facet_map = HashMap::new(); let attributes_for_facetting = attributes_for_facetting .iter() diff --git a/meilisearch-core/src/query_builder.rs b/meilisearch-core/src/query_builder.rs index effaa3f2a..12a134291 100644 --- a/meilisearch-core/src/query_builder.rs +++ b/meilisearch-core/src/query_builder.rs @@ -97,16 +97,14 @@ impl<'c, 'f, 'd, 'i> QueryBuilder<'c, 'f, 'd, 'i> { .unwrap_or_default(); ors.push(docids); } - let sets: Vec<_> = ors.iter().map(Cow::deref).collect(); - let or_result = sdset::multi::OpBuilder::from_vec(sets) - .union() - .into_set_buf(); + let sets: Vec<_> = ors.iter().map(|i| &i.1).map(Cow::deref).collect(); + let or_result = sdset::multi::OpBuilder::from_vec(sets).union().into_set_buf(); ands.push(Cow::Owned(or_result)); ors.clear(); } Either::Right(key) => { match self.index.facets.facet_document_ids(reader, &key)? { - Some(docids) => ands.push(docids), + Some(docids) => ands.push(docids.1), // no candidates for search, early return. 
None => return Ok(Some(SetBuf::default())), } diff --git a/meilisearch-core/src/store/facets.rs b/meilisearch-core/src/store/facets.rs index 216b423c9..9a3dde6f9 100644 --- a/meilisearch-core/src/store/facets.rs +++ b/meilisearch-core/src/store/facets.rs @@ -1,7 +1,8 @@ use std::borrow::Cow; use std::collections::HashMap; +use std::mem; -use heed::{RwTxn, RoTxn, Result as ZResult, RoRange}; +use heed::{RwTxn, RoTxn, Result as ZResult, RoRange, types::Str, BytesEncode, BytesDecode}; use sdset::{SetBuf, Set, SetOperation}; use meilisearch_types::DocumentId; @@ -14,40 +15,76 @@ use super::cow_set::CowSet; /// contains facet info #[derive(Clone, Copy)] pub struct Facets { - pub(crate) facets: heed::Database>, + pub(crate) facets: heed::Database, +} + +pub struct FacetData; + +impl<'a> BytesEncode<'a> for FacetData { + type EItem = (&'a str, &'a Set); + + fn bytes_encode(item: &'a Self::EItem) -> Option> { + // get size of the first item + let first_size = item.0.as_bytes().len(); + let size = mem::size_of::() + + first_size + + item.1.len() * mem::size_of::(); + let mut buffer = Vec::with_capacity(size); + // encode the length of the first item + buffer.extend_from_slice(&first_size.to_be_bytes()); + buffer.extend_from_slice(Str::bytes_encode(&item.0)?.as_ref()); + let second_slice = CowSet::bytes_encode(&item.1)?; + buffer.extend_from_slice(second_slice.as_ref()); + Some(Cow::Owned(buffer)) + } +} + +impl<'a> BytesDecode<'a> for FacetData { + type DItem = (&'a str, Cow<'a, Set>); + + fn bytes_decode(bytes: &'a [u8]) -> Option { + let mut size_buf = [0; 8]; + size_buf.copy_from_slice(bytes.get(0..8)?); + // decode size of the first item from the bytes + let first_size = usize::from_be_bytes(size_buf); + // decode first and second items + let first_item = Str::bytes_decode(bytes.get(8..(8 + first_size))?)?; + let second_item = CowSet::bytes_decode(bytes.get((8 + first_size)..)?)?; + Some((first_item, second_item)) + } } impl Facets { // we use sdset::SetBuf to ensure 
the docids are sorted. - pub fn put_facet_document_ids(&self, writer: &mut RwTxn, facet_key: FacetKey, doc_ids: &Set) -> ZResult<()> { - self.facets.put(writer, &facet_key, doc_ids) + pub fn put_facet_document_ids(&self, writer: &mut RwTxn, facet_key: FacetKey, doc_ids: &Set, facet_value: &str) -> ZResult<()> { + self.facets.put(writer, &facet_key, &(facet_value, doc_ids)) } - pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn, field_id: FieldId) -> ZResult>> { + pub fn field_document_ids<'txn>(&self, reader: &'txn RoTxn, field_id: FieldId) -> ZResult> { self.facets.prefix_iter(reader, &FacetKey::new(field_id, String::new())) } - pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn, facet_key: &FacetKey) -> ZResult>>> { + pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn, facet_key: &FacetKey) -> ZResult>)>> { self.facets.get(reader, &facet_key) } /// updates the facets store, revmoving the documents from the facets provided in the /// `facet_map` argument - pub fn remove(&self, writer: &mut RwTxn, facet_map: HashMap>) -> ZResult<()> { - for (key, document_ids) in facet_map { - if let Some(old) = self.facets.get(writer, &key)? { + pub fn remove(&self, writer: &mut RwTxn, facet_map: HashMap)>) -> ZResult<()> { + for (key, (name, document_ids)) in facet_map { + if let Some((_, old)) = self.facets.get(writer, &key)? 
{ let to_remove = SetBuf::from_dirty(document_ids); let new = sdset::duo::OpBuilder::new(old.as_ref(), to_remove.as_set()).difference().into_set_buf(); - self.facets.put(writer, &key, new.as_set())?; + self.facets.put(writer, &key, &(&name, new.as_set()))?; } } Ok(()) } - pub fn add(&self, writer: &mut RwTxn, facet_map: HashMap>) -> ZResult<()> { - for (key, document_ids) in facet_map { + pub fn add(&self, writer: &mut RwTxn, facet_map: HashMap)>) -> ZResult<()> { + for (key, (facet_name, document_ids)) in facet_map { let set = SetBuf::from_dirty(document_ids); - self.put_facet_document_ids(writer, key, set.as_set())?; + self.put_facet_document_ids(writer, key, set.as_set(), &facet_name)?; } Ok(()) }