setting up facets

This commit is contained in:
mpostma 2020-05-05 22:19:34 +02:00
parent dd08cfc6a3
commit 59c67f6bc8
11 changed files with 633 additions and 139 deletions

View file

@ -0,0 +1,32 @@
use std::borrow::Cow;
use heed::{types::CowSlice, BytesEncode, BytesDecode};
use sdset::{Set, SetBuf};
use zerocopy::{AsBytes, FromBytes};
pub struct CowSet<T>(std::marker::PhantomData<T>);
impl<'a, T: 'a> BytesEncode<'a> for CowSet<T>
where
T: AsBytes,
{
type EItem = Set<T>;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<[u8]>> {
CowSlice::bytes_encode(item.as_slice())
}
}
impl<'a, T: 'a> BytesDecode<'a> for CowSet<T>
where
T: FromBytes + Copy,
{
type DItem = Cow<'a, Set<T>>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
match CowSlice::<T>::bytes_decode(bytes)? {
Cow::Owned(vec) => Some(Cow::Owned(SetBuf::new_unchecked(vec))),
Cow::Borrowed(slice) => Some(Cow::Borrowed(Set::new_unchecked(slice))),
}
}
}

View file

@ -0,0 +1,53 @@
use std::borrow::Cow;
use std::collections::HashMap;
use heed::{RwTxn, RoTxn, Result as ZResult};
use sdset::{SetBuf, Set, SetOperation};
use meilisearch_types::DocumentId;
use crate::database::MainT;
use crate::facets::FacetKey;
use super::cow_set::CowSet;
/// contains facet info
#[derive(Clone, Copy)]
pub struct Facets {
pub(crate) facets: heed::Database<FacetKey, CowSet<DocumentId>>,
}
impl Facets {
// we use sdset::SetBuf to ensure the docids are sorted.
pub fn put_facet_document_ids(&self, writer: &mut RwTxn<MainT>, facet_key: FacetKey, doc_ids: &Set<DocumentId>) -> ZResult<()> {
self.facets.put(writer, &facet_key, doc_ids)
}
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<Cow<'txn, Set<DocumentId>>>> {
self.facets.get(reader, &facet_key)
}
/// updates the facets store, revmoving the documents from the facets provided in the
/// `facet_map` argument
pub fn remove(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, Vec<DocumentId>>) -> ZResult<()> {
for (key, document_ids) in facet_map {
if let Some(old) = self.facets.get(writer, &key)? {
let to_remove = SetBuf::from_dirty(document_ids);
let new = sdset::duo::OpBuilder::new(old.as_ref(), to_remove.as_set()).difference().into_set_buf();
self.facets.put(writer, &key, new.as_set())?;
}
}
Ok(())
}
pub fn add(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, Vec<DocumentId>>) -> ZResult<()> {
for (key, document_ids) in facet_map {
let set = SetBuf::from_dirty(document_ids);
self.put_facet_document_ids(writer, key, set.as_set())?;
}
Ok(())
}
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
self.facets.clear(writer)
}
}

View file

@ -4,13 +4,16 @@ use std::collections::HashMap;
use chrono::{DateTime, Utc};
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
use heed::Result as ZResult;
use meilisearch_schema::Schema;
use meilisearch_schema::{FieldId, Schema};
use sdset::Set;
use crate::database::MainT;
use crate::RankedMap;
use crate::settings::RankingRule;
use super::cow_set::CowSet;
const CREATED_AT_KEY: &str = "created-at";
const ATTRIBUTES_FOR_FACETING: &str = "attributes-for-faceting";
const RANKING_RULES_KEY: &str = "ranking-rules";
const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute";
const STOP_WORDS_KEY: &str = "stop-words";
@ -188,6 +191,18 @@ impl Main {
}
}
pub fn attributes_for_faceting<'txn>(&self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<Option<Cow<'txn, Set<FieldId>>>> {
self.main.get::<_, Str, CowSet<FieldId>>(reader, ATTRIBUTES_FOR_FACETING)
}
pub fn put_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>, attributes: &Set<FieldId>) -> ZResult<()> {
self.main.put::<_, Str, CowSet<FieldId>>(writer, ATTRIBUTES_FOR_FACETING, attributes)
}
pub fn delete_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<bool> {
self.main.delete::<_, Str>(writer, ATTRIBUTES_FOR_FACETING)
}
pub fn ranking_rules(&self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<Vec<RankingRule>>> {
self.main.get::<_, Str, SerdeBincode<Vec<RankingRule>>>(reader, RANKING_RULES_KEY)
}

View file

@ -1,3 +1,4 @@
mod cow_set;
mod docs_words;
mod prefix_documents_cache;
mod prefix_postings_lists_cache;
@ -8,8 +9,10 @@ mod postings_lists;
mod synonyms;
mod updates;
mod updates_results;
mod facets;
pub use self::docs_words::DocsWords;
pub use self::facets::Facets;
pub use self::prefix_documents_cache::PrefixDocumentsCache;
pub use self::prefix_postings_lists_cache::PrefixPostingsListsCache;
pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields};
@ -42,7 +45,7 @@ use crate::settings::SettingsUpdate;
use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult};
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
pub type BEU16 = zerocopy::U16<byteorder::BigEndian>;
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
#[repr(C)]
@ -197,12 +200,17 @@ fn updates_results_name(name: &str) -> String {
format!("store-{}-updates-results", name)
}
fn facets_name(name: &str) -> String {
format!("store-{}-facets", name)
}
#[derive(Clone)]
pub struct Index {
pub main: Main,
pub postings_lists: PostingsLists,
pub documents_fields: DocumentsFields,
pub documents_fields_counts: DocumentsFieldsCounts,
pub facets: Facets,
pub synonyms: Synonyms,
pub docs_words: DocsWords,
pub prefix_documents_cache: PrefixDocumentsCache,
@ -352,29 +360,14 @@ impl Index {
}
pub fn query_builder(&self) -> QueryBuilder {
QueryBuilder::new(
self.main,
self.postings_lists,
self.documents_fields_counts,
self.synonyms,
self.prefix_documents_cache,
self.prefix_postings_lists_cache,
)
QueryBuilder::new(self)
}
pub fn query_builder_with_criteria<'c, 'f, 'd>(
&self,
pub fn query_builder_with_criteria<'c, 'f, 'd, 'fa, 'i>(
&'i self,
criteria: Criteria<'c>,
) -> QueryBuilder<'c, 'f, 'd> {
QueryBuilder::with_criteria(
self.main,
self.postings_lists,
self.documents_fields_counts,
self.synonyms,
self.prefix_documents_cache,
self.prefix_postings_lists_cache,
criteria,
)
) -> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
QueryBuilder::with_criteria(self, criteria)
}
}
@ -395,12 +388,14 @@ pub fn create(
let prefix_postings_lists_cache_name = prefix_postings_lists_cache_name(name);
let updates_name = updates_name(name);
let updates_results_name = updates_results_name(name);
let facets_name = facets_name(name);
// open all the stores
let main = env.create_poly_database(Some(&main_name))?;
let postings_lists = env.create_database(Some(&postings_lists_name))?;
let documents_fields = env.create_database(Some(&documents_fields_name))?;
let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
let facets = env.create_database(Some(&facets_name))?;
let synonyms = env.create_database(Some(&synonyms_name))?;
let docs_words = env.create_database(Some(&docs_words_name))?;
let prefix_documents_cache = env.create_database(Some(&prefix_documents_cache_name))?;
@ -417,6 +412,8 @@ pub fn create(
docs_words: DocsWords { docs_words },
prefix_postings_lists_cache: PrefixPostingsListsCache { prefix_postings_lists_cache },
prefix_documents_cache: PrefixDocumentsCache { prefix_documents_cache },
facets: Facets { facets },
updates: Updates { updates },
updates_results: UpdatesResults { updates_results },
updates_notifier,
@ -437,6 +434,7 @@ pub fn open(
let synonyms_name = synonyms_name(name);
let docs_words_name = docs_words_name(name);
let prefix_documents_cache_name = prefix_documents_cache_name(name);
let facets_name = facets_name(name);
let prefix_postings_lists_cache_name = prefix_postings_lists_cache_name(name);
let updates_name = updates_name(name);
let updates_results_name = updates_results_name(name);
@ -470,6 +468,10 @@ pub fn open(
Some(prefix_documents_cache) => prefix_documents_cache,
None => return Ok(None),
};
let facets = match env.open_database(Some(&facets_name))? {
Some(facets) => facets,
None => return Ok(None),
};
let prefix_postings_lists_cache = match env.open_database(Some(&prefix_postings_lists_cache_name))? {
Some(prefix_postings_lists_cache) => prefix_postings_lists_cache,
None => return Ok(None),
@ -491,6 +493,7 @@ pub fn open(
synonyms: Synonyms { synonyms },
docs_words: DocsWords { docs_words },
prefix_documents_cache: PrefixDocumentsCache { prefix_documents_cache },
facets: Facets { facets },
prefix_postings_lists_cache: PrefixPostingsListsCache { prefix_postings_lists_cache },
updates: Updates { updates },
updates_results: UpdatesResults { updates_results },