mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 04:17:10 +02:00
setting up facets
This commit is contained in:
parent
dd08cfc6a3
commit
59c67f6bc8
11 changed files with 633 additions and 139 deletions
32
meilisearch-core/src/store/cow_set.rs
Normal file
32
meilisearch-core/src/store/cow_set.rs
Normal file
|
@ -0,0 +1,32 @@
|
|||
use std::borrow::Cow;
|
||||
|
||||
use heed::{types::CowSlice, BytesEncode, BytesDecode};
|
||||
use sdset::{Set, SetBuf};
|
||||
use zerocopy::{AsBytes, FromBytes};
|
||||
|
||||
/// Marker type used as a heed codec: it encodes a `Set<T>` as bytes and decodes
/// bytes back into a `Cow<Set<T>>`. It only carries `PhantomData`, no runtime data.
pub struct CowSet<T>(std::marker::PhantomData<T>);
|
||||
|
||||
impl<'a, T: 'a> BytesEncode<'a> for CowSet<T>
|
||||
where
|
||||
T: AsBytes,
|
||||
{
|
||||
type EItem = Set<T>;
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<[u8]>> {
|
||||
CowSlice::bytes_encode(item.as_slice())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: 'a> BytesDecode<'a> for CowSet<T>
|
||||
where
|
||||
T: FromBytes + Copy,
|
||||
{
|
||||
type DItem = Cow<'a, Set<T>>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
match CowSlice::<T>::bytes_decode(bytes)? {
|
||||
Cow::Owned(vec) => Some(Cow::Owned(SetBuf::new_unchecked(vec))),
|
||||
Cow::Borrowed(slice) => Some(Cow::Borrowed(Set::new_unchecked(slice))),
|
||||
}
|
||||
}
|
||||
}
|
53
meilisearch-core/src/store/facets.rs
Normal file
53
meilisearch-core/src/store/facets.rs
Normal file
|
@ -0,0 +1,53 @@
|
|||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
|
||||
use heed::{RwTxn, RoTxn, Result as ZResult};
|
||||
use sdset::{SetBuf, Set, SetOperation};
|
||||
|
||||
use meilisearch_types::DocumentId;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::facets::FacetKey;
|
||||
use super::cow_set::CowSet;
|
||||
|
||||
/// Store that maps each facet key to the set of ids of the documents matching that facet.
|
||||
#[derive(Clone, Copy)]
|
||||
pub struct Facets {
|
||||
pub(crate) facets: heed::Database<FacetKey, CowSet<DocumentId>>,
|
||||
}
|
||||
|
||||
impl Facets {
|
||||
// we use sdset::SetBuf to ensure the docids are sorted.
|
||||
pub fn put_facet_document_ids(&self, writer: &mut RwTxn<MainT>, facet_key: FacetKey, doc_ids: &Set<DocumentId>) -> ZResult<()> {
|
||||
self.facets.put(writer, &facet_key, doc_ids)
|
||||
}
|
||||
|
||||
pub fn facet_document_ids<'txn>(&self, reader: &'txn RoTxn<MainT>, facet_key: &FacetKey) -> ZResult<Option<Cow<'txn, Set<DocumentId>>>> {
|
||||
self.facets.get(reader, &facet_key)
|
||||
}
|
||||
|
||||
/// updates the facets store, revmoving the documents from the facets provided in the
|
||||
/// `facet_map` argument
|
||||
pub fn remove(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, Vec<DocumentId>>) -> ZResult<()> {
|
||||
for (key, document_ids) in facet_map {
|
||||
if let Some(old) = self.facets.get(writer, &key)? {
|
||||
let to_remove = SetBuf::from_dirty(document_ids);
|
||||
let new = sdset::duo::OpBuilder::new(old.as_ref(), to_remove.as_set()).difference().into_set_buf();
|
||||
self.facets.put(writer, &key, new.as_set())?;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn add(&self, writer: &mut RwTxn<MainT>, facet_map: HashMap<FacetKey, Vec<DocumentId>>) -> ZResult<()> {
|
||||
for (key, document_ids) in facet_map {
|
||||
let set = SetBuf::from_dirty(document_ids);
|
||||
self.put_facet_document_ids(writer, key, set.as_set())?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
|
||||
self.facets.clear(writer)
|
||||
}
|
||||
}
|
|
@ -4,13 +4,16 @@ use std::collections::HashMap;
|
|||
use chrono::{DateTime, Utc};
|
||||
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str};
|
||||
use heed::Result as ZResult;
|
||||
use meilisearch_schema::Schema;
|
||||
use meilisearch_schema::{FieldId, Schema};
|
||||
use sdset::Set;
|
||||
|
||||
use crate::database::MainT;
|
||||
use crate::RankedMap;
|
||||
use crate::settings::RankingRule;
|
||||
use super::cow_set::CowSet;
|
||||
|
||||
const CREATED_AT_KEY: &str = "created-at";
|
||||
const ATTRIBUTES_FOR_FACETING: &str = "attributes-for-faceting";
|
||||
const RANKING_RULES_KEY: &str = "ranking-rules";
|
||||
const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute";
|
||||
const STOP_WORDS_KEY: &str = "stop-words";
|
||||
|
@ -188,6 +191,18 @@ impl Main {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn attributes_for_faceting<'txn>(&self, reader: &'txn heed::RoTxn<MainT>) -> ZResult<Option<Cow<'txn, Set<FieldId>>>> {
|
||||
self.main.get::<_, Str, CowSet<FieldId>>(reader, ATTRIBUTES_FOR_FACETING)
|
||||
}
|
||||
|
||||
pub fn put_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>, attributes: &Set<FieldId>) -> ZResult<()> {
|
||||
self.main.put::<_, Str, CowSet<FieldId>>(writer, ATTRIBUTES_FOR_FACETING, attributes)
|
||||
}
|
||||
|
||||
pub fn delete_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<bool> {
|
||||
self.main.delete::<_, Str>(writer, ATTRIBUTES_FOR_FACETING)
|
||||
}
|
||||
|
||||
pub fn ranking_rules(&self, reader: &heed::RoTxn<MainT>) -> ZResult<Option<Vec<RankingRule>>> {
|
||||
self.main.get::<_, Str, SerdeBincode<Vec<RankingRule>>>(reader, RANKING_RULES_KEY)
|
||||
}
|
||||
|
|
|
@ -1,3 +1,4 @@
|
|||
mod cow_set;
|
||||
mod docs_words;
|
||||
mod prefix_documents_cache;
|
||||
mod prefix_postings_lists_cache;
|
||||
|
@ -8,8 +9,10 @@ mod postings_lists;
|
|||
mod synonyms;
|
||||
mod updates;
|
||||
mod updates_results;
|
||||
mod facets;
|
||||
|
||||
pub use self::docs_words::DocsWords;
|
||||
pub use self::facets::Facets;
|
||||
pub use self::prefix_documents_cache::PrefixDocumentsCache;
|
||||
pub use self::prefix_postings_lists_cache::PrefixPostingsListsCache;
|
||||
pub use self::documents_fields::{DocumentFieldsIter, DocumentsFields};
|
||||
|
@ -42,7 +45,7 @@ use crate::settings::SettingsUpdate;
|
|||
use crate::{query_builder::QueryBuilder, update, DocIndex, DocumentId, Error, MResult};
|
||||
|
||||
type BEU64 = zerocopy::U64<byteorder::BigEndian>;
|
||||
type BEU16 = zerocopy::U16<byteorder::BigEndian>;
|
||||
pub type BEU16 = zerocopy::U16<byteorder::BigEndian>;
|
||||
|
||||
#[derive(Debug, Copy, Clone, AsBytes, FromBytes)]
|
||||
#[repr(C)]
|
||||
|
@ -197,12 +200,17 @@ fn updates_results_name(name: &str) -> String {
|
|||
format!("store-{}-updates-results", name)
|
||||
}
|
||||
|
||||
/// Builds the name of the facets database for the index called `name`,
/// i.e. `store-<name>-facets`.
fn facets_name(name: &str) -> String {
    ["store-", name, "-facets"].concat()
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Index {
|
||||
pub main: Main,
|
||||
pub postings_lists: PostingsLists,
|
||||
pub documents_fields: DocumentsFields,
|
||||
pub documents_fields_counts: DocumentsFieldsCounts,
|
||||
pub facets: Facets,
|
||||
pub synonyms: Synonyms,
|
||||
pub docs_words: DocsWords,
|
||||
pub prefix_documents_cache: PrefixDocumentsCache,
|
||||
|
@ -352,29 +360,14 @@ impl Index {
|
|||
}
|
||||
|
||||
pub fn query_builder(&self) -> QueryBuilder {
|
||||
QueryBuilder::new(
|
||||
self.main,
|
||||
self.postings_lists,
|
||||
self.documents_fields_counts,
|
||||
self.synonyms,
|
||||
self.prefix_documents_cache,
|
||||
self.prefix_postings_lists_cache,
|
||||
)
|
||||
QueryBuilder::new(self)
|
||||
}
|
||||
|
||||
pub fn query_builder_with_criteria<'c, 'f, 'd>(
|
||||
&self,
|
||||
pub fn query_builder_with_criteria<'c, 'f, 'd, 'fa, 'i>(
|
||||
&'i self,
|
||||
criteria: Criteria<'c>,
|
||||
) -> QueryBuilder<'c, 'f, 'd> {
|
||||
QueryBuilder::with_criteria(
|
||||
self.main,
|
||||
self.postings_lists,
|
||||
self.documents_fields_counts,
|
||||
self.synonyms,
|
||||
self.prefix_documents_cache,
|
||||
self.prefix_postings_lists_cache,
|
||||
criteria,
|
||||
)
|
||||
) -> QueryBuilder<'c, 'f, 'd, 'fa, 'i> {
|
||||
QueryBuilder::with_criteria(self, criteria)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -395,12 +388,14 @@ pub fn create(
|
|||
let prefix_postings_lists_cache_name = prefix_postings_lists_cache_name(name);
|
||||
let updates_name = updates_name(name);
|
||||
let updates_results_name = updates_results_name(name);
|
||||
let facets_name = facets_name(name);
|
||||
|
||||
// open all the stores
|
||||
let main = env.create_poly_database(Some(&main_name))?;
|
||||
let postings_lists = env.create_database(Some(&postings_lists_name))?;
|
||||
let documents_fields = env.create_database(Some(&documents_fields_name))?;
|
||||
let documents_fields_counts = env.create_database(Some(&documents_fields_counts_name))?;
|
||||
let facets = env.create_database(Some(&facets_name))?;
|
||||
let synonyms = env.create_database(Some(&synonyms_name))?;
|
||||
let docs_words = env.create_database(Some(&docs_words_name))?;
|
||||
let prefix_documents_cache = env.create_database(Some(&prefix_documents_cache_name))?;
|
||||
|
@ -417,6 +412,8 @@ pub fn create(
|
|||
docs_words: DocsWords { docs_words },
|
||||
prefix_postings_lists_cache: PrefixPostingsListsCache { prefix_postings_lists_cache },
|
||||
prefix_documents_cache: PrefixDocumentsCache { prefix_documents_cache },
|
||||
facets: Facets { facets },
|
||||
|
||||
updates: Updates { updates },
|
||||
updates_results: UpdatesResults { updates_results },
|
||||
updates_notifier,
|
||||
|
@ -437,6 +434,7 @@ pub fn open(
|
|||
let synonyms_name = synonyms_name(name);
|
||||
let docs_words_name = docs_words_name(name);
|
||||
let prefix_documents_cache_name = prefix_documents_cache_name(name);
|
||||
let facets_name = facets_name(name);
|
||||
let prefix_postings_lists_cache_name = prefix_postings_lists_cache_name(name);
|
||||
let updates_name = updates_name(name);
|
||||
let updates_results_name = updates_results_name(name);
|
||||
|
@ -470,6 +468,10 @@ pub fn open(
|
|||
Some(prefix_documents_cache) => prefix_documents_cache,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let facets = match env.open_database(Some(&facets_name))? {
|
||||
Some(facets) => facets,
|
||||
None => return Ok(None),
|
||||
};
|
||||
let prefix_postings_lists_cache = match env.open_database(Some(&prefix_postings_lists_cache_name))? {
|
||||
Some(prefix_postings_lists_cache) => prefix_postings_lists_cache,
|
||||
None => return Ok(None),
|
||||
|
@ -491,6 +493,7 @@ pub fn open(
|
|||
synonyms: Synonyms { synonyms },
|
||||
docs_words: DocsWords { docs_words },
|
||||
prefix_documents_cache: PrefixDocumentsCache { prefix_documents_cache },
|
||||
facets: Facets { facets },
|
||||
prefix_postings_lists_cache: PrefixPostingsListsCache { prefix_postings_lists_cache },
|
||||
updates: Updates { updates },
|
||||
updates_results: UpdatesResults { updates_results },
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue