MeiliSearch/meilisearch-core/src/store/main.rs

321 lines
13 KiB
Rust
Raw Normal View History

2020-05-05 22:28:46 +02:00
use std::borrow::Cow;
2020-01-18 16:12:02 +01:00
use std::collections::HashMap;
2020-01-13 19:10:58 +01:00
2019-11-20 11:24:08 +01:00
use chrono::{DateTime, Utc};
2020-05-28 19:35:34 +02:00
use heed::types::{ByteSlice, OwnedType, SerdeBincode, Str, CowSlice};
2020-05-05 22:19:34 +02:00
use meilisearch_schema::{FieldId, Schema};
use meilisearch_types::DocumentId;
2020-05-05 22:19:34 +02:00
use sdset::Set;
2020-01-13 19:10:58 +01:00
use crate::database::MainT;
use crate::{RankedMap, MResult};
use crate::settings::RankingRule;
2020-05-22 15:00:50 +02:00
use crate::{FstSetCow, FstMapCow};
use super::{CowSet, DocumentsIds};
2019-10-03 15:04:11 +02:00
// Keys under which the individual entries of the `main` database are stored.
// Every accessor of `Main` reads or writes exactly one of these keys.
const ATTRIBUTES_FOR_FACETING_KEY: &str = "attributes-for-faceting";
const CREATED_AT_KEY: &str = "created-at";
const CUSTOMS_KEY: &str = "customs";
const DISTINCT_ATTRIBUTE_KEY: &str = "distinct-attribute";
const EXTERNAL_DOCIDS_KEY: &str = "external-docids";
const FIELDS_DISTRIBUTION_KEY: &str = "fields-distribution";
const INTERNAL_DOCIDS_KEY: &str = "internal-docids";
const NAME_KEY: &str = "name";
const NUMBER_OF_DOCUMENTS_KEY: &str = "number-of-documents";
const RANKED_MAP_KEY: &str = "ranked-map";
const RANKING_RULES_KEY: &str = "ranking-rules";
const SCHEMA_KEY: &str = "schema";
const SORTED_DOCUMENT_IDS_CACHE_KEY: &str = "sorted-document-ids-cache";
const STOP_WORDS_KEY: &str = "stop-words";
const SYNONYMS_KEY: &str = "synonyms";
const UPDATED_AT_KEY: &str = "updated-at";
const WORDS_KEY: &str = "words";
2019-10-03 16:13:09 +02:00
/// Map from a `String` key to a `usize` count; this is the value type stored
/// and returned by the fields-distribution accessors of `Main`.
pub type FreqsMap = HashMap<String, usize>;
// Bincode codec used to (de)serialize a `FreqsMap` entry.
type SerdeFreqsMap = SerdeBincode<FreqsMap>;
// Bincode codec used to (de)serialize the UTC timestamps (`created-at`, `updated-at`).
type SerdeDatetime = SerdeBincode<DateTime<Utc>>;
2019-10-03 15:04:11 +02:00
#[derive(Copy, Clone)]
pub struct Main {
2019-10-21 12:05:53 +02:00
pub(crate) main: heed::PolyDatabase,
2019-10-03 15:04:11 +02:00
}
impl Main {
/// Clears the `main` database using the given write transaction.
pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
    self.main.clear(writer)?;
    Ok(())
}
pub fn put_name(self, writer: &mut heed::RwTxn<MainT>, name: &str) -> MResult<()> {
Ok(self.main.put::<_, Str, Str>(writer, NAME_KEY, name)?)
2019-11-20 11:24:08 +01:00
}
pub fn name(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<String>> {
2019-11-20 11:24:08 +01:00
Ok(self
.main
.get::<_, Str, Str>(reader, NAME_KEY)?
2019-11-20 11:24:08 +01:00
.map(|name| name.to_owned()))
}
/// Stores the current UTC time (`Utc::now()`) under `CREATED_AT_KEY`.
pub fn put_created_at(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
    let now = Utc::now();
    self.main.put::<_, Str, SerdeDatetime>(writer, CREATED_AT_KEY, &now)?;
    Ok(())
}
/// Returns the timestamp stored under `CREATED_AT_KEY`, if any.
pub fn created_at(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<DateTime<Utc>>> {
    let created_at = self.main.get::<_, Str, SerdeDatetime>(reader, CREATED_AT_KEY)?;
    Ok(created_at)
}
2019-11-20 14:12:12 +01:00
/// Stores the current UTC time (`Utc::now()`) under `UPDATED_AT_KEY`.
pub fn put_updated_at(self, writer: &mut heed::RwTxn<MainT>) -> MResult<()> {
    let now = Utc::now();
    self.main.put::<_, Str, SerdeDatetime>(writer, UPDATED_AT_KEY, &now)?;
    Ok(())
}
pub fn updated_at(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<DateTime<Utc>>> {
Ok(self.main.get::<_, Str, SerdeDatetime>(reader, UPDATED_AT_KEY)?)
2019-11-19 16:18:01 +01:00
}
2020-05-22 18:04:23 +02:00
/// Stores `ids` under `INTERNAL_DOCIDS_KEY`, replacing any previous value.
pub fn put_internal_docids(
    self,
    writer: &mut heed::RwTxn<MainT>,
    ids: &sdset::Set<DocumentId>,
) -> MResult<()> {
    self.main.put::<_, Str, DocumentsIds>(writer, INTERNAL_DOCIDS_KEY, ids)?;
    Ok(())
}
2020-05-22 18:04:23 +02:00
pub fn internal_docids<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> MResult<Cow<'txn, sdset::Set<DocumentId>>> {
2020-05-20 14:49:41 +02:00
match self.main.get::<_, Str, DocumentsIds>(reader, INTERNAL_DOCIDS_KEY)? {
Some(ids) => Ok(ids),
None => Ok(Cow::default()),
}
}
2020-05-22 18:04:23 +02:00
pub fn merge_internal_docids(self, writer: &mut heed::RwTxn<MainT>, new_ids: &sdset::Set<DocumentId>) -> MResult<()> {
2020-05-19 11:45:46 +02:00
use sdset::SetOperation;
// We do an union of the old and new internal ids.
2020-05-20 14:49:41 +02:00
let internal_docids = self.internal_docids(writer)?;
let internal_docids = sdset::duo::Union::new(&internal_docids, new_ids).into_set_buf();
2020-05-22 18:04:23 +02:00
Ok(self.put_internal_docids(writer, &internal_docids)?)
2020-05-19 13:12:02 +02:00
}
2020-05-22 18:04:23 +02:00
pub fn remove_internal_docids(self, writer: &mut heed::RwTxn<MainT>, ids: &sdset::Set<DocumentId>) -> MResult<()> {
2020-05-19 13:12:02 +02:00
use sdset::SetOperation;
// We do a difference of the old and new internal ids.
2020-05-20 14:49:41 +02:00
let internal_docids = self.internal_docids(writer)?;
let internal_docids = sdset::duo::Difference::new(&internal_docids, ids).into_set_buf();
2020-05-22 18:04:23 +02:00
Ok(self.put_internal_docids(writer, &internal_docids)?)
2020-05-19 11:45:46 +02:00
}
2020-05-22 18:04:23 +02:00
pub fn put_external_docids<A>(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map<A>) -> MResult<()>
2020-05-22 15:00:50 +02:00
where A: AsRef<[u8]>,
{
2020-05-22 18:04:23 +02:00
Ok(self.main.put::<_, Str, ByteSlice>(writer, EXTERNAL_DOCIDS_KEY, ids.as_fst().as_bytes())?)
}
2020-05-22 18:04:23 +02:00
pub fn merge_external_docids<A>(self, writer: &mut heed::RwTxn<MainT>, new_docids: &fst::Map<A>) -> MResult<()>
2020-05-22 15:00:50 +02:00
where A: AsRef<[u8]>,
{
2020-05-19 11:45:46 +02:00
use fst::{Streamer, IntoStreamer};
2020-05-20 15:21:08 +02:00
// Do an union of the old and the new set of external docids.
2020-05-20 14:49:41 +02:00
let external_docids = self.external_docids(writer)?;
2020-05-20 15:21:08 +02:00
let mut op = external_docids.op().add(new_docids.into_stream()).r#union();
2020-05-19 13:12:02 +02:00
let mut build = fst::MapBuilder::memory();
2020-05-20 15:21:08 +02:00
while let Some((docid, values)) = op.next() {
build.insert(docid, values[0].value).unwrap();
2020-05-19 13:12:02 +02:00
}
2020-05-22 15:00:50 +02:00
drop(op);
2020-05-19 13:12:02 +02:00
2020-05-22 15:00:50 +02:00
let external_docids = build.into_map();
2020-05-22 18:04:23 +02:00
Ok(self.put_external_docids(writer, &external_docids)?)
2020-05-19 13:12:02 +02:00
}
2020-05-22 18:04:23 +02:00
pub fn remove_external_docids<A>(self, writer: &mut heed::RwTxn<MainT>, ids: &fst::Map<A>) -> MResult<()>
2020-05-22 15:00:50 +02:00
where A: AsRef<[u8]>,
{
2020-05-19 13:12:02 +02:00
use fst::{Streamer, IntoStreamer};
2020-05-19 11:45:46 +02:00
2020-05-20 15:21:08 +02:00
// Do an union of the old and the new set of external docids.
2020-05-20 14:49:41 +02:00
let external_docids = self.external_docids(writer)?;
let mut op = external_docids.op().add(ids.into_stream()).difference();
2020-05-19 11:45:46 +02:00
let mut build = fst::MapBuilder::memory();
2020-05-20 15:21:08 +02:00
while let Some((docid, values)) = op.next() {
build.insert(docid, values[0].value).unwrap();
2020-05-19 11:45:46 +02:00
}
2020-05-22 15:00:50 +02:00
drop(op);
2020-05-19 11:45:46 +02:00
2020-05-22 15:00:50 +02:00
let external_docids = build.into_map();
self.put_external_docids(writer, &external_docids)
2020-05-19 11:45:46 +02:00
}
2020-05-22 18:04:23 +02:00
pub fn external_docids(self, reader: &heed::RoTxn<MainT>) -> MResult<FstMapCow> {
2020-05-20 14:49:41 +02:00
match self.main.get::<_, Str, ByteSlice>(reader, EXTERNAL_DOCIDS_KEY)? {
2020-05-22 15:00:50 +02:00
Some(bytes) => Ok(fst::Map::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
None => Ok(fst::Map::default().map_data(Cow::Owned).unwrap()),
}
}
2020-05-22 18:04:23 +02:00
pub fn external_to_internal_docid(self, reader: &heed::RoTxn<MainT>, external_docid: &str) -> MResult<Option<DocumentId>> {
2020-05-20 14:49:41 +02:00
let external_ids = self.external_docids(reader)?;
Ok(external_ids.get(external_docid).map(|id| DocumentId(id as u32)))
2020-05-19 13:12:02 +02:00
}
2020-05-22 18:04:23 +02:00
pub fn words_fst(self, reader: &heed::RoTxn<MainT>) -> MResult<FstSetCow> {
match self.main.get::<_, Str, ByteSlice>(reader, WORDS_KEY)? {
2020-05-22 15:00:50 +02:00
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
2019-10-03 15:04:11 +02:00
}
}
2020-05-22 18:04:23 +02:00
/// Stores the raw bytes of the `fst` set under `WORDS_KEY`.
pub fn put_words_fst<A: AsRef<[u8]>>(
    self,
    writer: &mut heed::RwTxn<MainT>,
    fst: &fst::Set<A>,
) -> MResult<()> {
    let bytes = fst.as_fst().as_bytes();
    self.main.put::<_, Str, ByteSlice>(writer, WORDS_KEY, bytes)?;
    Ok(())
}
2020-05-28 19:35:34 +02:00
/// Stores `documents_ids` under `SORTED_DOCUMENT_IDS_CACHE_KEY`.
pub fn put_sorted_document_ids_cache(
    self,
    writer: &mut heed::RwTxn<MainT>,
    documents_ids: &[DocumentId],
) -> MResult<()> {
    self.main
        .put::<_, Str, CowSlice<DocumentId>>(writer, SORTED_DOCUMENT_IDS_CACHE_KEY, documents_ids)?;
    Ok(())
}
/// Returns the slice of document ids stored under
/// `SORTED_DOCUMENT_IDS_CACHE_KEY`, if any.
pub fn sorted_document_ids_cache(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Cow<[DocumentId]>>> {
    let cached = self
        .main
        .get::<_, Str, CowSlice<DocumentId>>(reader, SORTED_DOCUMENT_IDS_CACHE_KEY)?;
    Ok(cached)
}
pub fn put_schema(self, writer: &mut heed::RwTxn<MainT>, schema: &Schema) -> MResult<()> {
Ok(self.main.put::<_, Str, SerdeBincode<Schema>>(writer, SCHEMA_KEY, schema)?)
2019-10-04 17:23:46 +02:00
}
/// Returns the `Schema` deserialized from the `SCHEMA_KEY` entry, if any.
pub fn schema(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Schema>> {
    let schema = self.main.get::<_, Str, SerdeBincode<Schema>>(reader, SCHEMA_KEY)?;
    Ok(schema)
}
pub fn delete_schema(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
Ok(self.main.delete::<_, Str>(writer, SCHEMA_KEY)?)
2019-10-04 17:23:46 +02:00
}
pub fn put_ranked_map(self, writer: &mut heed::RwTxn<MainT>, ranked_map: &RankedMap) -> MResult<()> {
Ok(self.main.put::<_, Str, SerdeBincode<RankedMap>>(writer, RANKED_MAP_KEY, &ranked_map)?)
2019-10-03 15:04:11 +02:00
}
pub fn ranked_map(self, reader: &heed::RoTxn<MainT>) -> MResult<Option<RankedMap>> {
Ok(self.main.get::<_, Str, SerdeBincode<RankedMap>>(reader, RANKED_MAP_KEY)?)
2019-10-03 15:04:11 +02:00
}
pub fn put_synonyms_fst<A: AsRef<[u8]>>(self, writer: &mut heed::RwTxn<MainT>, fst: &fst::Set<A>) -> MResult<()> {
2019-10-16 17:05:24 +02:00
let bytes = fst.as_fst().as_bytes();
Ok(self.main.put::<_, Str, ByteSlice>(writer, SYNONYMS_KEY, bytes)?)
}
2020-05-22 18:04:23 +02:00
pub(crate) fn synonyms_fst(self, reader: &heed::RoTxn<MainT>) -> MResult<FstSetCow> {
match self.main.get::<_, Str, ByteSlice>(reader, SYNONYMS_KEY)? {
2020-05-22 15:00:50 +02:00
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
}
}
2020-05-27 12:04:35 +02:00
/// Returns every key of the synonyms FST set as an owned `String`.
///
/// # Errors
///
/// Propagates errors from reading the set and from converting its keys to
/// strings.
pub fn synonyms(self, reader: &heed::RoTxn<MainT>) -> MResult<Vec<String>> {
    // `reader` is already a reference; pass it through like `stop_words` does.
    let synonyms = self
        .synonyms_fst(reader)?
        .stream()
        .into_strs()?;
    Ok(synonyms)
}
/// Stores the raw bytes of the `fst` set under `STOP_WORDS_KEY`.
pub fn put_stop_words_fst<A: AsRef<[u8]>>(
    self,
    writer: &mut heed::RwTxn<MainT>,
    fst: &fst::Set<A>,
) -> MResult<()> {
    self.main
        .put::<_, Str, ByteSlice>(writer, STOP_WORDS_KEY, fst.as_fst().as_bytes())?;
    Ok(())
}
pub(crate) fn stop_words_fst(self, reader: &heed::RoTxn<MainT>) -> MResult<FstSetCow> {
match self.main.get::<_, Str, ByteSlice>(reader, STOP_WORDS_KEY)? {
2020-05-22 15:00:50 +02:00
Some(bytes) => Ok(fst::Set::new(bytes).unwrap().map_data(Cow::Borrowed).unwrap()),
None => Ok(fst::Set::default().map_data(Cow::Owned).unwrap()),
}
}
2020-05-27 12:04:35 +02:00
/// Returns every key of the stop-words FST set as an owned `String`.
pub fn stop_words(self, reader: &heed::RoTxn<MainT>) -> MResult<Vec<String>> {
    let stop_words_fst = self.stop_words_fst(reader)?;
    let words = stop_words_fst.stream().into_strs()?;
    Ok(words)
}
pub fn put_number_of_documents<F>(self, writer: &mut heed::RwTxn<MainT>, f: F) -> MResult<u64>
2019-10-18 13:05:28 +02:00
where
F: Fn(u64) -> u64,
2019-10-03 15:04:11 +02:00
{
let new = self.number_of_documents(&*writer).map(f)?;
2019-10-18 13:05:28 +02:00
self.main
.put::<_, Str, OwnedType<u64>>(writer, NUMBER_OF_DOCUMENTS_KEY, &new)?;
2019-10-03 16:13:09 +02:00
Ok(new)
}
pub fn number_of_documents(self, reader: &heed::RoTxn<MainT>) -> MResult<u64> {
2019-10-18 13:05:28 +02:00
match self
.main
.get::<_, Str, OwnedType<u64>>(reader, NUMBER_OF_DOCUMENTS_KEY)? {
2019-10-16 17:05:24 +02:00
Some(value) => Ok(value),
2019-10-03 16:13:09 +02:00
None => Ok(0),
}
2019-10-03 15:04:11 +02:00
}
2019-10-11 15:33:35 +02:00
pub fn put_fields_distribution(
2019-11-20 11:24:08 +01:00
self,
writer: &mut heed::RwTxn<MainT>,
fields_frequency: &FreqsMap,
) -> MResult<()> {
2020-05-27 12:04:35 +02:00
Ok(self.main.put::<_, Str, SerdeFreqsMap>(writer, FIELDS_DISTRIBUTION_KEY, fields_frequency)?)
}
/// Returns the `FreqsMap` deserialized from the `FIELDS_DISTRIBUTION_KEY`
/// entry, if any.
pub fn fields_distribution(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<FreqsMap>> {
    // The lookup already yields an `Option<FreqsMap>`; forward it unchanged.
    Ok(self
        .main
        .get::<_, Str, SerdeFreqsMap>(reader, FIELDS_DISTRIBUTION_KEY)?)
}
pub fn attributes_for_faceting<'txn>(&self, reader: &'txn heed::RoTxn<MainT>) -> MResult<Option<Cow<'txn, Set<FieldId>>>> {
Ok(self.main.get::<_, Str, CowSet<FieldId>>(reader, ATTRIBUTES_FOR_FACETING_KEY)?)
2020-05-05 22:19:34 +02:00
}
pub fn put_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>, attributes: &Set<FieldId>) -> MResult<()> {
Ok(self.main.put::<_, Str, CowSet<FieldId>>(writer, ATTRIBUTES_FOR_FACETING_KEY, attributes)?)
2020-05-05 22:19:34 +02:00
}
pub fn delete_attributes_for_faceting(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
Ok(self.main.delete::<_, Str>(writer, ATTRIBUTES_FOR_FACETING_KEY)?)
2020-05-05 22:19:34 +02:00
}
pub fn ranking_rules(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<Vec<RankingRule>>> {
Ok(self.main.get::<_, Str, SerdeBincode<Vec<RankingRule>>>(reader, RANKING_RULES_KEY)?)
2020-01-08 14:17:38 +01:00
}
pub fn put_ranking_rules(self, writer: &mut heed::RwTxn<MainT>, value: &[RankingRule]) -> MResult<()> {
Ok(self.main.put::<_, Str, SerdeBincode<Vec<RankingRule>>>(writer, RANKING_RULES_KEY, &value.to_vec())?)
2020-01-08 14:17:38 +01:00
}
pub fn delete_ranking_rules(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
Ok(self.main.delete::<_, Str>(writer, RANKING_RULES_KEY)?)
2020-01-08 14:17:38 +01:00
}
2020-06-16 10:45:17 +02:00
pub fn distinct_attribute(&self, reader: &heed::RoTxn<MainT>) -> MResult<Option<FieldId>> {
2020-06-26 22:09:34 +02:00
match self.main.get::<_, Str, OwnedType<u16>>(reader, DISTINCT_ATTRIBUTE_KEY)? {
Some(value) => Ok(Some(FieldId(value.to_owned()))),
None => Ok(None),
2020-02-11 15:16:02 +01:00
}
2020-01-08 14:17:38 +01:00
}
2020-06-16 10:45:17 +02:00
pub fn put_distinct_attribute(self, writer: &mut heed::RwTxn<MainT>, value: FieldId) -> MResult<()> {
Ok(self.main.put::<_, Str, OwnedType<u16>>(writer, DISTINCT_ATTRIBUTE_KEY, &value.0)?)
2020-01-08 14:17:38 +01:00
}
pub fn delete_distinct_attribute(self, writer: &mut heed::RwTxn<MainT>) -> MResult<bool> {
Ok(self.main.delete::<_, Str>(writer, DISTINCT_ATTRIBUTE_KEY)?)
2020-01-08 14:17:38 +01:00
}
pub fn put_customs(self, writer: &mut heed::RwTxn<MainT>, customs: &[u8]) -> MResult<()> {
Ok(self.main.put::<_, Str, ByteSlice>(writer, CUSTOMS_KEY, customs)?)
2019-10-11 15:33:35 +02:00
}
pub fn customs<'txn>(self, reader: &'txn heed::RoTxn<MainT>) -> MResult<Option<&'txn [u8]>> {
Ok(self.main.get::<_, Str, ByteSlice>(reader, CUSTOMS_KEY)?)
2019-10-11 15:33:35 +02:00
}
2019-10-03 15:04:11 +02:00
}