mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 21:20:24 +01:00
Merge pull request #158 from meilisearch/moving-back-to-rocksdb
Moving back to RocksDB
This commit is contained in:
commit
ab2ca15c5c
@ -16,7 +16,7 @@ ordered-float = { version = "1.0.2", features = ["serde"] }
|
||||
sdset = "0.3.2"
|
||||
serde = { version = "1.0.91", features = ["derive"] }
|
||||
serde_json = { version = "1.0.39", features = ["preserve_order"] }
|
||||
sled = "0.23.0"
|
||||
rocksdb = { version = "0.12.2", default-features = false }
|
||||
toml = { version = "0.5.0", features = ["preserve_order"] }
|
||||
zerocopy = "0.2.2"
|
||||
|
||||
@ -28,9 +28,5 @@ rev = "40b3d48"
|
||||
git = "https://github.com/Kerollmops/fst.git"
|
||||
branch = "arc-byte-slice"
|
||||
|
||||
[features]
|
||||
default = []
|
||||
compression = ["sled/compression"]
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.0.7"
|
||||
|
@ -1,13 +1,13 @@
|
||||
use std::sync::Arc;
|
||||
use std::ops::Deref;
|
||||
use crate::database::raw_index::InnerRawIndex;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct CustomSettings(pub Arc<sled::Tree>);
|
||||
pub struct CustomSettings(pub(crate) InnerRawIndex);
|
||||
|
||||
impl Deref for CustomSettings {
|
||||
type Target = sled::Tree;
|
||||
type Target = InnerRawIndex;
|
||||
|
||||
fn deref(&self) -> &sled::Tree {
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
@ -1,17 +1,20 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use meilidb_core::DocumentId;
|
||||
|
||||
use crate::database::raw_index::InnerRawIndex;
|
||||
use super::Error;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DocsWordsIndex(pub Arc<sled::Tree>);
|
||||
pub struct DocsWordsIndex(pub(crate) InnerRawIndex);
|
||||
|
||||
impl DocsWordsIndex {
|
||||
pub fn doc_words(&self, id: DocumentId) -> Result<Option<fst::Set>, Error> {
|
||||
let key = id.0.to_be_bytes();
|
||||
match self.0.get(key)? {
|
||||
match self.0.get_pinned(key)? {
|
||||
Some(bytes) => {
|
||||
let len = bytes.len();
|
||||
let value = bytes.into();
|
||||
let value = Arc::from(bytes.as_ref());
|
||||
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
|
||||
Ok(Some(fst::Set::from(fst)))
|
||||
},
|
||||
@ -27,7 +30,7 @@ impl DocsWordsIndex {
|
||||
|
||||
pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> {
|
||||
let key = id.0.to_be_bytes();
|
||||
self.0.del(key)?;
|
||||
self.0.delete(key)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -122,6 +122,7 @@ impl<'a> DocumentsAddition<'a> {
|
||||
let ranked_map = self.ranked_map;
|
||||
let schema = lease_inner.schema.clone();
|
||||
let raw = lease_inner.raw.clone();
|
||||
lease_inner.raw.compact();
|
||||
|
||||
let inner = InnerIndex { words, schema, ranked_map, raw };
|
||||
self.inner.0.store(Arc::new(inner));
|
||||
|
@ -121,6 +121,7 @@ impl<'a> DocumentsDeletion<'a> {
|
||||
let ranked_map = lease_inner.ranked_map.clone();
|
||||
let schema = lease_inner.schema.clone();
|
||||
let raw = lease_inner.raw.clone();
|
||||
lease_inner.raw.compact();
|
||||
|
||||
let inner = InnerIndex { words, schema, ranked_map, raw };
|
||||
self.inner.0.store(Arc::new(inner));
|
||||
|
@ -1,70 +1,89 @@
|
||||
use std::sync::Arc;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use meilidb_core::DocumentId;
|
||||
use sled::IVec;
|
||||
use rocksdb::DBVector;
|
||||
|
||||
use crate::database::raw_index::InnerRawIndex;
|
||||
use crate::document_attr_key::DocumentAttrKey;
|
||||
use crate::schema::SchemaAttr;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct DocumentsIndex(pub Arc<sled::Tree>);
|
||||
pub struct DocumentsIndex(pub(crate) InnerRawIndex);
|
||||
|
||||
impl DocumentsIndex {
|
||||
pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> sled::Result<Option<IVec>> {
|
||||
pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<Option<DBVector>, rocksdb::Error> {
|
||||
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||
self.0.get(key)
|
||||
}
|
||||
|
||||
pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> sled::Result<()> {
|
||||
pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec<u8>) -> Result<(), rocksdb::Error> {
|
||||
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||
self.0.set(key, value)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> sled::Result<()> {
|
||||
pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<(), rocksdb::Error> {
|
||||
let key = DocumentAttrKey::new(id, attr).to_be_bytes();
|
||||
self.0.del(key)?;
|
||||
self.0.delete(key)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn del_all_document_fields(&self, id: DocumentId) -> sled::Result<()> {
|
||||
pub fn del_all_document_fields(&self, id: DocumentId) -> Result<(), rocksdb::Error> {
|
||||
let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
|
||||
let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();
|
||||
let document_attrs = self.0.range(start..=end).keys();
|
||||
|
||||
for key in document_attrs {
|
||||
self.0.del(key?)?;
|
||||
}
|
||||
|
||||
self.0.delete_range(start, end)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn document_fields(&self, id: DocumentId) -> DocumentFieldsIter {
|
||||
let start = DocumentAttrKey::new(id, SchemaAttr::min());
|
||||
let start = start.to_be_bytes();
|
||||
let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes();
|
||||
let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes();
|
||||
|
||||
let end = DocumentAttrKey::new(id, SchemaAttr::max());
|
||||
let end = end.to_be_bytes();
|
||||
let from = rocksdb::IteratorMode::From(&start[..], rocksdb::Direction::Forward);
|
||||
let iter = self.0.iterator(from).unwrap();
|
||||
|
||||
DocumentFieldsIter(self.0.range(start..=end))
|
||||
DocumentFieldsIter(iter, end.to_vec())
|
||||
}
|
||||
|
||||
pub fn len(&self) -> Result<usize, rocksdb::Error> {
|
||||
let mut last_document_id = None;
|
||||
let mut count = 0;
|
||||
|
||||
let from = rocksdb::IteratorMode::Start;
|
||||
let iterator = self.0.iterator(from)?;
|
||||
|
||||
for (key, value) in iterator {
|
||||
let slice = key.as_ref().try_into().unwrap();
|
||||
let document_id = DocumentAttrKey::from_be_bytes(slice).document_id;
|
||||
|
||||
if Some(document_id) != last_document_id {
|
||||
last_document_id = Some(document_id);
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(count)
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentFieldsIter<'a>(sled::Iter<'a>);
|
||||
pub struct DocumentFieldsIter<'a>(rocksdb::DBIterator<'a>, Vec<u8>);
|
||||
|
||||
impl<'a> Iterator for DocumentFieldsIter<'a> {
|
||||
type Item = sled::Result<(SchemaAttr, IVec)>;
|
||||
type Item = Result<(SchemaAttr, Box<[u8]>), rocksdb::Error>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
match self.0.next() {
|
||||
Some(Ok((key, value))) => {
|
||||
Some((key, value)) => {
|
||||
|
||||
if key.as_ref() > self.1.as_ref() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let slice: &[u8] = key.as_ref();
|
||||
let array = slice.try_into().unwrap();
|
||||
let key = DocumentAttrKey::from_be_bytes(array);
|
||||
Some(Ok((key.attribute, value)))
|
||||
},
|
||||
Some(Err(e)) => Some(Err(e)),
|
||||
None => None,
|
||||
}
|
||||
}
|
||||
|
@ -7,15 +7,15 @@ pub enum Error {
|
||||
SchemaMissing,
|
||||
WordIndexMissing,
|
||||
MissingDocumentId,
|
||||
SledError(sled::Error),
|
||||
RocksdbError(rocksdb::Error),
|
||||
FstError(fst::Error),
|
||||
BincodeError(bincode::Error),
|
||||
SerializerError(SerializerError),
|
||||
}
|
||||
|
||||
impl From<sled::Error> for Error {
|
||||
fn from(error: sled::Error) -> Error {
|
||||
Error::SledError(error)
|
||||
impl From<rocksdb::Error> for Error {
|
||||
fn from(error: rocksdb::Error) -> Error {
|
||||
Error::RocksdbError(error)
|
||||
}
|
||||
}
|
||||
|
||||
@ -45,7 +45,7 @@ impl fmt::Display for Error {
|
||||
SchemaMissing => write!(f, "this index does not have a schema"),
|
||||
WordIndexMissing => write!(f, "this index does not have a word index"),
|
||||
MissingDocumentId => write!(f, "document id is missing"),
|
||||
SledError(e) => write!(f, "sled error; {}", e),
|
||||
RocksdbError(e) => write!(f, "RocksDB error; {}", e),
|
||||
FstError(e) => write!(f, "fst error; {}", e),
|
||||
BincodeError(e) => write!(f, "bincode error; {}", e),
|
||||
SerializerError(e) => write!(f, "serializer error; {}", e),
|
||||
|
@ -15,6 +15,13 @@ use crate::serde::Deserializer;
|
||||
use super::{Error, CustomSettings};
|
||||
use super::{RawIndex, DocumentsAddition, DocumentsDeletion};
|
||||
|
||||
#[derive(Copy, Clone)]
|
||||
pub struct IndexStats {
|
||||
pub number_of_words: usize,
|
||||
pub number_of_documents: usize,
|
||||
pub number_attrs_in_ranked_map: usize,
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct Index(pub ArcSwap<InnerIndex>);
|
||||
|
||||
@ -48,6 +55,16 @@ impl Index {
|
||||
Ok(index)
|
||||
}
|
||||
|
||||
pub fn stats(&self) -> Result<IndexStats, rocksdb::Error> {
|
||||
let lease = self.0.lease();
|
||||
|
||||
Ok(IndexStats {
|
||||
number_of_words: lease.words.len(),
|
||||
number_of_documents: lease.raw.documents.len()?,
|
||||
number_attrs_in_ranked_map: lease.ranked_map.len(),
|
||||
})
|
||||
}
|
||||
|
||||
pub fn query_builder(&self) -> QueryBuilder<IndexLease> {
|
||||
let lease = IndexLease(self.0.lease());
|
||||
QueryBuilder::new(lease)
|
||||
|
@ -1,16 +1,17 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use crate::database::raw_index::InnerRawIndex;
|
||||
use crate::ranked_map::RankedMap;
|
||||
use crate::schema::Schema;
|
||||
|
||||
use super::Error;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct MainIndex(pub Arc<sled::Tree>);
|
||||
pub struct MainIndex(pub(crate) InnerRawIndex);
|
||||
|
||||
impl MainIndex {
|
||||
pub fn schema(&self) -> Result<Option<Schema>, Error> {
|
||||
match self.0.get("schema")? {
|
||||
match self.0.get_pinned("schema")? {
|
||||
Some(bytes) => {
|
||||
let schema = Schema::read_from_bin(bytes.as_ref())?;
|
||||
Ok(Some(schema))
|
||||
@ -27,10 +28,10 @@ impl MainIndex {
|
||||
}
|
||||
|
||||
pub fn words_set(&self) -> Result<Option<fst::Set>, Error> {
|
||||
match self.0.get("words")? {
|
||||
match self.0.get_pinned("words")? {
|
||||
Some(bytes) => {
|
||||
let len = bytes.len();
|
||||
let value = bytes.into();
|
||||
let value = Arc::from(bytes.as_ref());
|
||||
let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?;
|
||||
Ok(Some(fst::Set::from(fst)))
|
||||
},
|
||||
@ -39,12 +40,11 @@ impl MainIndex {
|
||||
}
|
||||
|
||||
pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> {
|
||||
self.0.set("words", value.as_fst().as_bytes())?;
|
||||
Ok(())
|
||||
self.0.set("words", value.as_fst().as_bytes()).map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn ranked_map(&self) -> Result<Option<RankedMap>, Error> {
|
||||
match self.0.get("ranked-map")? {
|
||||
match self.0.get_pinned("ranked-map")? {
|
||||
Some(bytes) => {
|
||||
let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?;
|
||||
Ok(Some(ranked_map))
|
||||
|
@ -26,32 +26,39 @@ use self::documents_deletion::DocumentsDeletion;
|
||||
use self::documents_index::DocumentsIndex;
|
||||
use self::index::InnerIndex;
|
||||
use self::main_index::MainIndex;
|
||||
use self::raw_index::RawIndex;
|
||||
use self::raw_index::{RawIndex, InnerRawIndex};
|
||||
use self::words_index::WordsIndex;
|
||||
|
||||
pub struct Database {
|
||||
cache: RwLock<HashMap<String, Arc<Index>>>,
|
||||
inner: sled::Db,
|
||||
inner: Arc<rocksdb::DB>,
|
||||
}
|
||||
|
||||
impl Database {
|
||||
pub fn start_default<P: AsRef<Path>>(path: P) -> Result<Database, Error> {
|
||||
let path = path.as_ref();
|
||||
let cache = RwLock::new(HashMap::new());
|
||||
let config = sled::ConfigBuilder::new().path(path).print_profile_on_drop(true).build();
|
||||
let inner = sled::Db::start(config)?;
|
||||
Ok(Database { cache, inner })
|
||||
}
|
||||
|
||||
pub fn start_with_compression<P: AsRef<Path>>(path: P, factor: i32) -> Result<Database, Error> {
|
||||
let config = sled::ConfigBuilder::default()
|
||||
.use_compression(true)
|
||||
.compression_factor(factor)
|
||||
.path(path)
|
||||
.build();
|
||||
let options = {
|
||||
let mut options = rocksdb::Options::default();
|
||||
options.create_if_missing(true);
|
||||
options
|
||||
};
|
||||
let cfs = rocksdb::DB::list_cf(&options, path).unwrap_or(Vec::new());
|
||||
let inner = Arc::new(rocksdb::DB::open_cf(&options, path, &cfs)?);
|
||||
let database = Database { cache, inner };
|
||||
|
||||
let cache = RwLock::new(HashMap::new());
|
||||
let inner = sled::Db::start(config)?;
|
||||
Ok(Database { cache, inner })
|
||||
let mut indexes: Vec<_> = cfs.iter()
|
||||
.filter_map(|c| c.split('-').nth(0).filter(|&c| c != "default"))
|
||||
.collect();
|
||||
indexes.sort_unstable();
|
||||
indexes.dedup();
|
||||
|
||||
for index in indexes {
|
||||
database.open_index(index)?;
|
||||
}
|
||||
|
||||
Ok(database)
|
||||
}
|
||||
|
||||
pub fn indexes(&self) -> Result<Option<HashSet<String>>, Error> {
|
||||
@ -66,7 +73,7 @@ impl Database {
|
||||
|
||||
fn set_indexes(&self, value: &HashSet<String>) -> Result<(), Error> {
|
||||
let bytes = bincode::serialize(value)?;
|
||||
self.inner.set("indexes", bytes)?;
|
||||
self.inner.put("indexes", bytes)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
@ -89,32 +96,32 @@ impl Database {
|
||||
}
|
||||
|
||||
let main = {
|
||||
let tree = self.inner.open_tree(name)?;
|
||||
MainIndex(tree)
|
||||
self.inner.cf_handle(name).expect("cf not found");
|
||||
MainIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(name)))
|
||||
};
|
||||
|
||||
let words = {
|
||||
let tree_name = format!("{}-words", name);
|
||||
let tree = self.inner.open_tree(tree_name)?;
|
||||
WordsIndex(tree)
|
||||
let cf_name = format!("{}-words", name);
|
||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
||||
WordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let docs_words = {
|
||||
let tree_name = format!("{}-docs-words", name);
|
||||
let tree = self.inner.open_tree(tree_name)?;
|
||||
DocsWordsIndex(tree)
|
||||
let cf_name = format!("{}-docs-words", name);
|
||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
||||
DocsWordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let documents = {
|
||||
let tree_name = format!("{}-documents", name);
|
||||
let tree = self.inner.open_tree(tree_name)?;
|
||||
DocumentsIndex(tree)
|
||||
let cf_name = format!("{}-documents", name);
|
||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
||||
DocumentsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let custom = {
|
||||
let tree_name = format!("{}-custom", name);
|
||||
let tree = self.inner.open_tree(tree_name)?;
|
||||
CustomSettings(tree)
|
||||
let cf_name = format!("{}-custom", name);
|
||||
self.inner.cf_handle(&cf_name).expect("cf not found");
|
||||
CustomSettings(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let raw_index = RawIndex { main, words, docs_words, documents, custom };
|
||||
@ -136,8 +143,8 @@ impl Database {
|
||||
},
|
||||
Entry::Vacant(vacant) => {
|
||||
let main = {
|
||||
let tree = self.inner.open_tree(name)?;
|
||||
MainIndex(tree)
|
||||
self.inner.create_cf(name, &rocksdb::Options::default())?;
|
||||
MainIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(name)))
|
||||
};
|
||||
|
||||
if let Some(prev_schema) = main.schema()? {
|
||||
@ -149,27 +156,27 @@ impl Database {
|
||||
main.set_schema(&schema)?;
|
||||
|
||||
let words = {
|
||||
let tree_name = format!("{}-words", name);
|
||||
let tree = self.inner.open_tree(tree_name)?;
|
||||
WordsIndex(tree)
|
||||
let cf_name = format!("{}-words", name);
|
||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
||||
WordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let docs_words = {
|
||||
let tree_name = format!("{}-docs-words", name);
|
||||
let tree = self.inner.open_tree(tree_name)?;
|
||||
DocsWordsIndex(tree)
|
||||
let cf_name = format!("{}-docs-words", name);
|
||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
||||
DocsWordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let documents = {
|
||||
let tree_name = format!("{}-documents", name);
|
||||
let tree = self.inner.open_tree(tree_name)?;
|
||||
DocumentsIndex(tree)
|
||||
let cf_name = format!("{}-documents", name);
|
||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
||||
DocumentsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let custom = {
|
||||
let tree_name = format!("{}-custom", name);
|
||||
let tree = self.inner.open_tree(tree_name)?;
|
||||
CustomSettings(tree)
|
||||
let cf_name = format!("{}-custom", name);
|
||||
self.inner.create_cf(&cf_name, &rocksdb::Options::default())?;
|
||||
CustomSettings(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name)))
|
||||
};
|
||||
|
||||
let mut indexes = self.indexes()?.unwrap_or_else(HashSet::new);
|
||||
|
@ -1,3 +1,4 @@
|
||||
use std::sync::Arc;
|
||||
use super::{MainIndex, WordsIndex, DocsWordsIndex, DocumentsIndex, CustomSettings};
|
||||
|
||||
#[derive(Clone)]
|
||||
@ -8,3 +9,78 @@ pub struct RawIndex {
|
||||
pub documents: DocumentsIndex,
|
||||
pub custom: CustomSettings,
|
||||
}
|
||||
|
||||
impl RawIndex {
|
||||
pub(crate) fn compact(&self) {
|
||||
self.main.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
self.words.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
self.docs_words.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
self.documents.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
self.custom.0.compact_range(None::<&[u8]>, None::<&[u8]>);
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct InnerRawIndex {
|
||||
database: Arc<rocksdb::DB>,
|
||||
name: Arc<str>,
|
||||
}
|
||||
|
||||
impl InnerRawIndex {
|
||||
pub fn new(database: Arc<rocksdb::DB>, name: Arc<str>) -> InnerRawIndex {
|
||||
InnerRawIndex { database, name }
|
||||
}
|
||||
|
||||
pub fn get<K>(&self, key: K) -> Result<Option<rocksdb::DBVector>, rocksdb::Error>
|
||||
where K: AsRef<[u8]>,
|
||||
{
|
||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
||||
self.database.get_cf(cf, key)
|
||||
}
|
||||
|
||||
pub fn get_pinned<K>(&self, key: K) -> Result<Option<rocksdb::DBPinnableSlice>, rocksdb::Error>
|
||||
where K: AsRef<[u8]>,
|
||||
{
|
||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
||||
self.database.get_pinned_cf(cf, key)
|
||||
}
|
||||
|
||||
pub fn iterator(&self, from: rocksdb::IteratorMode) -> Result<rocksdb::DBIterator, rocksdb::Error> {
|
||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
||||
self.database.iterator_cf(cf, from)
|
||||
}
|
||||
|
||||
pub fn set<K, V>(&self, key: K, value: V) -> Result<(), rocksdb::Error>
|
||||
where K: AsRef<[u8]>,
|
||||
V: AsRef<[u8]>,
|
||||
{
|
||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
||||
self.database.put_cf(cf, key, value)
|
||||
}
|
||||
|
||||
pub fn delete<K>(&self, key: K) -> Result<(), rocksdb::Error>
|
||||
where K: AsRef<[u8]>
|
||||
{
|
||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
||||
self.database.delete_cf(cf, key)
|
||||
}
|
||||
|
||||
pub fn delete_range<K>(&self, start: K, end: K) -> Result<(), rocksdb::Error>
|
||||
where K: AsRef<[u8]>,
|
||||
{
|
||||
let mut batch = rocksdb::WriteBatch::default();
|
||||
|
||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
||||
batch.delete_range_cf(cf, start, end)?;
|
||||
|
||||
self.database.write(batch)
|
||||
}
|
||||
|
||||
pub fn compact_range<S, E>(&self, start: Option<S>, end: Option<E>)
|
||||
where S: AsRef<[u8]>,
|
||||
E: AsRef<[u8]>,
|
||||
{
|
||||
let cf = self.database.cf_handle(&self.name).expect("cf not found");
|
||||
self.database.compact_range_cf(cf, start, end)
|
||||
}
|
||||
}
|
||||
|
@ -1,32 +1,48 @@
|
||||
use std::sync::Arc;
|
||||
|
||||
use meilidb_core::DocIndex;
|
||||
use sdset::{Set, SetBuf};
|
||||
use zerocopy::{LayoutVerified, AsBytes};
|
||||
|
||||
use crate::database::raw_index::InnerRawIndex;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct WordsIndex(pub Arc<sled::Tree>);
|
||||
pub struct WordsIndex(pub(crate) InnerRawIndex);
|
||||
|
||||
impl WordsIndex {
|
||||
pub fn doc_indexes(&self, word: &[u8]) -> sled::Result<Option<SetBuf<DocIndex>>> {
|
||||
pub fn doc_indexes(&self, word: &[u8]) -> Result<Option<SetBuf<DocIndex>>, rocksdb::Error> {
|
||||
// we must force an allocation to make the memory aligned
|
||||
match self.0.get(word)? {
|
||||
Some(bytes) => {
|
||||
let layout = LayoutVerified::new_slice(bytes.as_ref()).expect("invalid layout");
|
||||
let slice = layout.into_slice();
|
||||
let setbuf = SetBuf::new_unchecked(slice.to_vec());
|
||||
let vec = match LayoutVerified::new_slice(bytes.as_ref()) {
|
||||
Some(layout) => layout.into_slice().to_vec(),
|
||||
None => {
|
||||
let len = bytes.as_ref().len();
|
||||
let count = len / std::mem::size_of::<DocIndex>();
|
||||
let mut buf: Vec<DocIndex> = Vec::with_capacity(count);
|
||||
unsafe {
|
||||
let src = bytes.as_ref().as_ptr();
|
||||
let dst = buf.as_mut_ptr() as *mut u8;
|
||||
std::ptr::copy_nonoverlapping(src, dst, len);
|
||||
buf.set_len(count);
|
||||
}
|
||||
buf
|
||||
}
|
||||
};
|
||||
|
||||
let setbuf = SetBuf::new_unchecked(vec);
|
||||
|
||||
Ok(Some(setbuf))
|
||||
},
|
||||
None => Ok(None),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_doc_indexes(&self, word: &[u8], set: &Set<DocIndex>) -> sled::Result<()> {
|
||||
pub fn set_doc_indexes(&self, word: &[u8], set: &Set<DocIndex>) -> Result<(), rocksdb::Error> {
|
||||
self.0.set(word, set.as_bytes())?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn del_doc_indexes(&self, word: &[u8]) -> sled::Result<()> {
|
||||
self.0.del(word)?;
|
||||
pub fn del_doc_indexes(&self, word: &[u8]) -> Result<(), rocksdb::Error> {
|
||||
self.0.delete(word)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ mod ranked_map;
|
||||
mod serde;
|
||||
pub mod schema;
|
||||
|
||||
pub use sled;
|
||||
pub use rocksdb;
|
||||
pub use self::database::{Database, Index, CustomSettings};
|
||||
pub use self::number::Number;
|
||||
pub use self::ranked_map::RankedMap;
|
||||
|
@ -9,6 +9,10 @@ use crate::{SchemaAttr, Number};
|
||||
pub struct RankedMap(HashMap<(DocumentId, SchemaAttr), Number>);
|
||||
|
||||
impl RankedMap {
|
||||
pub fn len(&self) -> usize {
|
||||
self.0.len()
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, document: DocumentId, attribute: SchemaAttr, number: Number) {
|
||||
self.0.insert((document, attribute), number);
|
||||
}
|
||||
|
@ -36,7 +36,7 @@ use crate::schema::SchemaAttr;
|
||||
pub enum SerializerError {
|
||||
DocumentIdNotFound,
|
||||
RmpError(RmpError),
|
||||
SledError(sled::Error),
|
||||
RocksdbError(rocksdb::Error),
|
||||
ParseNumberError(ParseNumberError),
|
||||
UnserializableType { type_name: &'static str },
|
||||
UnindexableType { type_name: &'static str },
|
||||
@ -57,7 +57,7 @@ impl fmt::Display for SerializerError {
|
||||
write!(f, "serialized document does not have an id according to the schema")
|
||||
}
|
||||
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
|
||||
SerializerError::SledError(e) => write!(f, "sled related error: {}", e),
|
||||
SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e),
|
||||
SerializerError::ParseNumberError(e) => {
|
||||
write!(f, "error while trying to parse a number: {}", e)
|
||||
},
|
||||
@ -89,9 +89,9 @@ impl From<RmpError> for SerializerError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<sled::Error> for SerializerError {
|
||||
fn from(error: sled::Error) -> SerializerError {
|
||||
SerializerError::SledError(error)
|
||||
impl From<rocksdb::Error> for SerializerError {
|
||||
fn from(error: rocksdb::Error) -> SerializerError {
|
||||
SerializerError::RocksdbError(error)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -59,7 +59,7 @@ fn index(
|
||||
|
||||
let mut system = sysinfo::System::new();
|
||||
|
||||
let index = database.create_index("default", schema.clone())?;
|
||||
let index = database.create_index("test", schema.clone())?;
|
||||
|
||||
let mut rdr = csv::Reader::from_path(csv_data_path)?;
|
||||
let mut raw_record = csv::StringRecord::new();
|
||||
|
@ -143,7 +143,7 @@ fn main() -> Result<(), Box<Error>> {
|
||||
let mut buffer = String::new();
|
||||
let input = io::stdin();
|
||||
|
||||
let index = database.open_index("default")?.unwrap();
|
||||
let index = database.open_index("test")?.unwrap();
|
||||
let schema = index.schema();
|
||||
|
||||
println!("database prepared for you in {:.2?}", start.elapsed());
|
||||
|
Loading…
x
Reference in New Issue
Block a user