From 4c973238a19dded9bc315db0e3e6b78be9c56520 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 23 May 2019 14:47:10 +0200 Subject: [PATCH 1/8] feat: Introduce a basic RocksDB based version --- meilidb-data/Cargo.toml | 6 +- meilidb-data/src/database/custom_settings.rs | 6 +- meilidb-data/src/database/docs_words_index.rs | 13 +-- meilidb-data/src/database/documents_index.rs | 52 +++++++----- meilidb-data/src/database/error.rs | 10 +-- meilidb-data/src/database/main_index.rs | 22 +++-- meilidb-data/src/database/mod.rs | 82 +++++++++---------- meilidb-data/src/database/words_index.rs | 17 ++-- meilidb-data/src/lib.rs | 2 +- meilidb-data/src/serde/mod.rs | 10 +-- meilidb/examples/create-database.rs | 2 +- meilidb/examples/query-database.rs | 2 +- 12 files changed, 119 insertions(+), 105 deletions(-) diff --git a/meilidb-data/Cargo.toml b/meilidb-data/Cargo.toml index 03e6f0074..8c6fe5845 100644 --- a/meilidb-data/Cargo.toml +++ b/meilidb-data/Cargo.toml @@ -16,7 +16,7 @@ ordered-float = { version = "1.0.2", features = ["serde"] } sdset = "0.3.2" serde = { version = "1.0.91", features = ["derive"] } serde_json = { version = "1.0.39", features = ["preserve_order"] } -sled = "0.23.0" +rocksdb = "0.12.2" toml = { version = "0.5.0", features = ["preserve_order"] } zerocopy = "0.2.2" @@ -28,9 +28,5 @@ rev = "40b3d48" git = "https://github.com/Kerollmops/fst.git" branch = "arc-byte-slice" -[features] -default = [] -compression = ["sled/compression"] - [dev-dependencies] tempfile = "3.0.7" diff --git a/meilidb-data/src/database/custom_settings.rs b/meilidb-data/src/database/custom_settings.rs index 565151aaa..b9227d0bb 100644 --- a/meilidb-data/src/database/custom_settings.rs +++ b/meilidb-data/src/database/custom_settings.rs @@ -2,12 +2,12 @@ use std::sync::Arc; use std::ops::Deref; #[derive(Clone)] -pub struct CustomSettings(pub Arc); +pub struct CustomSettings(pub Arc, pub String); impl Deref for CustomSettings { - type Target = sled::Tree; + type Target = rocksdb::DB; - fn deref(&self) -> &sled::Tree { + fn deref(&self) -> &Self::Target { &self.0 } } diff --git a/meilidb-data/src/database/docs_words_index.rs b/meilidb-data/src/database/docs_words_index.rs index 6b7de15a2..38430a9d1 100644 --- a/meilidb-data/src/database/docs_words_index.rs +++ b/meilidb-data/src/database/docs_words_index.rs @@ -3,15 +3,16 @@ use meilidb_core::DocumentId; use super::Error; #[derive(Clone)] -pub struct DocsWordsIndex(pub Arc); +pub struct DocsWordsIndex(pub Arc, pub String); impl DocsWordsIndex { pub fn doc_words(&self, id: DocumentId) -> Result, Error> { let key = id.0.to_be_bytes(); - match self.0.get(key)? { + let cf = self.0.cf_handle(&self.1).unwrap(); + match self.0.get_pinned_cf(cf, key)? { Some(bytes) => { let len = bytes.len(); - let value = bytes.into(); + let value = Arc::from(bytes.as_ref()); let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?; Ok(Some(fst::Set::from(fst))) }, @@ -21,13 +22,15 @@ impl DocsWordsIndex { pub fn set_doc_words(&self, id: DocumentId, words: &fst::Set) -> Result<(), Error> { let key = id.0.to_be_bytes(); - self.0.set(key, words.as_fst().as_bytes())?; + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.put_cf(cf, key, words.as_fst().as_bytes())?; Ok(()) } pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> { let key = id.0.to_be_bytes(); - self.0.del(key)?; + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.delete_cf(cf, key)?; Ok(()) } } diff --git a/meilidb-data/src/database/documents_index.rs b/meilidb-data/src/database/documents_index.rs index 5fd276bb3..1326a480d 100644 --- a/meilidb-data/src/database/documents_index.rs +++ b/meilidb-data/src/database/documents_index.rs @@ -2,69 +2,77 @@ use std::sync::Arc; use std::convert::TryInto; use meilidb_core::DocumentId; -use sled::IVec; +use rocksdb::DBVector; use crate::document_attr_key::DocumentAttrKey; use crate::schema::SchemaAttr; #[derive(Clone)] -pub struct DocumentsIndex(pub Arc); +pub struct DocumentsIndex(pub Arc, pub String); impl DocumentsIndex { - pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> sled::Result> { + pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result, rocksdb::Error> { let key = DocumentAttrKey::new(id, attr).to_be_bytes(); - self.0.get(key) + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.get_cf(cf, key) } - pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec) -> sled::Result<()> { + pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec) -> Result<(), rocksdb::Error> { let key = DocumentAttrKey::new(id, attr).to_be_bytes(); - self.0.set(key, value)?; + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.put_cf(cf, key, value)?; Ok(()) } - pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> sled::Result<()> { + pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<(), rocksdb::Error> { let key = DocumentAttrKey::new(id, attr).to_be_bytes(); - self.0.del(key)?; + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.delete_cf(cf, key)?; Ok(()) } - pub fn del_all_document_fields(&self, id: DocumentId) -> sled::Result<()> { + pub fn del_all_document_fields(&self, id: DocumentId) -> Result<(), rocksdb::Error> { let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes(); let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes(); - let document_attrs = self.0.range(start..=end).keys(); - for key in document_attrs { - self.0.del(key?)?; - } + let cf = self.0.cf_handle(&self.1).unwrap(); + let mut batch = rocksdb::WriteBatch::default(); + batch.delete_range_cf(cf, start, end)?; + self.0.write(batch)?; Ok(()) } pub fn document_fields(&self, id: DocumentId) -> DocumentFieldsIter { - let start = DocumentAttrKey::new(id, SchemaAttr::min()); - let start = start.to_be_bytes(); + let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes(); + let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes(); - let end = DocumentAttrKey::new(id, SchemaAttr::max()); - let end = end.to_be_bytes(); + let cf = self.0.cf_handle(&self.1).unwrap(); + let from = rocksdb::IteratorMode::From(&start[..], rocksdb::Direction::Forward); + let iter = self.0.iterator_cf(cf, from).unwrap(); - DocumentFieldsIter(self.0.range(start..=end)) + DocumentFieldsIter(iter, end.to_vec()) } } -pub struct DocumentFieldsIter<'a>(sled::Iter<'a>); +pub struct DocumentFieldsIter<'a>(rocksdb::DBIterator<'a>, Vec); impl<'a> Iterator for DocumentFieldsIter<'a> { - type Item = sled::Result<(SchemaAttr, IVec)>; + type Item = Result<(SchemaAttr, Box<[u8]>), rocksdb::Error>; fn next(&mut self) -> Option { match self.0.next() { - Some(Ok((key, value))) => { + Some((key, value)) => { + + if key.as_ref() > self.1.as_ref() { + return None; + } + let slice: &[u8] = key.as_ref(); let array = slice.try_into().unwrap(); let key = DocumentAttrKey::from_be_bytes(array); Some(Ok((key.attribute, value))) }, - Some(Err(e)) => Some(Err(e)), None => None, } } diff --git a/meilidb-data/src/database/error.rs b/meilidb-data/src/database/error.rs index 3e1b48235..99b90e056 100644 --- a/meilidb-data/src/database/error.rs +++ b/meilidb-data/src/database/error.rs @@ -7,15 +7,15 @@ pub enum Error { SchemaMissing, WordIndexMissing, MissingDocumentId, - SledError(sled::Error), + RocksdbError(rocksdb::Error), FstError(fst::Error), BincodeError(bincode::Error), SerializerError(SerializerError), } -impl From for Error { - fn from(error: sled::Error) -> Error { - Error::SledError(error) +impl From for Error { + fn from(error: rocksdb::Error) -> Error { + Error::RocksdbError(error) } } @@ -45,7 +45,7 @@ impl fmt::Display for Error { SchemaMissing => write!(f, "this index does not have a schema"), WordIndexMissing => write!(f, "this index does not have a word index"), MissingDocumentId => write!(f, "document id is missing"), - SledError(e) => write!(f, "sled error; {}", e), + RocksdbError(e) => write!(f, "RocksDB error; {}", e), FstError(e) => write!(f, "fst error; {}", e), BincodeError(e) => write!(f, "bincode error; {}", e), SerializerError(e) => write!(f, "serializer error; {}", e), diff --git a/meilidb-data/src/database/main_index.rs b/meilidb-data/src/database/main_index.rs index b1d8edc81..251dd78e7 100644 --- a/meilidb-data/src/database/main_index.rs +++ b/meilidb-data/src/database/main_index.rs @@ -6,11 +6,12 @@ use crate::schema::Schema; use super::Error; #[derive(Clone)] -pub struct MainIndex(pub Arc); +pub struct MainIndex(pub Arc, pub String); impl MainIndex { pub fn schema(&self) -> Result, Error> { - match self.0.get("schema")? { + let cf = self.0.cf_handle(&self.1).unwrap(); + match self.0.get_cf(cf, "schema")? { Some(bytes) => { let schema = Schema::read_from_bin(bytes.as_ref())?; Ok(Some(schema)) @@ -22,15 +23,17 @@ impl MainIndex { pub fn set_schema(&self, schema: &Schema) -> Result<(), Error> { let mut bytes = Vec::new(); schema.write_to_bin(&mut bytes)?; - self.0.set("schema", bytes)?; + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.put_cf(cf, "schema", bytes)?; Ok(()) } pub fn words_set(&self) -> Result, Error> { - match self.0.get("words")? { + let cf = self.0.cf_handle(&self.1).unwrap(); + match self.0.get_pinned_cf(cf, "words")? { Some(bytes) => { let len = bytes.len(); - let value = bytes.into(); + let value = Arc::from(bytes.as_ref()); let fst = fst::raw::Fst::from_shared_bytes(value, 0, len)?; Ok(Some(fst::Set::from(fst))) }, @@ -39,12 +42,14 @@ impl MainIndex { } pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> { - self.0.set("words", value.as_fst().as_bytes())?; + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.put_cf(cf, "words", value.as_fst().as_bytes())?; Ok(()) } pub fn ranked_map(&self) -> Result, Error> { - match self.0.get("ranked-map")? { + let cf = self.0.cf_handle(&self.1).unwrap(); + match self.0.get_cf(cf, "ranked-map")? { Some(bytes) => { let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?; Ok(Some(ranked_map)) @@ -56,7 +61,8 @@ impl MainIndex { pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> { let mut bytes = Vec::new(); value.write_to_bin(&mut bytes)?; - self.0.set("ranked_map", bytes)?; + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.put_cf(cf, "ranked_map", bytes)?; Ok(()) } } diff --git a/meilidb-data/src/database/mod.rs b/meilidb-data/src/database/mod.rs index 9e14f8168..0231a6d2f 100644 --- a/meilidb-data/src/database/mod.rs +++ b/meilidb-data/src/database/mod.rs @@ -31,26 +31,24 @@ use self::words_index::WordsIndex; pub struct Database { cache: RwLock>>, - inner: sled::Db, + inner: Arc, } impl Database { pub fn start_default>(path: P) -> Result { + let path = path.as_ref(); let cache = RwLock::new(HashMap::new()); - let config = sled::ConfigBuilder::new().path(path).print_profile_on_drop(true).build(); - let inner = sled::Db::start(config)?; - Ok(Database { cache, inner }) - } - pub fn start_with_compression>(path: P, factor: i32) -> Result { - let config = sled::ConfigBuilder::default() - .use_compression(true) - .compression_factor(factor) - .path(path) - .build(); + let inner = { + let options = { + let mut options = rocksdb::Options::default(); + options.create_if_missing(true); + options + }; + let cfs = rocksdb::DB::list_cf(&options, path).unwrap_or(Vec::new()); + Arc::new(rocksdb::DB::open_cf(&options, path, cfs)?) + }; - let cache = RwLock::new(HashMap::new()); - let inner = sled::Db::start(config)?; Ok(Database { cache, inner }) } @@ -66,7 +64,7 @@ impl Database { fn set_indexes(&self, value: &HashSet) -> Result<(), Error> { let bytes = bincode::serialize(value)?; - self.inner.set("indexes", bytes)?; + self.inner.put("indexes", bytes)?; Ok(()) } @@ -89,32 +87,32 @@ impl Database { } let main = { - let tree = self.inner.open_tree(name)?; - MainIndex(tree) + self.inner.cf_handle(name).expect("cf not found"); + MainIndex(self.inner.clone(), name.to_owned()) }; let words = { - let tree_name = format!("{}-words", name); - let tree = self.inner.open_tree(tree_name)?; - WordsIndex(tree) + let cf_name = format!("{}-words", name); + self.inner.cf_handle(&cf_name).expect("cf not found"); + WordsIndex(self.inner.clone(), cf_name) }; let docs_words = { - let tree_name = format!("{}-docs-words", name); - let tree = self.inner.open_tree(tree_name)?; - DocsWordsIndex(tree) + let cf_name = format!("{}-docs-words", name); + self.inner.cf_handle(&cf_name).expect("cf not found"); + DocsWordsIndex(self.inner.clone(), cf_name) }; let documents = { - let tree_name = format!("{}-documents", name); - let tree = self.inner.open_tree(tree_name)?; - DocumentsIndex(tree) + let cf_name = format!("{}-documents", name); + self.inner.cf_handle(&cf_name).expect("cf not found"); + DocumentsIndex(self.inner.clone(), cf_name) }; let custom = { - let tree_name = format!("{}-custom", name); - let tree = self.inner.open_tree(tree_name)?; - CustomSettings(tree) + let cf_name = format!("{}-custom", name); + self.inner.cf_handle(&cf_name).expect("cf not found"); + CustomSettings(self.inner.clone(), cf_name) }; let raw_index = RawIndex { main, words, docs_words, documents, custom }; @@ -136,8 +134,8 @@ impl Database { }, Entry::Vacant(vacant) => { let main = { - let tree = self.inner.open_tree(name)?; - MainIndex(tree) + self.inner.create_cf(name, &rocksdb::Options::default())?; + MainIndex(self.inner.clone(), name.to_owned()) }; if let Some(prev_schema) = main.schema()? { @@ -149,27 +147,27 @@ impl Database { main.set_schema(&schema)?; let words = { - let tree_name = format!("{}-words", name); - let tree = self.inner.open_tree(tree_name)?; - WordsIndex(tree) + let cf_name = format!("{}-words", name); + self.inner.create_cf(&cf_name, &rocksdb::Options::default())?; + WordsIndex(self.inner.clone(), cf_name) }; let docs_words = { - let tree_name = format!("{}-docs-words", name); - let tree = self.inner.open_tree(tree_name)?; - DocsWordsIndex(tree) + let cf_name = format!("{}-docs-words", name); + self.inner.create_cf(&cf_name, &rocksdb::Options::default())?; + DocsWordsIndex(self.inner.clone(), cf_name) }; let documents = { - let tree_name = format!("{}-documents", name); - let tree = self.inner.open_tree(tree_name)?; - DocumentsIndex(tree) + let cf_name = format!("{}-documents", name); + self.inner.create_cf(&cf_name, &rocksdb::Options::default())?; + DocumentsIndex(self.inner.clone(), cf_name) }; let custom = { - let tree_name = format!("{}-custom", name); - let tree = self.inner.open_tree(tree_name)?; - CustomSettings(tree) + let cf_name = format!("{}-custom", name); + self.inner.create_cf(&cf_name, &rocksdb::Options::default())?; + CustomSettings(self.inner.clone(), cf_name) }; let mut indexes = self.indexes()?.unwrap_or_else(HashSet::new); diff --git a/meilidb-data/src/database/words_index.rs b/meilidb-data/src/database/words_index.rs index 3b2598186..862a918c4 100644 --- a/meilidb-data/src/database/words_index.rs +++ b/meilidb-data/src/database/words_index.rs @@ -5,11 +5,12 @@ use sdset::{Set, SetBuf}; use zerocopy::{LayoutVerified, AsBytes}; #[derive(Clone)] -pub struct WordsIndex(pub Arc); +pub struct WordsIndex(pub Arc, pub String); impl WordsIndex { - pub fn doc_indexes(&self, word: &[u8]) -> sled::Result>> { - match self.0.get(word)? { + pub fn doc_indexes(&self, word: &[u8]) -> Result>, rocksdb::Error> { + let cf = self.0.cf_handle(&self.1).unwrap(); + match self.0.get_cf(cf, word)? { Some(bytes) => { let layout = LayoutVerified::new_slice(bytes.as_ref()).expect("invalid layout"); let slice = layout.into_slice(); @@ -20,13 +21,15 @@ impl WordsIndex { } } - pub fn set_doc_indexes(&self, word: &[u8], set: &Set) -> sled::Result<()> { - self.0.set(word, set.as_bytes())?; + pub fn set_doc_indexes(&self, word: &[u8], set: &Set) -> Result<(), rocksdb::Error> { + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.put_cf(cf, word, set.as_bytes())?; Ok(()) } - pub fn del_doc_indexes(&self, word: &[u8]) -> sled::Result<()> { - self.0.del(word)?; + pub fn del_doc_indexes(&self, word: &[u8]) -> Result<(), rocksdb::Error> { + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.delete_cf(cf, word)?; Ok(()) } } diff --git a/meilidb-data/src/lib.rs b/meilidb-data/src/lib.rs index 79cc3a3e9..520a3e6e2 100644 --- a/meilidb-data/src/lib.rs +++ b/meilidb-data/src/lib.rs @@ -6,7 +6,7 @@ mod ranked_map; mod serde; pub mod schema; -pub use sled; +pub use rocksdb; pub use self::database::{Database, Index, CustomSettings}; pub use self::number::Number; pub use self::ranked_map::RankedMap; diff --git a/meilidb-data/src/serde/mod.rs b/meilidb-data/src/serde/mod.rs index 1e2854c36..0fe5918cc 100644 --- a/meilidb-data/src/serde/mod.rs +++ b/meilidb-data/src/serde/mod.rs @@ -36,7 +36,7 @@ use crate::schema::SchemaAttr; pub enum SerializerError { DocumentIdNotFound, RmpError(RmpError), - SledError(sled::Error), + RocksdbError(rocksdb::Error), ParseNumberError(ParseNumberError), UnserializableType { type_name: &'static str }, UnindexableType { type_name: &'static str }, @@ -57,7 +57,7 @@ impl fmt::Display for SerializerError { write!(f, "serialized document does not have an id according to the schema") } SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e), - SerializerError::SledError(e) => write!(f, "sled related error: {}", e), + SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e), SerializerError::ParseNumberError(e) => { write!(f, "error while trying to parse a number: {}", e) }, @@ -89,9 +89,9 @@ impl From for SerializerError { } } -impl From for SerializerError { - fn from(error: sled::Error) -> SerializerError { - SerializerError::SledError(error) +impl From for SerializerError { + fn from(error: rocksdb::Error) -> SerializerError { + SerializerError::RocksdbError(error) } } diff --git a/meilidb/examples/create-database.rs b/meilidb/examples/create-database.rs index f19c32a31..1518b4298 100644 --- a/meilidb/examples/create-database.rs +++ b/meilidb/examples/create-database.rs @@ -59,7 +59,7 @@ fn index( let mut system = sysinfo::System::new(); - let index = database.create_index("default", schema.clone())?; + let index = database.create_index("test", schema.clone())?; let mut rdr = csv::Reader::from_path(csv_data_path)?; let mut raw_record = csv::StringRecord::new(); diff --git a/meilidb/examples/query-database.rs b/meilidb/examples/query-database.rs index 1be27dce1..4fd529c27 100644 --- a/meilidb/examples/query-database.rs +++ b/meilidb/examples/query-database.rs @@ -143,7 +143,7 @@ fn main() -> Result<(), Box> { let mut buffer = String::new(); let input = io::stdin(); - let index = database.open_index("default")?.unwrap(); + let index = database.open_index("test")?.unwrap(); let schema = index.schema(); println!("database prepared for you in {:.2?}", start.elapsed()); From ce61c16dbe578212ec788b629d2383825fa79a73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 23 May 2019 15:35:53 +0200 Subject: [PATCH 2/8] feat: Disable all the default RocksDB compression features --- meilidb-data/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/meilidb-data/Cargo.toml b/meilidb-data/Cargo.toml index 8c6fe5845..88c917017 100644 --- a/meilidb-data/Cargo.toml +++ b/meilidb-data/Cargo.toml @@ -16,7 +16,7 @@ ordered-float = { version = "1.0.2", features = ["serde"] } sdset = "0.3.2" serde = { version = "1.0.91", features = ["derive"] } serde_json = { version = "1.0.39", features = ["preserve_order"] } -rocksdb = "0.12.2" +rocksdb = { version = "0.12.2", default-features = false } toml = { version = "0.5.0", features = ["preserve_order"] } zerocopy = "0.2.2" From 6f258f71d590a257a2713dd447b82dad627a6328 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 23 May 2019 15:36:28 +0200 Subject: [PATCH 3/8] feat: Implement some convenient accessors for custom settings --- meilidb-data/src/database/custom_settings.rs | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/meilidb-data/src/database/custom_settings.rs b/meilidb-data/src/database/custom_settings.rs index b9227d0bb..112d1b327 100644 --- a/meilidb-data/src/database/custom_settings.rs +++ b/meilidb-data/src/database/custom_settings.rs @@ -1,13 +1,22 @@ use std::sync::Arc; -use std::ops::Deref; +use rocksdb::DBVector; #[derive(Clone)] pub struct CustomSettings(pub Arc, pub String); -impl Deref for CustomSettings { - type Target = rocksdb::DB; +impl CustomSettings { + pub fn set(&self, key: K, value: V) -> Result<(), rocksdb::Error> + where K: AsRef<[u8]>, + V: AsRef<[u8]>, + { + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.put_cf(cf, key, value) + } - fn deref(&self) -> &Self::Target { - &self.0 + pub fn get(&self, key: K) -> Result, rocksdb::Error> + where K: AsRef<[u8]>, + { + let cf = self.0.cf_handle(&self.1).unwrap(); + self.0.get_cf(cf, key) } } From 9fca74443ebe35c59d24b6b47a75c17c7c6fe7df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 24 May 2019 14:26:05 +0200 Subject: [PATCH 4/8] feat: Wrap the database index access to improve usability --- meilidb-data/src/database/custom_settings.rs | 10 ++-- meilidb-data/src/database/docs_words_index.rs | 14 ++--- meilidb-data/src/database/documents_index.rs | 23 +++----- meilidb-data/src/database/main_index.rs | 22 +++---- meilidb-data/src/database/mod.rs | 22 +++---- meilidb-data/src/database/raw_index.rs | 58 +++++++++++++++++++ meilidb-data/src/database/words_index.rs | 16 +++-- 7 files changed, 102 insertions(+), 63 deletions(-) diff --git a/meilidb-data/src/database/custom_settings.rs b/meilidb-data/src/database/custom_settings.rs index 112d1b327..7649d2b36 100644 --- a/meilidb-data/src/database/custom_settings.rs +++ b/meilidb-data/src/database/custom_settings.rs @@ -1,22 +1,20 @@ -use std::sync::Arc; use rocksdb::DBVector; +use crate::database::raw_index::InnerRawIndex; #[derive(Clone)] -pub struct CustomSettings(pub Arc, pub String); +pub struct CustomSettings(pub(crate) InnerRawIndex); impl CustomSettings { pub fn set(&self, key: K, value: V) -> Result<(), rocksdb::Error> where K: AsRef<[u8]>, V: AsRef<[u8]>, { - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.put_cf(cf, key, value) + self.0.set(key, value) } pub fn get(&self, key: K) -> Result, rocksdb::Error> where K: AsRef<[u8]>, { - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.get_cf(cf, key) + self.0.get(key) } } diff --git a/meilidb-data/src/database/docs_words_index.rs b/meilidb-data/src/database/docs_words_index.rs index 38430a9d1..f4af69ee8 100644 --- a/meilidb-data/src/database/docs_words_index.rs +++ b/meilidb-data/src/database/docs_words_index.rs @@ -1,15 +1,17 @@ use std::sync::Arc; + use meilidb_core::DocumentId; + +use crate::database::raw_index::InnerRawIndex; use super::Error; #[derive(Clone)] -pub struct DocsWordsIndex(pub Arc, pub String); +pub struct DocsWordsIndex(pub(crate) InnerRawIndex); impl DocsWordsIndex { pub fn doc_words(&self, id: DocumentId) -> Result, Error> { let key = id.0.to_be_bytes(); - let cf = self.0.cf_handle(&self.1).unwrap(); - match self.0.get_pinned_cf(cf, key)? { + match self.0.get_pinned(key)? { Some(bytes) => { let len = bytes.len(); let value = Arc::from(bytes.as_ref()); @@ -22,15 +24,13 @@ impl DocsWordsIndex { pub fn set_doc_words(&self, id: DocumentId, words: &fst::Set) -> Result<(), Error> { let key = id.0.to_be_bytes(); - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.put_cf(cf, key, words.as_fst().as_bytes())?; + self.0.set(key, words.as_fst().as_bytes())?; Ok(()) } pub fn del_doc_words(&self, id: DocumentId) -> Result<(), Error> { let key = id.0.to_be_bytes(); - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.delete_cf(cf, key)?; + self.0.delete(key)?; Ok(()) } } diff --git a/meilidb-data/src/database/documents_index.rs b/meilidb-data/src/database/documents_index.rs index 1326a480d..706fa9d84 100644 --- a/meilidb-data/src/database/documents_index.rs +++ b/meilidb-data/src/database/documents_index.rs @@ -1,45 +1,37 @@ -use std::sync::Arc; use std::convert::TryInto; use meilidb_core::DocumentId; use rocksdb::DBVector; +use crate::database::raw_index::InnerRawIndex; use crate::document_attr_key::DocumentAttrKey; use crate::schema::SchemaAttr; #[derive(Clone)] -pub struct DocumentsIndex(pub Arc, pub String); +pub struct DocumentsIndex(pub(crate) InnerRawIndex); impl DocumentsIndex { pub fn document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result, rocksdb::Error> { let key = DocumentAttrKey::new(id, attr).to_be_bytes(); - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.get_cf(cf, key) + self.0.get(key) } pub fn set_document_field(&self, id: DocumentId, attr: SchemaAttr, value: Vec) -> Result<(), rocksdb::Error> { let key = DocumentAttrKey::new(id, attr).to_be_bytes(); - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.put_cf(cf, key, value)?; + self.0.set(key, value)?; Ok(()) } pub fn del_document_field(&self, id: DocumentId, attr: SchemaAttr) -> Result<(), rocksdb::Error> { let key = DocumentAttrKey::new(id, attr).to_be_bytes(); - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.delete_cf(cf, key)?; + self.0.delete(key)?; Ok(()) } pub fn del_all_document_fields(&self, id: DocumentId) -> Result<(), rocksdb::Error> { let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes(); let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes(); - - let cf = self.0.cf_handle(&self.1).unwrap(); - let mut batch = rocksdb::WriteBatch::default(); - batch.delete_range_cf(cf, start, end)?; - self.0.write(batch)?; - + self.0.delete_range(start, end)?; Ok(()) } @@ -47,9 +39,8 @@ impl DocumentsIndex { let start = DocumentAttrKey::new(id, SchemaAttr::min()).to_be_bytes(); let end = DocumentAttrKey::new(id, SchemaAttr::max()).to_be_bytes(); - let cf = self.0.cf_handle(&self.1).unwrap(); let from = rocksdb::IteratorMode::From(&start[..], rocksdb::Direction::Forward); - let iter = self.0.iterator_cf(cf, from).unwrap(); + let iter = self.0.iterator(from).unwrap(); DocumentFieldsIter(iter, end.to_vec()) } diff --git a/meilidb-data/src/database/main_index.rs b/meilidb-data/src/database/main_index.rs index 251dd78e7..4625450c5 100644 --- a/meilidb-data/src/database/main_index.rs +++ b/meilidb-data/src/database/main_index.rs @@ -1,17 +1,17 @@ use std::sync::Arc; +use crate::database::raw_index::InnerRawIndex; use crate::ranked_map::RankedMap; use crate::schema::Schema; use super::Error; #[derive(Clone)] -pub struct MainIndex(pub Arc, pub String); +pub struct MainIndex(pub(crate) InnerRawIndex); impl MainIndex { pub fn schema(&self) -> Result, Error> { - let cf = self.0.cf_handle(&self.1).unwrap(); - match self.0.get_cf(cf, "schema")? { + match self.0.get_pinned("schema")? { Some(bytes) => { let schema = Schema::read_from_bin(bytes.as_ref())?; Ok(Some(schema)) @@ -23,14 +23,12 @@ impl MainIndex { pub fn set_schema(&self, schema: &Schema) -> Result<(), Error> { let mut bytes = Vec::new(); schema.write_to_bin(&mut bytes)?; - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.put_cf(cf, "schema", bytes)?; + self.0.set("schema", bytes)?; Ok(()) } pub fn words_set(&self) -> Result, Error> { - let cf = self.0.cf_handle(&self.1).unwrap(); - match self.0.get_pinned_cf(cf, "words")? { + match self.0.get_pinned("words")? { Some(bytes) => { let len = bytes.len(); let value = Arc::from(bytes.as_ref()); @@ -42,14 +40,11 @@ impl MainIndex { } pub fn set_words_set(&self, value: &fst::Set) -> Result<(), Error> { - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.put_cf(cf, "words", value.as_fst().as_bytes())?; - Ok(()) + self.0.set("words", value.as_fst().as_bytes()).map_err(Into::into) } pub fn ranked_map(&self) -> Result, Error> { - let cf = self.0.cf_handle(&self.1).unwrap(); - match self.0.get_cf(cf, "ranked-map")? { + match self.0.get_pinned("ranked-map")? { Some(bytes) => { let ranked_map = RankedMap::read_from_bin(bytes.as_ref())?; Ok(Some(ranked_map)) @@ -61,8 +56,7 @@ impl MainIndex { pub fn set_ranked_map(&self, value: &RankedMap) -> Result<(), Error> { let mut bytes = Vec::new(); value.write_to_bin(&mut bytes)?; - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.put_cf(cf, "ranked_map", bytes)?; + self.0.set("ranked_map", bytes)?; Ok(()) } } diff --git a/meilidb-data/src/database/mod.rs b/meilidb-data/src/database/mod.rs index 0231a6d2f..c2c38d62b 100644 --- a/meilidb-data/src/database/mod.rs +++ b/meilidb-data/src/database/mod.rs @@ -26,7 +26,7 @@ use self::documents_deletion::DocumentsDeletion; use self::documents_index::DocumentsIndex; use self::index::InnerIndex; use self::main_index::MainIndex; -use self::raw_index::RawIndex; +use self::raw_index::{RawIndex, InnerRawIndex}; use self::words_index::WordsIndex; pub struct Database { @@ -88,31 +88,31 @@ impl Database { let main = { self.inner.cf_handle(name).expect("cf not found"); - MainIndex(self.inner.clone(), name.to_owned()) + MainIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(name))) }; let words = { let cf_name = format!("{}-words", name); self.inner.cf_handle(&cf_name).expect("cf not found"); - WordsIndex(self.inner.clone(), cf_name) + WordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name))) }; let docs_words = { let cf_name = format!("{}-docs-words", name); self.inner.cf_handle(&cf_name).expect("cf not found"); - DocsWordsIndex(self.inner.clone(), cf_name) + DocsWordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name))) }; let documents = { let cf_name = format!("{}-documents", name); self.inner.cf_handle(&cf_name).expect("cf not found"); - DocumentsIndex(self.inner.clone(), cf_name) + DocumentsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name))) }; let custom = { let cf_name = format!("{}-custom", name); self.inner.cf_handle(&cf_name).expect("cf not found"); - CustomSettings(self.inner.clone(), cf_name) + CustomSettings(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name))) }; let raw_index = RawIndex { main, words, docs_words, documents, custom }; @@ -135,7 +135,7 @@ impl Database { Entry::Vacant(vacant) => { let main = { self.inner.create_cf(name, &rocksdb::Options::default())?; - MainIndex(self.inner.clone(), name.to_owned()) + MainIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(name))) }; if let Some(prev_schema) = main.schema()? { @@ -149,25 +149,25 @@ impl Database { let words = { let cf_name = format!("{}-words", name); self.inner.create_cf(&cf_name, &rocksdb::Options::default())?; - WordsIndex(self.inner.clone(), cf_name) + WordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name))) }; let docs_words = { let cf_name = format!("{}-docs-words", name); self.inner.create_cf(&cf_name, &rocksdb::Options::default())?; - DocsWordsIndex(self.inner.clone(), cf_name) + DocsWordsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name))) }; let documents = { let cf_name = format!("{}-documents", name); self.inner.create_cf(&cf_name, &rocksdb::Options::default())?; - DocumentsIndex(self.inner.clone(), cf_name) + DocumentsIndex(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name))) }; let custom = { let cf_name = format!("{}-custom", name); self.inner.create_cf(&cf_name, &rocksdb::Options::default())?; - CustomSettings(self.inner.clone(), cf_name) + CustomSettings(InnerRawIndex::new(self.inner.clone(), Arc::from(cf_name))) }; let mut indexes = self.indexes()?.unwrap_or_else(HashSet::new); diff --git a/meilidb-data/src/database/raw_index.rs b/meilidb-data/src/database/raw_index.rs index ada0fd357..793203a8d 100644 --- a/meilidb-data/src/database/raw_index.rs +++ b/meilidb-data/src/database/raw_index.rs @@ -1,3 +1,4 @@ +use std::sync::Arc; use super::{MainIndex, WordsIndex, DocsWordsIndex, DocumentsIndex, CustomSettings}; #[derive(Clone)] @@ -8,3 +9,60 @@ pub struct RawIndex { pub documents: DocumentsIndex, pub custom: CustomSettings, } + +#[derive(Clone)] +pub struct InnerRawIndex { + database: Arc, + name: Arc, +} + +impl InnerRawIndex { + pub fn new(database: Arc, name: Arc) -> InnerRawIndex { + InnerRawIndex { database, name } + } + + pub fn get(&self, key: K) -> Result, rocksdb::Error> + where K: AsRef<[u8]>, + { + let cf = self.database.cf_handle(&self.name).expect("cf not found"); + self.database.get_cf(cf, key) + } + + pub fn get_pinned(&self, key: K) -> Result, rocksdb::Error> + where K: AsRef<[u8]>, + { + let cf = self.database.cf_handle(&self.name).expect("cf not found"); + self.database.get_pinned_cf(cf, key) + } + + pub fn iterator(&self, from: rocksdb::IteratorMode) -> Result { + let cf = self.database.cf_handle(&self.name).expect("cf not found"); + self.database.iterator_cf(cf, from) + } + + pub fn set(&self, key: K, value: V) -> Result<(), rocksdb::Error> + where K: AsRef<[u8]>, + V: AsRef<[u8]>, + { + let cf = self.database.cf_handle(&self.name).expect("cf not found"); + self.database.put_cf(cf, key, value) + } + + pub fn delete(&self, key: K) -> Result<(), rocksdb::Error> + where K: AsRef<[u8]> + { + let cf = self.database.cf_handle(&self.name).expect("cf not found"); + self.database.delete_cf(cf, key) + } + + pub fn delete_range(&self, start: K, end: K) -> Result<(), rocksdb::Error> + where K: AsRef<[u8]>, + { + let mut batch = rocksdb::WriteBatch::default(); + + let cf = self.database.cf_handle(&self.name).expect("cf not found"); + batch.delete_range_cf(cf, start, end)?; + + self.database.write(batch) + } +} diff --git a/meilidb-data/src/database/words_index.rs b/meilidb-data/src/database/words_index.rs index 862a918c4..432a294e5 100644 --- a/meilidb-data/src/database/words_index.rs +++ b/meilidb-data/src/database/words_index.rs @@ -1,16 +1,16 @@ -use std::sync::Arc; - use meilidb_core::DocIndex; use sdset::{Set, SetBuf}; use zerocopy::{LayoutVerified, AsBytes}; +use crate::database::raw_index::InnerRawIndex; + #[derive(Clone)] -pub struct WordsIndex(pub Arc, pub String); +pub struct WordsIndex(pub(crate) InnerRawIndex); impl WordsIndex { pub fn doc_indexes(&self, word: &[u8]) -> Result>, rocksdb::Error> { - let cf = self.0.cf_handle(&self.1).unwrap(); - match self.0.get_cf(cf, word)? { + // we must force an allocation to make the memory aligned + match self.0.get(word)? { Some(bytes) => { let layout = LayoutVerified::new_slice(bytes.as_ref()).expect("invalid layout"); let slice = layout.into_slice(); @@ -22,14 +22,12 @@ impl WordsIndex { } pub fn set_doc_indexes(&self, word: &[u8], set: &Set) -> Result<(), rocksdb::Error> { - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.put_cf(cf, word, set.as_bytes())?; + self.0.set(word, set.as_bytes())?; Ok(()) } pub fn del_doc_indexes(&self, word: &[u8]) -> Result<(), rocksdb::Error> { - let cf = self.0.cf_handle(&self.1).unwrap(); - self.0.delete_cf(cf, word)?; + self.0.delete(word)?; Ok(()) } } From a147c09b068a1948f0be17594d8d4c4df6d0ef73 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 24 May 2019 14:37:04 +0200 Subject: [PATCH 5/8] feat: Make more functions accessible on the custom settings --- meilidb-data/src/database/custom_settings.rs | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/meilidb-data/src/database/custom_settings.rs b/meilidb-data/src/database/custom_settings.rs index 7649d2b36..46653bfb0 100644 --- a/meilidb-data/src/database/custom_settings.rs +++ b/meilidb-data/src/database/custom_settings.rs @@ -1,20 +1,13 @@ -use rocksdb::DBVector; +use std::ops::Deref; use crate::database::raw_index::InnerRawIndex; #[derive(Clone)] pub struct CustomSettings(pub(crate) InnerRawIndex); -impl CustomSettings { - pub fn set(&self, key: K, value: V) -> Result<(), rocksdb::Error> - where K: AsRef<[u8]>, - V: AsRef<[u8]>, - { - self.0.set(key, value) - } +impl Deref for CustomSettings { + type Target = InnerRawIndex; - pub fn get(&self, key: K) -> Result, rocksdb::Error> - where K: AsRef<[u8]>, - { - self.0.get(key) + fn deref(&self) -> &Self::Target { + &self.0 } } From e08edc2d6bea07f3827b1a412bc0b0ebd899bd0d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 25 May 2019 12:12:24 +0200 Subject: [PATCH 6/8] feat: Introduce some stats to ease debugging --- meilidb-data/src/database/documents_index.rs | 20 ++++++++++++++++++++ meilidb-data/src/database/index.rs | 17 +++++++++++++++++ meilidb-data/src/ranked_map.rs | 4 ++++ 3 files changed, 41 insertions(+) diff --git a/meilidb-data/src/database/documents_index.rs b/meilidb-data/src/database/documents_index.rs index 706fa9d84..60420a8a9 100644 --- a/meilidb-data/src/database/documents_index.rs +++ b/meilidb-data/src/database/documents_index.rs @@ -44,6 +44,26 @@ impl DocumentsIndex { DocumentFieldsIter(iter, end.to_vec()) } + + pub fn len(&self) -> Result { + let mut last_document_id = None; + let mut count = 0; + + let from = rocksdb::IteratorMode::Start; + let iterator = self.0.iterator(from)?; + + for (key, value) in iterator { + let slice = key.as_ref().try_into().unwrap(); + let document_id = DocumentAttrKey::from_be_bytes(slice).document_id; + + if Some(document_id) != last_document_id { + last_document_id = Some(document_id); + count += 1; + } + } + + Ok(count) + } } pub struct DocumentFieldsIter<'a>(rocksdb::DBIterator<'a>, Vec); diff --git a/meilidb-data/src/database/index.rs b/meilidb-data/src/database/index.rs index 5db72e085..fce37ace6 100644 --- a/meilidb-data/src/database/index.rs +++ b/meilidb-data/src/database/index.rs @@ -15,6 +15,13 @@ use crate::serde::Deserializer; use super::{Error, CustomSettings}; use super::{RawIndex, DocumentsAddition, DocumentsDeletion}; +#[derive(Copy, Clone)] +pub struct IndexStats { + pub number_of_words: usize, + pub number_of_documents: usize, + pub number_attrs_in_ranked_map: usize, +} + #[derive(Clone)] pub struct Index(pub ArcSwap); @@ -48,6 +55,16 @@ impl Index { Ok(index) } + pub fn stats(&self) -> Result { + let lease = self.0.lease(); + + Ok(IndexStats { + number_of_words: lease.words.len(), + number_of_documents: lease.raw.documents.len()?, + number_attrs_in_ranked_map: lease.ranked_map.len(), + }) + } + pub fn query_builder(&self) -> QueryBuilder { let lease = IndexLease(self.0.lease()); QueryBuilder::new(lease) diff --git a/meilidb-data/src/ranked_map.rs b/meilidb-data/src/ranked_map.rs index 7c10a0649..609cee954 100644 --- a/meilidb-data/src/ranked_map.rs +++ b/meilidb-data/src/ranked_map.rs @@ -9,6 +9,10 @@ use crate::{SchemaAttr, Number}; pub struct RankedMap(HashMap<(DocumentId, SchemaAttr), Number>); impl RankedMap { + pub fn len(&self) -> usize { + self.0.len() + } + pub fn insert(&mut self, document: DocumentId, attribute: SchemaAttr, number: Number) { self.0.insert((document, attribute), number); } From 62c8f1ba04b4c511648e149eb4061f96851c0607 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 26 May 2019 11:36:47 +0200 Subject: [PATCH 7/8] feat: Fix the index opening when index already exists --- meilidb-data/src/database/mod.rs | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/meilidb-data/src/database/mod.rs b/meilidb-data/src/database/mod.rs index c2c38d62b..3c66db026 100644 --- a/meilidb-data/src/database/mod.rs +++ b/meilidb-data/src/database/mod.rs @@ -39,17 +39,26 @@ impl Database { let path = path.as_ref(); let cache = RwLock::new(HashMap::new()); - let inner = { - let options = { - let mut options = rocksdb::Options::default(); - options.create_if_missing(true); - options - }; - let cfs = rocksdb::DB::list_cf(&options, path).unwrap_or(Vec::new()); - Arc::new(rocksdb::DB::open_cf(&options, path, cfs)?) + let options = { + let mut options = rocksdb::Options::default(); + options.create_if_missing(true); + options }; + let cfs = rocksdb::DB::list_cf(&options, path).unwrap_or(Vec::new()); + let inner = Arc::new(rocksdb::DB::open_cf(&options, path, &cfs)?); + let database = Database { cache, inner }; - Ok(Database { cache, inner }) + let mut indexes: Vec<_> = cfs.iter() + .filter_map(|c| c.split('-').nth(0).filter(|&c| c != "default")) + .collect(); + indexes.sort_unstable(); + indexes.dedup(); + + for index in indexes { + database.open_index(index)?; + } + + Ok(database) } pub fn indexes(&self) -> Result>, Error> { From 07f447c45743522ab3cc99b727c001fbe1d1dcc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 28 May 2019 17:38:59 +0200 Subject: [PATCH 8/8] feat: Force RocksDB compaction --- .../src/database/documents_addition.rs | 1 + .../src/database/documents_deletion.rs | 1 + meilidb-data/src/database/raw_index.rs | 18 ++++++++++++++++ meilidb-data/src/database/words_index.rs | 21 ++++++++++++++++--- 4 files changed, 38 insertions(+), 3 deletions(-) diff --git a/meilidb-data/src/database/documents_addition.rs b/meilidb-data/src/database/documents_addition.rs index 9f0794346..177d1975c 100644 --- a/meilidb-data/src/database/documents_addition.rs +++ b/meilidb-data/src/database/documents_addition.rs @@ -122,6 +122,7 @@ impl<'a> DocumentsAddition<'a> { let ranked_map = self.ranked_map; let schema = lease_inner.schema.clone(); let raw = lease_inner.raw.clone(); + lease_inner.raw.compact(); let inner = InnerIndex { words, schema, ranked_map, raw }; self.inner.0.store(Arc::new(inner)); diff --git a/meilidb-data/src/database/documents_deletion.rs b/meilidb-data/src/database/documents_deletion.rs index 46b423c7a..e89923199 100644 --- a/meilidb-data/src/database/documents_deletion.rs +++ b/meilidb-data/src/database/documents_deletion.rs @@ -121,6 +121,7 @@ impl<'a> DocumentsDeletion<'a> { let ranked_map = lease_inner.ranked_map.clone(); let schema = lease_inner.schema.clone(); let raw = lease_inner.raw.clone(); + lease_inner.raw.compact(); let inner = InnerIndex { words, schema, ranked_map, raw }; self.inner.0.store(Arc::new(inner)); diff --git a/meilidb-data/src/database/raw_index.rs b/meilidb-data/src/database/raw_index.rs index 793203a8d..8c129ac2d 100644 --- a/meilidb-data/src/database/raw_index.rs +++ b/meilidb-data/src/database/raw_index.rs @@ -10,6 +10,16 @@ pub struct RawIndex { pub custom: CustomSettings, } +impl RawIndex { + pub(crate) fn compact(&self) { + self.main.0.compact_range(None::<&[u8]>, None::<&[u8]>); + self.words.0.compact_range(None::<&[u8]>, None::<&[u8]>); + self.docs_words.0.compact_range(None::<&[u8]>, None::<&[u8]>); + self.documents.0.compact_range(None::<&[u8]>, None::<&[u8]>); + self.custom.0.compact_range(None::<&[u8]>, None::<&[u8]>); + } +} + #[derive(Clone)] pub struct InnerRawIndex { database: Arc, @@ -65,4 +75,12 @@ impl InnerRawIndex { self.database.write(batch) } + + pub fn compact_range(&self, start: Option, end: Option) + where S: AsRef<[u8]>, + E: AsRef<[u8]>, + { + let cf = self.database.cf_handle(&self.name).expect("cf not found"); + self.database.compact_range_cf(cf, start, end) + } } diff --git a/meilidb-data/src/database/words_index.rs b/meilidb-data/src/database/words_index.rs index 432a294e5..4f2163650 100644 --- a/meilidb-data/src/database/words_index.rs +++ b/meilidb-data/src/database/words_index.rs @@ -12,9 +12,24 @@ impl WordsIndex { // we must force an allocation to make the memory aligned match self.0.get(word)? { Some(bytes) => { - let layout = LayoutVerified::new_slice(bytes.as_ref()).expect("invalid layout"); - let slice = layout.into_slice(); - let setbuf = SetBuf::new_unchecked(slice.to_vec()); + let vec = match LayoutVerified::new_slice(bytes.as_ref()) { + Some(layout) => layout.into_slice().to_vec(), + None => { + let len = bytes.as_ref().len(); + let count = len / std::mem::size_of::(); + let mut buf: Vec = Vec::with_capacity(count); + unsafe { + let src = bytes.as_ref().as_ptr(); + let dst = buf.as_mut_ptr() as *mut u8; + std::ptr::copy_nonoverlapping(src, dst, len); + buf.set_len(count); + } + buf + } + }; + + let setbuf = SetBuf::new_unchecked(vec); + Ok(Some(setbuf)) }, None => Ok(None),