From 700d625c9385ef2aa007b595d645628358da172d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Sun, 2 Dec 2018 16:45:17 +0100
Subject: [PATCH] feat: Introduce the Database and DatabaseView

---
NOTE(review): this patch was restored from a copy whose line breaks,
indentation and `<...>` generic arguments had been stripped. The generic
arguments (and the `dyn` in `Box<dyn Error>`) were re-inferred from the
surrounding code; the author address on `From:` could not be recovered.
Verify against commit 700d625c before applying.

 src/database/mod.rs | 391 ++++++++++++++++++++++++++++++++++++++++++++
 src/index/schema.rs |   4 +
 src/lib.rs          |   1 +
 3 files changed, 396 insertions(+)
 create mode 100644 src/database/mod.rs

diff --git a/src/database/mod.rs b/src/database/mod.rs
new file mode 100644
index 000000000..961b98ab2
--- /dev/null
+++ b/src/database/mod.rs
@@ -0,0 +1,391 @@
+use std::io::{Cursor, Write};
+use std::{fmt, marker};
+use std::error::Error;
+use std::path::Path;
+
+use rocksdb::rocksdb::{DB, Snapshot, DBVector};
+use rocksdb::rocksdb_options::ReadOptions;
+use byteorder::{NetworkEndian, WriteBytesExt};
+use serde::de::{DeserializeOwned, Visitor};
+use serde::de::value::MapDeserializer;
+
+use crate::index::schema::{Schema, SchemaAttr};
+use crate::blob::positive::PositiveBlob;
+use crate::index::update::Update;
+use crate::DocumentId;
+
+const DATA_INDEX: &[u8] = b"data-index";
+const DATA_SCHEMA: &[u8] = b"data-schema";
+
+const DOC_KEY_LEN: usize = 4 + std::mem::size_of::<u64>();
+const DOC_KEY_ATTR_LEN: usize = DOC_KEY_LEN + 1 + std::mem::size_of::<u32>();
+
+// FIXME Do not panic!
+fn retrieve_data_schema(snapshot: &Snapshot<&DB>) -> Result<Schema, Box<dyn Error>> {
+    match snapshot.get(DATA_SCHEMA)? {
+        Some(vector) => Ok(Schema::read_from(&*vector)?),
+        None => panic!("BUG: no schema found in the database"),
+    }
+}
+
+fn retrieve_data_index(snapshot: &Snapshot<&DB>) -> Result<PositiveBlob, Box<dyn Error>> {
+    match snapshot.get(DATA_INDEX)? {
+        Some(vector) => Ok(bincode::deserialize(&*vector)?),
+        None => Ok(PositiveBlob::default()),
+    }
+}
+
+fn retrieve_document_attribute(
+    snapshot: &Snapshot<&DB>,
+    id: DocumentId,
+    attr: SchemaAttr
+) -> Result<Option<DBVector>, Box<dyn Error>>
+{
+    let attribute_key = document_key_attr(id, attr);
+    Ok(snapshot.get(&attribute_key)?)
+}
+
+fn document_key(id: DocumentId) -> [u8; DOC_KEY_LEN] {
+    let mut key = [0; DOC_KEY_LEN];
+
+    let mut wtr = Cursor::new(&mut key[..]);
+    wtr.write_all(b"doc-").unwrap();
+    wtr.write_u64::<NetworkEndian>(id).unwrap();
+
+    key
+}
+
+fn document_key_attr(id: DocumentId, attr: SchemaAttr) -> [u8; DOC_KEY_ATTR_LEN] {
+    let mut key = [0; DOC_KEY_ATTR_LEN];
+    let raw_key = document_key(id);
+
+    let mut wtr = Cursor::new(&mut key[..]);
+    wtr.write_all(&raw_key).unwrap();
+    wtr.write_all(b"-").unwrap();
+    wtr.write_u32::<NetworkEndian>(attr.as_u32()).unwrap();
+
+    key
+}
+
+pub struct Database(DB);
+
+impl Database {
+    pub fn create(path: &Path) -> Result<Database, ()> {
+        unimplemented!()
+    }
+
+    pub fn open(path: &Path) -> Result<Database, ()> {
+        unimplemented!()
+    }
+
+    pub fn ingest_update_file(&self, update: Update) -> Result<(), ()> {
+        unimplemented!()
+    }
+
+    pub fn view(&self) -> Result<DatabaseView, Box<dyn Error>> {
+        let snapshot = self.0.snapshot();
+        DatabaseView::new(snapshot)
+    }
+}
+
+pub struct DatabaseView<'a> {
+    snapshot: Snapshot<&'a DB>,
+    schema: Schema,
+}
+
+impl<'a> DatabaseView<'a> {
+    pub fn new(snapshot: Snapshot<&'a DB>) -> Result<DatabaseView, Box<dyn Error>> {
+        let schema = retrieve_data_schema(&snapshot)?;
+        Ok(DatabaseView { snapshot, schema })
+    }
+
+    pub fn into_snapshot(self) -> Snapshot<&'a DB> {
+        self.snapshot
+    }
+
+    // TODO create an enum error type
+    pub fn retrieve_document<D>(&self, id: DocumentId) -> Result<D, Box<dyn Error>>
+    where D: DeserializeOwned
+    {
+        let mut deserializer = Deserializer::new(&self.snapshot, &self.schema, id);
+        Ok(D::deserialize(&mut deserializer)?)
+    }
+
+    pub fn retrieve_documents<D, I>(&self, ids: I) -> DocumentIter<D, I::IntoIter>
+    where D: DeserializeOwned,
+          I: IntoIterator<Item=DocumentId>,
+    {
+        DocumentIter {
+            database_view: self,
+            document_ids: ids.into_iter(),
+            _phantom: marker::PhantomData,
+        }
+    }
+}
+
+// TODO impl ExactSizeIterator, DoubleEndedIterator
+pub struct DocumentIter<'a, D, I> {
+    database_view: &'a DatabaseView<'a>,
+    document_ids: I,
+    _phantom: marker::PhantomData<D>,
+}
+
+impl<'a, D, I> Iterator for DocumentIter<'a, D, I>
+where D: DeserializeOwned,
+      I: Iterator<Item=DocumentId>,
+{
+    type Item = Result<D, Box<dyn Error>>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        match self.document_ids.next() {
+            Some(id) => Some(self.database_view.retrieve_document(id)),
+            None => None
+        }
+    }
+}
+
+struct Deserializer<'a> {
+    snapshot: &'a Snapshot<&'a DB>,
+    schema: &'a Schema,
+    document_id: DocumentId,
+}
+
+impl<'a> Deserializer<'a> {
+    fn new(snapshot: &'a Snapshot<&DB>, schema: &'a Schema, doc: DocumentId) -> Self {
+        Deserializer { snapshot, schema, document_id: doc }
+    }
+}
+
+impl<'de, 'a, 'b> serde::de::Deserializer<'de> for &'b mut Deserializer<'a> {
+    type Error = DeserializerError;
+
+    fn deserialize_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_i8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_i16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_i32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_i64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_u8<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_u16<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_u32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_u64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_f32<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_f64<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_char<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_str<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_string<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_bytes<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_byte_buf<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_option<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_unit<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_unit_struct<V>(
+        self,
+        name: &'static str,
+        visitor: V
+    ) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_newtype_struct<V>(
+        self,
+        name: &'static str,
+        visitor: V
+    ) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_tuple<V>(
+        self,
+        len: usize,
+        visitor: V
+    ) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_tuple_struct<V>(
+        self,
+        name: &'static str,
+        len: usize,
+        visitor: V
+    ) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_map<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_struct<V>(
+        self,
+        name: &'static str,
+        fields: &'static [&'static str],
+        visitor: V
+    ) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        let mut options = ReadOptions::new();
+        options.set_iterate_lower_bound(&document_key(self.document_id));
+        options.set_iterate_upper_bound(&document_key(self.document_id + 1));
+
+        let mut db_iter = self.snapshot.iter_opt(options);
+        let iter = db_iter.map(|(key, value)| ("hello", "ok"));
+
+        // Create the DocumentKey and DocumentKeyAttr types
+        // to help create and parse document keys attributes...
+        unimplemented!();
+
+        let map_deserializer = MapDeserializer::new(iter);
+        visitor.visit_map(map_deserializer)
+    }
+
+    fn deserialize_enum<V>(
+        self,
+        name: &'static str,
+        variants: &'static [&'static str],
+        visitor: V
+    ) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_identifier<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>,
+    {
+        unimplemented!()
+    }
+
+    fn deserialize_ignored_any<V>(self, visitor: V) -> Result<V::Value, Self::Error>
+    where V: Visitor<'de>
+    {
+        unimplemented!()
+    }
+}
+
+#[derive(Debug)]
+struct DeserializerError;
+
+impl serde::de::Error for DeserializerError {
+    fn custom<T: fmt::Display>(msg: T) -> Self {
+        unimplemented!()
+    }
+}
+
+impl fmt::Display for DeserializerError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        unimplemented!()
+    }
+}
+
+impl Error for DeserializerError {}
diff --git a/src/index/schema.rs b/src/index/schema.rs
index 426b4df8d..286d48398 100644
--- a/src/index/schema.rs
+++ b/src/index/schema.rs
@@ -108,6 +108,10 @@ impl Schema {
     pub fn attribute<S: AsRef<str>>(&self, name: S) -> Option<SchemaAttr> {
         self.attrs.get(name.as_ref()).cloned()
     }
+
+    pub fn attribute_name(&self, attr: SchemaAttr) -> &str {
+        unimplemented!("cannot retrieve the attribute name by its attribute number")
+    }
 }
 
 #[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq)]
diff --git a/src/lib.rs b/src/lib.rs
index 96a3f5d2f..9c8f84c8a 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -3,6 +3,7 @@
 
 pub mod automaton;
 pub mod blob;
+pub mod database;
 pub mod data;
 pub mod retrieve;
 pub mod index;