From ea0ee070ef4fb2b228603ea9382317e4c53a0cd5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Fri, 19 Apr 2019 15:50:53 +0200
Subject: [PATCH] feat: Introduce the Serializer

Which will serialize documents fields as message pack in the kv-store
---
Review notes (changes relative to the originally posted patch):
  - forward_to_unserializable_type!: use stringify!($ty). "$ty" inside a string
    literal is never expanded, so every error reported the type as "$ty".
  - SerializerError Display: the UnserializableType message no longer embeds a
    raw newline and the source indentation (string line continuation).
  - SerializerError Display: drop the needless borrow in f.write_str(&s).
  - MapSerializer::serialize_value: unwrap() replaced by expect() stating the
    serde call-order invariant.
  - Doc comments added on the new public items; hunk sizes and diffstat updated.
    Blob ids on the "index" lines still refer to the original revision.

 meilidb-data/src/database.rs             |  24 +-
 meilidb-data/src/indexer.rs              |   8 +
 meilidb-data/src/serde/extract_string.rs | 149 +++++++++++++
 meilidb-data/src/serde/mod.rs            |  73 ++++++
 meilidb-data/src/serde/serializer.rs     | 276 +++++++++++++++++++++++
 5 files changed, 525 insertions(+), 5 deletions(-)
 create mode 100644 meilidb-data/src/serde/extract_string.rs
 create mode 100644 meilidb-data/src/serde/serializer.rs

diff --git a/meilidb-data/src/database.rs b/meilidb-data/src/database.rs
index fea4d84d1..ef67227bd 100644
--- a/meilidb-data/src/database.rs
+++ b/meilidb-data/src/database.rs
@@ -19,6 +19,7 @@ use sled::IVec;
 
 use crate::{Schema, SchemaAttr, RankedMap};
 use crate::serde::Deserializer;
+use crate::indexer::Indexer;
 
 #[derive(Debug)]
 pub enum Error {
@@ -240,7 +241,7 @@ impl RawIndex {
         id: DocumentId,
         attr: SchemaAttr,
         value: V,
-    ) -> Result<Option<IVec>, Error>
+    ) -> Result<Option<IVec>, sled::Error>
     where IVec: From<V>,
     {
         let key = document_key(id, attr);
@@ -251,7 +252,7 @@ impl RawIndex {
         &self,
         id: DocumentId,
         attr: SchemaAttr
-    ) -> Result<Option<IVec>, Error>
+    ) -> Result<Option<IVec>, sled::Error>
     {
         let key = document_key(id, attr);
         Ok(self.inner.get(key)?)
@@ -267,7 +268,7 @@ impl RawIndex {
         &self,
         id: DocumentId,
         attr: SchemaAttr
-    ) -> Result<Option<IVec>, Error>
+    ) -> Result<Option<IVec>, sled::Error>
     {
         let key = document_key(id, attr);
         Ok(self.inner.del(key)?)
@@ -358,10 +359,23 @@ impl Index {
     }
 }
 
-pub struct DocumentsAddition(RawIndex);
+pub struct DocumentsAddition {
+    inner: RawIndex,
+    indexer: Indexer,
+}
 
 impl DocumentsAddition {
     pub fn from_raw(inner: RawIndex) -> DocumentsAddition {
+        DocumentsAddition { inner, indexer: Indexer::new() }
+    }
+
+    pub fn update_document<D>(&mut self, document: D) -> Result<(), Error>
+    where D: serde::Serialize,
+    {
+        unimplemented!()
+    }
+
+    pub fn finalize(self) -> sled::Result<()> {
         unimplemented!()
     }
 }
@@ -380,7 +394,7 @@ impl DocumentsDeletion {
         self.documents.push(id);
     }
 
-    pub fn commit(mut self) -> Result<(), Error> {
+    pub fn finalize(mut self) -> Result<(), Error> {
         self.documents.sort_unstable();
         self.documents.dedup();
 
diff --git a/meilidb-data/src/indexer.rs b/meilidb-data/src/indexer.rs
index 82a4ae156..a1be35a93 100644
--- a/meilidb-data/src/indexer.rs
+++ b/meilidb-data/src/indexer.rs
@@ -23,6 +23,14 @@ impl Indexer {
         }
     }
 
+    /// Creates an empty indexer that stops indexing a text at word index `limit`.
+    pub fn with_word_limit(limit: usize) -> Indexer {
+        Indexer {
+            word_limit: limit,
+            indexed: BTreeMap::new(),
+        }
+    }
+
     pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
         for token in Tokenizer::new(text) {
             if token.word_index >= self.word_limit { break }
diff --git a/meilidb-data/src/serde/extract_string.rs b/meilidb-data/src/serde/extract_string.rs
new file mode 100644
index 000000000..9c3ef25e5
--- /dev/null
+++ b/meilidb-data/src/serde/extract_string.rs
@@ -0,0 +1,149 @@
+use serde::Serialize;
+use serde::ser;
+
+use super::SerializerError;
+
+/// A serializer that extracts a `String` from a string value.
+///
+/// Newtype structs are forwarded to their inner value; any other type is rejected.
+pub struct ExtractString;
+
+impl ser::Serializer for ExtractString {
+    type Ok = String;
+    type Error = SerializerError;
+    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
+
+    forward_to_unserializable_type! {
+        bool => serialize_bool,
+        char => serialize_char,
+
+        i8 => serialize_i8,
+        i16 => serialize_i16,
+        i32 => serialize_i32,
+        i64 => serialize_i64,
+
+        u8 => serialize_u8,
+        u16 => serialize_u16,
+        u32 => serialize_u32,
+        u64 => serialize_u64,
+
+        f32 => serialize_f32,
+        f64 => serialize_f64,
+    }
+
+    fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
+        Ok(value.to_string())
+    }
+
+    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "&[u8]" })
+    }
+
+    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "Option" })
+    }
+
+    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
+    where T: Serialize,
+    {
+        Err(SerializerError::UnserializableType { name: "Option" })
+    }
+
+    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "()" })
+    }
+
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "unit struct" })
+    }
+
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str
+    ) -> Result<Self::Ok, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { name: "unit variant" })
+    }
+
+    fn serialize_newtype_struct<T: ?Sized>(
+        self,
+        _name: &'static str,
+        value: &T
+    ) -> Result<Self::Ok, Self::Error>
+    where T: Serialize,
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_newtype_variant<T: ?Sized>(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _value: &T
+    ) -> Result<Self::Ok, Self::Error>
+    where T: Serialize,
+    {
+        Err(SerializerError::UnserializableType { name: "newtype variant" })
+    }
+
+    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "sequence" })
+    }
+
+    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "tuple" })
+    }
+
+    fn serialize_tuple_struct(
+        self,
+        _name: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeTupleStruct, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { name: "tuple struct" })
+    }
+
+    fn serialize_tuple_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeTupleVariant, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { name: "tuple variant" })
+    }
+
+    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "map" })
+    }
+
+    fn serialize_struct(
+        self,
+        _name: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeStruct, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { name: "struct" })
+    }
+
+    fn serialize_struct_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeStructVariant, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { name: "struct variant" })
+    }
+}
diff --git a/meilidb-data/src/serde/mod.rs b/meilidb-data/src/serde/mod.rs
index beb7660e9..284c970cf 100644
--- a/meilidb-data/src/serde/mod.rs
+++ b/meilidb-data/src/serde/mod.rs
@@ -1,3 +1,76 @@
+// Generates `ser::Serializer` methods that reject the listed primitive types with
+// `SerializerError::UnserializableType`, naming the rejected type in the error.
+macro_rules! forward_to_unserializable_type {
+    ($($ty:ident => $se_method:ident,)*) => {
+        $(
+            fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
+                Err(SerializerError::UnserializableType { name: stringify!($ty) })
+            }
+        )*
+    }
+}
+
 mod deserializer;
+mod serializer;
+mod extract_string;
 
 pub use self::deserializer::Deserializer;
+pub use self::serializer::Serializer;
+pub use self::extract_string::ExtractString;
+
+use std::{fmt, error::Error};
+use rmp_serde::encode::Error as RmpError;
+use serde::ser;
+
+/// Errors returned by the `Serializer` and `ExtractString` serializers.
+#[derive(Debug)]
+pub enum SerializerError {
+    DocumentIdNotFound,
+    RmpError(RmpError),
+    SledError(sled::Error),
+    UnserializableType { name: &'static str },
+    Custom(String),
+}
+
+impl ser::Error for SerializerError {
+    fn custom<T: fmt::Display>(msg: T) -> Self {
+        SerializerError::Custom(msg.to_string())
+    }
+}
+
+impl fmt::Display for SerializerError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        match self {
+            SerializerError::DocumentIdNotFound => {
+                write!(f, "serialized document does not have an id according to the schema")
+            }
+            SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
+            SerializerError::SledError(e) => write!(f, "sled related error: {}", e),
+            SerializerError::UnserializableType { name } => {
+                write!(f, "Only struct and map types are considered valid documents and \
+                           can be serialized, not {} types directly.", name)
+            },
+            SerializerError::Custom(s) => f.write_str(s),
+        }
+    }
+}
+
+impl Error for SerializerError {}
+
+impl From<String> for SerializerError {
+    fn from(value: String) -> SerializerError {
+        SerializerError::Custom(value)
+    }
+}
+
+impl From<RmpError> for SerializerError {
+    fn from(error: RmpError) -> SerializerError {
+        SerializerError::RmpError(error)
+    }
+}
+
+impl From<sled::Error> for SerializerError {
+    fn from(error: sled::Error) -> SerializerError {
+        SerializerError::SledError(error)
+    }
+}
diff --git a/meilidb-data/src/serde/serializer.rs b/meilidb-data/src/serde/serializer.rs
new file mode 100644
index 000000000..7a5808cfd
--- /dev/null
+++ b/meilidb-data/src/serde/serializer.rs
@@ -0,0 +1,276 @@
+use std::collections::{HashSet, HashMap};
+use std::fmt;
+use std::error::Error;
+
+use meilidb_core::DocumentId;
+use serde::{de, ser};
+
+use crate::schema::Schema;
+use crate::database::RawIndex;
+use super::{SerializerError, ExtractString};
+
+/// Serializes a document (a struct or a map) field by field into a `RawIndex`.
+///
+/// Fields that the schema knows and marks as stored are encoded as MessagePack and
+/// written under `document_id`. Newtype structs are forwarded to their inner value;
+/// any other top-level type is rejected with `SerializerError::UnserializableType`.
+pub struct Serializer<'a> {
+    pub schema: &'a Schema,
+    pub index: &'a RawIndex,
+    pub document_id: DocumentId,
+}
+
+impl<'a> ser::Serializer for Serializer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
+    type SerializeMap = MapSerializer<'a>;
+    type SerializeStruct = StructSerializer<'a>;
+    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
+
+    forward_to_unserializable_type! {
+        bool => serialize_bool,
+        char => serialize_char,
+
+        i8 => serialize_i8,
+        i16 => serialize_i16,
+        i32 => serialize_i32,
+        i64 => serialize_i64,
+
+        u8 => serialize_u8,
+        u16 => serialize_u16,
+        u32 => serialize_u32,
+        u64 => serialize_u64,
+
+        f32 => serialize_f32,
+        f64 => serialize_f64,
+    }
+
+    fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "str" })
+    }
+
+    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "&[u8]" })
+    }
+
+    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "Option" })
+    }
+
+    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
+    where T: ser::Serialize,
+    {
+        Err(SerializerError::UnserializableType { name: "Option" })
+    }
+
+    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "()" })
+    }
+
+    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "unit struct" })
+    }
+
+    fn serialize_unit_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str
+    ) -> Result<Self::Ok, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { name: "unit variant" })
+    }
+
+    fn serialize_newtype_struct<T: ?Sized>(
+        self,
+        _name: &'static str,
+        value: &T
+    ) -> Result<Self::Ok, Self::Error>
+    where T: ser::Serialize,
+    {
+        value.serialize(self)
+    }
+
+    fn serialize_newtype_variant<T: ?Sized>(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _value: &T
+    ) -> Result<Self::Ok, Self::Error>
+    where T: ser::Serialize,
+    {
+        Err(SerializerError::UnserializableType { name: "newtype variant" })
+    }
+
+    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "sequence" })
+    }
+
+    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
+        Err(SerializerError::UnserializableType { name: "tuple" })
+    }
+
+    fn serialize_tuple_struct(
+        self,
+        _name: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeTupleStruct, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { name: "tuple struct" })
+    }
+
+    fn serialize_tuple_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeTupleVariant, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { name: "tuple variant" })
+    }
+
+    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
+        Ok(MapSerializer {
+            schema: self.schema,
+            document_id: self.document_id,
+            index: self.index,
+            current_key_name: None,
+        })
+    }
+
+    fn serialize_struct(
+        self,
+        _name: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeStruct, Self::Error>
+    {
+        Ok(StructSerializer {
+            schema: self.schema,
+            document_id: self.document_id,
+            index: self.index,
+        })
+    }
+
+    fn serialize_struct_variant(
+        self,
+        _name: &'static str,
+        _variant_index: u32,
+        _variant: &'static str,
+        _len: usize
+    ) -> Result<Self::SerializeStructVariant, Self::Error>
+    {
+        Err(SerializerError::UnserializableType { name: "struct variant" })
+    }
+}
+
+/// Map half of `Serializer`: keeps the pending key until its value is serialized.
+pub struct MapSerializer<'a> {
+    pub schema: &'a Schema,
+    pub document_id: DocumentId,
+    pub index: &'a RawIndex,
+    pub current_key_name: Option<String>,
+}
+
+impl<'a> ser::SerializeMap for MapSerializer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
+    where T: ser::Serialize,
+    {
+        let key = key.serialize(ExtractString)?;
+        self.current_key_name = Some(key);
+        Ok(())
+    }
+
+    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
+    where T: ser::Serialize,
+    {
+        let key = self.current_key_name.take().expect("serialize_value called before serialize_key");
+        self.serialize_entry(&key, value)
+    }
+
+    fn serialize_entry<K: ?Sized, V: ?Sized>(
+        &mut self,
+        key: &K,
+        value: &V,
+    ) -> Result<(), Self::Error>
+    where K: ser::Serialize, V: ser::Serialize,
+    {
+        let key = key.serialize(ExtractString)?;
+
+        serialize_value(
+            self.schema,
+            self.document_id,
+            self.index,
+            &key,
+            value,
+        )
+    }
+
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        Ok(())
+    }
+}
+
+/// Struct half of `Serializer`: stores each field as soon as it is serialized.
+pub struct StructSerializer<'a> {
+    pub schema: &'a Schema,
+    pub document_id: DocumentId,
+    pub index: &'a RawIndex,
+}
+
+impl<'a> ser::SerializeStruct for StructSerializer<'a> {
+    type Ok = ();
+    type Error = SerializerError;
+
+    fn serialize_field<T: ?Sized>(
+        &mut self,
+        key: &'static str,
+        value: &T,
+    ) -> Result<(), Self::Error>
+    where T: ser::Serialize,
+    {
+        serialize_value(
+            self.schema,
+            self.document_id,
+            self.index,
+            key,
+            value,
+        )
+    }
+
+    fn end(self) -> Result<Self::Ok, Self::Error> {
+        Ok(())
+    }
+}
+
+/// Stores `value` under `key` for `document_id`, encoded as MessagePack.
+///
+/// Keys that are unknown to the schema or not marked as stored are silently skipped.
+fn serialize_value<T: ?Sized>(
+    schema: &Schema,
+    document_id: DocumentId,
+    index: &RawIndex,
+    key: &str,
+    value: &T,
+) -> Result<(), SerializerError>
+where T: ser::Serialize,
+{
+    if let Some(attr) = schema.attribute(key) {
+        let props = schema.props(attr);
+
+        if props.is_stored() {
+            let value = rmp_serde::to_vec_named(value)?;
+            index.set_document_attribute(document_id, attr, value)?;
+        }
+    }
+
+    Ok(())
+}