feat: Introduce the Serializer

Which will serialize documents fields as message pack in the kv-store
This commit is contained in:
Clément Renault 2019-04-19 15:50:53 +02:00
parent 2a69170f14
commit ea0ee070ef
No known key found for this signature in database
GPG Key ID: 0151CDAB43460DAE
5 changed files with 508 additions and 5 deletions

View File

@ -19,6 +19,7 @@ use sled::IVec;
use crate::{Schema, SchemaAttr, RankedMap}; use crate::{Schema, SchemaAttr, RankedMap};
use crate::serde::Deserializer; use crate::serde::Deserializer;
use crate::indexer::Indexer;
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
@ -240,7 +241,7 @@ impl RawIndex {
id: DocumentId, id: DocumentId,
attr: SchemaAttr, attr: SchemaAttr,
value: V, value: V,
) -> Result<Option<IVec>, Error> ) -> Result<Option<IVec>, sled::Error>
where IVec: From<V>, where IVec: From<V>,
{ {
let key = document_key(id, attr); let key = document_key(id, attr);
@ -251,7 +252,7 @@ impl RawIndex {
&self, &self,
id: DocumentId, id: DocumentId,
attr: SchemaAttr attr: SchemaAttr
) -> Result<Option<IVec>, Error> ) -> Result<Option<IVec>, sled::Error>
{ {
let key = document_key(id, attr); let key = document_key(id, attr);
Ok(self.inner.get(key)?) Ok(self.inner.get(key)?)
@ -267,7 +268,7 @@ impl RawIndex {
&self, &self,
id: DocumentId, id: DocumentId,
attr: SchemaAttr attr: SchemaAttr
) -> Result<Option<IVec>, Error> ) -> Result<Option<IVec>, sled::Error>
{ {
let key = document_key(id, attr); let key = document_key(id, attr);
Ok(self.inner.del(key)?) Ok(self.inner.del(key)?)
@ -358,10 +359,23 @@ impl Index {
} }
} }
pub struct DocumentsAddition(RawIndex); pub struct DocumentsAddition {
inner: RawIndex,
indexer: Indexer,
}
impl DocumentsAddition { impl DocumentsAddition {
pub fn from_raw(inner: RawIndex) -> DocumentsAddition { pub fn from_raw(inner: RawIndex) -> DocumentsAddition {
DocumentsAddition { inner, indexer: Indexer::new() }
}
pub fn update_document<D>(&mut self, document: D) -> Result<(), Error>
where D: serde::Serialize,
{
unimplemented!()
}
pub fn finalize(self) -> sled::Result<()> {
unimplemented!() unimplemented!()
} }
} }
@ -380,7 +394,7 @@ impl DocumentsDeletion {
self.documents.push(id); self.documents.push(id);
} }
pub fn commit(mut self) -> Result<(), Error> { pub fn finalize(mut self) -> Result<(), Error> {
self.documents.sort_unstable(); self.documents.sort_unstable();
self.documents.dedup(); self.documents.dedup();

View File

@ -23,6 +23,13 @@ impl Indexer {
} }
} }
pub fn with_word_limit(limit: usize) -> Indexer {
Indexer {
word_limit: limit,
indexed: BTreeMap::new(),
}
}
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) { pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
for token in Tokenizer::new(text) { for token in Tokenizer::new(text) {
if token.word_index >= self.word_limit { break } if token.word_index >= self.word_limit { break }

View File

@ -0,0 +1,146 @@
use serde::Serialize;
use serde::ser;
use super::SerializerError;
pub struct ExtractString;
impl ser::Serializer for ExtractString {
type Ok = String;
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
f32 => serialize_f32,
f64 => serialize_f64,
}
fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
Ok(value.to_string())
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: Serialize,
{
Err(SerializerError::UnserializableType { name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Err(SerializerError::UnserializableType { name: "map" })
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct" })
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct variant" })
}
}

View File

@ -1,3 +1,73 @@
macro_rules! forward_to_unserializable_type {
($($ty:ident => $se_method:ident,)*) => {
$(
fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "$ty" })
}
)*
}
}
mod deserializer; mod deserializer;
mod serializer;
mod extract_string;
pub use self::deserializer::Deserializer; pub use self::deserializer::Deserializer;
pub use self::serializer::Serializer;
pub use self::extract_string::ExtractString;
use std::{fmt, error::Error};
use rmp_serde::encode::Error as RmpError;
use serde::ser;
#[derive(Debug)]
pub enum SerializerError {
DocumentIdNotFound,
RmpError(RmpError),
SledError(sled::Error),
UnserializableType { name: &'static str },
Custom(String),
}
impl ser::Error for SerializerError {
fn custom<T: fmt::Display>(msg: T) -> Self {
SerializerError::Custom(msg.to_string())
}
}
impl fmt::Display for SerializerError {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
SerializerError::DocumentIdNotFound => {
write!(f, "serialized document does not have an id according to the schema")
}
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
SerializerError::SledError(e) => write!(f, "sled related error: {}", e),
SerializerError::UnserializableType { name } => {
write!(f, "Only struct and map types are considered valid documents and
can be serialized, not {} types directly.", name)
},
SerializerError::Custom(s) => f.write_str(&s),
}
}
}
impl Error for SerializerError {}
impl From<String> for SerializerError {
fn from(value: String) -> SerializerError {
SerializerError::Custom(value)
}
}
impl From<RmpError> for SerializerError {
fn from(error: RmpError) -> SerializerError {
SerializerError::RmpError(error)
}
}
impl From<sled::Error> for SerializerError {
fn from(error: sled::Error) -> SerializerError {
SerializerError::SledError(error)
}
}

View File

@ -0,0 +1,266 @@
use std::collections::{HashSet, HashMap};
use std::fmt;
use std::error::Error;
use meilidb_core::DocumentId;
use serde::{de, ser};
use crate::schema::Schema;
use crate::database::RawIndex;
use super::{SerializerError, ExtractString};
pub struct Serializer<'a> {
pub schema: &'a Schema,
pub index: &'a RawIndex,
pub document_id: DocumentId,
}
impl<'a> ser::Serializer for Serializer<'a> {
type Ok = ();
type Error = SerializerError;
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
type SerializeMap = MapSerializer<'a>;
type SerializeStruct = StructSerializer<'a>;
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
forward_to_unserializable_type! {
bool => serialize_bool,
char => serialize_char,
i8 => serialize_i8,
i16 => serialize_i16,
i32 => serialize_i32,
i64 => serialize_i64,
u8 => serialize_u8,
u16 => serialize_u16,
u32 => serialize_u32,
u64 => serialize_u64,
f32 => serialize_f32,
f64 => serialize_f64,
}
fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "str" })
}
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "&[u8]" })
}
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
Err(SerializerError::UnserializableType { name: "Option" })
}
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "()" })
}
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
Err(SerializerError::UnserializableType { name: "unit struct" })
}
fn serialize_unit_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str
) -> Result<Self::Ok, Self::Error>
{
Err(SerializerError::UnserializableType { name: "unit variant" })
}
fn serialize_newtype_struct<T: ?Sized>(
self,
_name: &'static str,
value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
value.serialize(self)
}
fn serialize_newtype_variant<T: ?Sized>(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_value: &T
) -> Result<Self::Ok, Self::Error>
where T: ser::Serialize,
{
Err(SerializerError::UnserializableType { name: "newtype variant" })
}
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
Err(SerializerError::UnserializableType { name: "sequence" })
}
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
Err(SerializerError::UnserializableType { name: "tuple" })
}
fn serialize_tuple_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeTupleStruct, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple struct" })
}
fn serialize_tuple_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeTupleVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "tuple variant" })
}
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
Ok(MapSerializer {
schema: self.schema,
document_id: self.document_id,
index: self.index,
current_key_name: None,
})
}
fn serialize_struct(
self,
_name: &'static str,
_len: usize
) -> Result<Self::SerializeStruct, Self::Error>
{
Ok(StructSerializer {
schema: self.schema,
document_id: self.document_id,
index: self.index,
})
}
fn serialize_struct_variant(
self,
_name: &'static str,
_variant_index: u32,
_variant: &'static str,
_len: usize
) -> Result<Self::SerializeStructVariant, Self::Error>
{
Err(SerializerError::UnserializableType { name: "struct variant" })
}
}
pub struct MapSerializer<'a> {
pub schema: &'a Schema,
pub document_id: DocumentId,
pub index: &'a RawIndex,
pub current_key_name: Option<String>,
}
impl<'a> ser::SerializeMap for MapSerializer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let key = key.serialize(ExtractString)?;
self.current_key_name = Some(key);
Ok(())
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where T: ser::Serialize,
{
let key = self.current_key_name.take().unwrap();
self.serialize_entry(&key, value)
}
fn serialize_entry<K: ?Sized, V: ?Sized>(
&mut self,
key: &K,
value: &V,
) -> Result<(), Self::Error>
where K: ser::Serialize, V: ser::Serialize,
{
let key = key.serialize(ExtractString)?;
serialize_value(
self.schema,
self.document_id,
self.index,
&key,
value,
)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
pub struct StructSerializer<'a> {
pub schema: &'a Schema,
pub document_id: DocumentId,
pub index: &'a RawIndex,
}
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
type Ok = ();
type Error = SerializerError;
fn serialize_field<T: ?Sized>(
&mut self,
key: &'static str,
value: &T,
) -> Result<(), Self::Error>
where T: ser::Serialize,
{
serialize_value(
self.schema,
self.document_id,
self.index,
key,
value,
)
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(())
}
}
fn serialize_value<T: ?Sized>(
schema: &Schema,
document_id: DocumentId,
index: &RawIndex,
key: &str,
value: &T,
) -> Result<(), SerializerError>
where T: ser::Serialize,
{
if let Some(attr) = schema.attribute(key) {
let props = schema.props(attr);
if props.is_stored() {
let value = rmp_serde::to_vec_named(value)?;
index.set_document_attribute(document_id, attr, value)?;
}
}
Ok(())
}