mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 19:57:30 +01:00
feat: Introduce the Serializer
It serializes document fields as MessagePack in the key-value store.
This commit is contained in:
parent
2a69170f14
commit
ea0ee070ef
@ -19,6 +19,7 @@ use sled::IVec;
|
||||
|
||||
use crate::{Schema, SchemaAttr, RankedMap};
|
||||
use crate::serde::Deserializer;
|
||||
use crate::indexer::Indexer;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Error {
|
||||
@ -240,7 +241,7 @@ impl RawIndex {
|
||||
id: DocumentId,
|
||||
attr: SchemaAttr,
|
||||
value: V,
|
||||
) -> Result<Option<IVec>, Error>
|
||||
) -> Result<Option<IVec>, sled::Error>
|
||||
where IVec: From<V>,
|
||||
{
|
||||
let key = document_key(id, attr);
|
||||
@ -251,7 +252,7 @@ impl RawIndex {
|
||||
&self,
|
||||
id: DocumentId,
|
||||
attr: SchemaAttr
|
||||
) -> Result<Option<IVec>, Error>
|
||||
) -> Result<Option<IVec>, sled::Error>
|
||||
{
|
||||
let key = document_key(id, attr);
|
||||
Ok(self.inner.get(key)?)
|
||||
@ -267,7 +268,7 @@ impl RawIndex {
|
||||
&self,
|
||||
id: DocumentId,
|
||||
attr: SchemaAttr
|
||||
) -> Result<Option<IVec>, Error>
|
||||
) -> Result<Option<IVec>, sled::Error>
|
||||
{
|
||||
let key = document_key(id, attr);
|
||||
Ok(self.inner.del(key)?)
|
||||
@ -358,10 +359,23 @@ impl Index {
|
||||
}
|
||||
}
|
||||
|
||||
pub struct DocumentsAddition(RawIndex);
|
||||
pub struct DocumentsAddition {
|
||||
inner: RawIndex,
|
||||
indexer: Indexer,
|
||||
}
|
||||
|
||||
impl DocumentsAddition {
|
||||
pub fn from_raw(inner: RawIndex) -> DocumentsAddition {
|
||||
DocumentsAddition { inner, indexer: Indexer::new() }
|
||||
}
|
||||
|
||||
pub fn update_document<D>(&mut self, document: D) -> Result<(), Error>
|
||||
where D: serde::Serialize,
|
||||
{
|
||||
unimplemented!()
|
||||
}
|
||||
|
||||
pub fn finalize(self) -> sled::Result<()> {
|
||||
unimplemented!()
|
||||
}
|
||||
}
|
||||
@ -380,7 +394,7 @@ impl DocumentsDeletion {
|
||||
self.documents.push(id);
|
||||
}
|
||||
|
||||
pub fn commit(mut self) -> Result<(), Error> {
|
||||
pub fn finalize(mut self) -> Result<(), Error> {
|
||||
self.documents.sort_unstable();
|
||||
self.documents.dedup();
|
||||
|
||||
|
@ -23,6 +23,13 @@ impl Indexer {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn with_word_limit(limit: usize) -> Indexer {
|
||||
Indexer {
|
||||
word_limit: limit,
|
||||
indexed: BTreeMap::new(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn index_text(&mut self, id: DocumentId, attr: SchemaAttr, text: &str) {
|
||||
for token in Tokenizer::new(text) {
|
||||
if token.word_index >= self.word_limit { break }
|
||||
|
146
meilidb-data/src/serde/extract_string.rs
Normal file
146
meilidb-data/src/serde/extract_string.rs
Normal file
@ -0,0 +1,146 @@
|
||||
use serde::Serialize;
|
||||
use serde::ser;
|
||||
|
||||
use super::SerializerError;
|
||||
|
||||
/// Serializer that turns a string value (possibly wrapped in a newtype struct)
/// into an owned `String` and rejects every other kind of value.
pub struct ExtractString;
|
||||
|
||||
impl ser::Serializer for ExtractString {
|
||||
type Ok = String;
|
||||
type Error = SerializerError;
|
||||
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeMap = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeStruct = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||
|
||||
forward_to_unserializable_type! {
|
||||
bool => serialize_bool,
|
||||
char => serialize_char,
|
||||
|
||||
i8 => serialize_i8,
|
||||
i16 => serialize_i16,
|
||||
i32 => serialize_i32,
|
||||
i64 => serialize_i64,
|
||||
|
||||
u8 => serialize_u8,
|
||||
u16 => serialize_u16,
|
||||
u32 => serialize_u32,
|
||||
u64 => serialize_u64,
|
||||
|
||||
f32 => serialize_f32,
|
||||
f64 => serialize_f64,
|
||||
}
|
||||
|
||||
fn serialize_str(self, value: &str) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(value.to_string())
|
||||
}
|
||||
|
||||
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "&[u8]" })
|
||||
}
|
||||
|
||||
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "Option" })
|
||||
}
|
||||
|
||||
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
|
||||
where T: Serialize,
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "Option" })
|
||||
}
|
||||
|
||||
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "()" })
|
||||
}
|
||||
|
||||
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "unit struct" })
|
||||
}
|
||||
|
||||
fn serialize_unit_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "unit variant" })
|
||||
}
|
||||
|
||||
fn serialize_newtype_struct<T: ?Sized>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
value: &T
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
where T: Serialize,
|
||||
{
|
||||
value.serialize(self)
|
||||
}
|
||||
|
||||
fn serialize_newtype_variant<T: ?Sized>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_value: &T
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
where T: Serialize,
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "newtype variant" })
|
||||
}
|
||||
|
||||
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "sequence" })
|
||||
}
|
||||
|
||||
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "tuple" })
|
||||
}
|
||||
|
||||
fn serialize_tuple_struct(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_len: usize
|
||||
) -> Result<Self::SerializeTupleStruct, Self::Error>
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "tuple struct" })
|
||||
}
|
||||
|
||||
fn serialize_tuple_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_len: usize
|
||||
) -> Result<Self::SerializeTupleVariant, Self::Error>
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "tuple variant" })
|
||||
}
|
||||
|
||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "map" })
|
||||
}
|
||||
|
||||
fn serialize_struct(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_len: usize
|
||||
) -> Result<Self::SerializeStruct, Self::Error>
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "struct" })
|
||||
}
|
||||
|
||||
fn serialize_struct_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_len: usize
|
||||
) -> Result<Self::SerializeStructVariant, Self::Error>
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "struct variant" })
|
||||
}
|
||||
}
|
@ -1,3 +1,73 @@
|
||||
/// Generates serializer methods that reject primitive values.
///
/// Every `ty => method,` pair expands to `fn method(self, _v: ty)` returning
/// `SerializerError::UnserializableType` whose `name` is the textual name of
/// `ty` (for example `"bool"`).
///
/// The macro must be invoked inside an `impl` that defines the `Ok` and
/// `Error` associated types, with `SerializerError` in scope.
macro_rules! forward_to_unserializable_type {
    ($($ty:ident => $se_method:ident,)*) => {
        $(
            fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
                // Metavariables are not substituted inside string literals, so
                // the type name has to be produced with `stringify!`.
                Err(SerializerError::UnserializableType { name: stringify!($ty) })
            }
        )*
    }
}
|
||||
|
||||
mod deserializer;
|
||||
mod serializer;
|
||||
mod extract_string;
|
||||
|
||||
pub use self::deserializer::Deserializer;
|
||||
pub use self::serializer::Serializer;
|
||||
pub use self::extract_string::ExtractString;
|
||||
|
||||
use std::{fmt, error::Error};
|
||||
use rmp_serde::encode::Error as RmpError;
|
||||
use serde::ser;
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum SerializerError {
|
||||
DocumentIdNotFound,
|
||||
RmpError(RmpError),
|
||||
SledError(sled::Error),
|
||||
UnserializableType { name: &'static str },
|
||||
Custom(String),
|
||||
}
|
||||
|
||||
impl ser::Error for SerializerError {
|
||||
fn custom<T: fmt::Display>(msg: T) -> Self {
|
||||
SerializerError::Custom(msg.to_string())
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Display for SerializerError {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
match self {
|
||||
SerializerError::DocumentIdNotFound => {
|
||||
write!(f, "serialized document does not have an id according to the schema")
|
||||
}
|
||||
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
|
||||
SerializerError::SledError(e) => write!(f, "sled related error: {}", e),
|
||||
SerializerError::UnserializableType { name } => {
|
||||
write!(f, "Only struct and map types are considered valid documents and
|
||||
can be serialized, not {} types directly.", name)
|
||||
},
|
||||
SerializerError::Custom(s) => f.write_str(&s),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Empty impl: every `std::error::Error` method keeps its default behavior.
impl Error for SerializerError {}
|
||||
|
||||
impl From<String> for SerializerError {
|
||||
fn from(value: String) -> SerializerError {
|
||||
SerializerError::Custom(value)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<RmpError> for SerializerError {
|
||||
fn from(error: RmpError) -> SerializerError {
|
||||
SerializerError::RmpError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<sled::Error> for SerializerError {
|
||||
fn from(error: sled::Error) -> SerializerError {
|
||||
SerializerError::SledError(error)
|
||||
}
|
||||
}
|
||||
|
266
meilidb-data/src/serde/serializer.rs
Normal file
266
meilidb-data/src/serde/serializer.rs
Normal file
@ -0,0 +1,266 @@
|
||||
use std::collections::{HashSet, HashMap};
|
||||
use std::fmt;
|
||||
use std::error::Error;
|
||||
|
||||
use meilidb_core::DocumentId;
|
||||
use serde::{de, ser};
|
||||
|
||||
use crate::schema::Schema;
|
||||
use crate::database::RawIndex;
|
||||
use super::{SerializerError, ExtractString};
|
||||
|
||||
pub struct Serializer<'a> {
|
||||
pub schema: &'a Schema,
|
||||
pub index: &'a RawIndex,
|
||||
pub document_id: DocumentId,
|
||||
}
|
||||
|
||||
impl<'a> ser::Serializer for Serializer<'a> {
|
||||
type Ok = ();
|
||||
type Error = SerializerError;
|
||||
type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||
type SerializeMap = MapSerializer<'a>;
|
||||
type SerializeStruct = StructSerializer<'a>;
|
||||
type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;
|
||||
|
||||
forward_to_unserializable_type! {
|
||||
bool => serialize_bool,
|
||||
char => serialize_char,
|
||||
|
||||
i8 => serialize_i8,
|
||||
i16 => serialize_i16,
|
||||
i32 => serialize_i32,
|
||||
i64 => serialize_i64,
|
||||
|
||||
u8 => serialize_u8,
|
||||
u16 => serialize_u16,
|
||||
u32 => serialize_u32,
|
||||
u64 => serialize_u64,
|
||||
|
||||
f32 => serialize_f32,
|
||||
f64 => serialize_f64,
|
||||
}
|
||||
|
||||
fn serialize_str(self, _v: &str) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "str" })
|
||||
}
|
||||
|
||||
fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "&[u8]" })
|
||||
}
|
||||
|
||||
fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "Option" })
|
||||
}
|
||||
|
||||
fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
|
||||
where T: ser::Serialize,
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "Option" })
|
||||
}
|
||||
|
||||
fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "()" })
|
||||
}
|
||||
|
||||
fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "unit struct" })
|
||||
}
|
||||
|
||||
fn serialize_unit_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "unit variant" })
|
||||
}
|
||||
|
||||
fn serialize_newtype_struct<T: ?Sized>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
value: &T
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
where T: ser::Serialize,
|
||||
{
|
||||
value.serialize(self)
|
||||
}
|
||||
|
||||
fn serialize_newtype_variant<T: ?Sized>(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_value: &T
|
||||
) -> Result<Self::Ok, Self::Error>
|
||||
where T: ser::Serialize,
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "newtype variant" })
|
||||
}
|
||||
|
||||
fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "sequence" })
|
||||
}
|
||||
|
||||
fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
|
||||
Err(SerializerError::UnserializableType { name: "tuple" })
|
||||
}
|
||||
|
||||
fn serialize_tuple_struct(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_len: usize
|
||||
) -> Result<Self::SerializeTupleStruct, Self::Error>
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "tuple struct" })
|
||||
}
|
||||
|
||||
fn serialize_tuple_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_len: usize
|
||||
) -> Result<Self::SerializeTupleVariant, Self::Error>
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "tuple variant" })
|
||||
}
|
||||
|
||||
fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
|
||||
Ok(MapSerializer {
|
||||
schema: self.schema,
|
||||
document_id: self.document_id,
|
||||
index: self.index,
|
||||
current_key_name: None,
|
||||
})
|
||||
}
|
||||
|
||||
fn serialize_struct(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_len: usize
|
||||
) -> Result<Self::SerializeStruct, Self::Error>
|
||||
{
|
||||
Ok(StructSerializer {
|
||||
schema: self.schema,
|
||||
document_id: self.document_id,
|
||||
index: self.index,
|
||||
})
|
||||
}
|
||||
|
||||
fn serialize_struct_variant(
|
||||
self,
|
||||
_name: &'static str,
|
||||
_variant_index: u32,
|
||||
_variant: &'static str,
|
||||
_len: usize
|
||||
) -> Result<Self::SerializeStructVariant, Self::Error>
|
||||
{
|
||||
Err(SerializerError::UnserializableType { name: "struct variant" })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct MapSerializer<'a> {
|
||||
pub schema: &'a Schema,
|
||||
pub document_id: DocumentId,
|
||||
pub index: &'a RawIndex,
|
||||
pub current_key_name: Option<String>,
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeMap for MapSerializer<'a> {
|
||||
type Ok = ();
|
||||
type Error = SerializerError;
|
||||
|
||||
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
|
||||
where T: ser::Serialize,
|
||||
{
|
||||
let key = key.serialize(ExtractString)?;
|
||||
self.current_key_name = Some(key);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
|
||||
where T: ser::Serialize,
|
||||
{
|
||||
let key = self.current_key_name.take().unwrap();
|
||||
self.serialize_entry(&key, value)
|
||||
}
|
||||
|
||||
fn serialize_entry<K: ?Sized, V: ?Sized>(
|
||||
&mut self,
|
||||
key: &K,
|
||||
value: &V,
|
||||
) -> Result<(), Self::Error>
|
||||
where K: ser::Serialize, V: ser::Serialize,
|
||||
{
|
||||
let key = key.serialize(ExtractString)?;
|
||||
|
||||
serialize_value(
|
||||
self.schema,
|
||||
self.document_id,
|
||||
self.index,
|
||||
&key,
|
||||
value,
|
||||
)
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
pub struct StructSerializer<'a> {
|
||||
pub schema: &'a Schema,
|
||||
pub document_id: DocumentId,
|
||||
pub index: &'a RawIndex,
|
||||
}
|
||||
|
||||
impl<'a> ser::SerializeStruct for StructSerializer<'a> {
|
||||
type Ok = ();
|
||||
type Error = SerializerError;
|
||||
|
||||
fn serialize_field<T: ?Sized>(
|
||||
&mut self,
|
||||
key: &'static str,
|
||||
value: &T,
|
||||
) -> Result<(), Self::Error>
|
||||
where T: ser::Serialize,
|
||||
{
|
||||
serialize_value(
|
||||
self.schema,
|
||||
self.document_id,
|
||||
self.index,
|
||||
key,
|
||||
value,
|
||||
)
|
||||
}
|
||||
|
||||
fn end(self) -> Result<Self::Ok, Self::Error> {
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
fn serialize_value<T: ?Sized>(
|
||||
schema: &Schema,
|
||||
document_id: DocumentId,
|
||||
index: &RawIndex,
|
||||
key: &str,
|
||||
value: &T,
|
||||
) -> Result<(), SerializerError>
|
||||
where T: ser::Serialize,
|
||||
{
|
||||
if let Some(attr) = schema.attribute(key) {
|
||||
let props = schema.props(attr);
|
||||
|
||||
if props.is_stored() {
|
||||
let value = rmp_serde::to_vec_named(value)?;
|
||||
index.set_document_attribute(document_id, attr, value)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user