Remove the serde ExtractDocumentId struct

This commit is contained in:
Kerollmops 2020-05-18 13:19:19 +02:00
parent 2558ce9a00
commit 25b3c9a057
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
8 changed files with 47 additions and 635 deletions

View File

@ -18,7 +18,7 @@ mod query_words_mapper;
mod ranked_map;
mod raw_document;
mod reordered_attrs;
mod update;
pub mod update;
pub mod criterion;
pub mod facets;
pub mod raw_indexer;

View File

@ -1,279 +0,0 @@
use serde::ser;
use serde::Serialize;
use super::SerializerError;
/// Serializer that flattens a value into a plain `String`.
///
/// Booleans, characters, numbers and strings are rendered with `to_string`,
/// the unit value becomes an empty string, and sequences, maps and structs
/// are handed to their dedicated collectors. Every other shape is refused
/// with `SerializerError::UnserializableType`.
pub struct ConvertToString;

impl ConvertToString {
    /// Builds the error returned for every shape this serializer refuses.
    fn refuse<T>(type_name: &'static str) -> Result<T, SerializerError> {
        Err(SerializerError::UnserializableType { type_name })
    }
}

// Generates the scalar methods, which all render their input with `to_string`.
macro_rules! render_with_to_string {
    ($($ty:ty => $method:ident,)*) => {
        $(
            fn $method(self, value: $ty) -> Result<Self::Ok, Self::Error> {
                Ok(value.to_string())
            }
        )*
    };
}

impl ser::Serializer for ConvertToString {
    type Ok = String;
    type Error = SerializerError;
    type SerializeSeq = SeqConvertToString;
    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeMap = MapConvertToString;
    type SerializeStruct = StructConvertToString;
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

    render_with_to_string! {
        bool => serialize_bool,
        char => serialize_char,
        i8 => serialize_i8,
        i16 => serialize_i16,
        i32 => serialize_i32,
        i64 => serialize_i64,
        u8 => serialize_u8,
        u16 => serialize_u16,
        u32 => serialize_u32,
        u64 => serialize_u64,
        f32 => serialize_f32,
        f64 => serialize_f64,
        &str => serialize_str,
    }

    fn serialize_bytes(self, _v: &[u8]) -> Result<Self::Ok, Self::Error> {
        Self::refuse("&[u8]")
    }

    // Both `None` and `Some(_)` are refused under the same type name.
    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        Self::refuse("Option")
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        Self::refuse("Option")
    }

    // The unit value is accepted and rendered as an empty string.
    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        Ok(String::new())
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        Self::refuse("unit struct")
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        Self::refuse("unit variant")
    }

    // A newtype struct is transparent: its inner value is rendered directly.
    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        value.serialize(self)
    }

    fn serialize_newtype_variant<T: ?Sized>(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        Self::refuse("newtype variant")
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        let text = String::new();
        Ok(SeqConvertToString { text })
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        Self::refuse("tuple")
    }

    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        Self::refuse("tuple struct")
    }

    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        Self::refuse("tuple variant")
    }

    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        let text = String::new();
        Ok(MapConvertToString { text })
    }

    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        let text = String::new();
        Ok(StructConvertToString { text })
    }

    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        Self::refuse("struct variant")
    }
}
/// Collector used by `ConvertToString` to flatten a map into text.
///
/// Each entry is rendered as `key value`, and consecutive entries are
/// separated by a single space.
pub struct MapConvertToString {
    text: String,
}

impl ser::SerializeMap for MapConvertToString {
    type Ok = String;
    type Error = SerializerError;

    /// Appends the rendered key followed by a space.
    ///
    /// Returns the error of the nested `ConvertToString` serialization when
    /// the key cannot be rendered; nothing is appended in that case.
    fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
    where
        T: ser::Serialize,
    {
        let text = key.serialize(ConvertToString)?;
        // Keep the previous entry's value apart from this key; without the
        // separator two entries would be fused as `k1 v1k2 v2`.
        if !self.text.is_empty() {
            self.text.push(' ');
        }
        self.text.push_str(&text);
        self.text.push(' ');
        Ok(())
    }

    /// Appends the rendered value right after the key written just before.
    fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
    where
        T: ser::Serialize,
    {
        let text = value.serialize(ConvertToString)?;
        self.text.push_str(&text);
        Ok(())
    }

    /// Returns the accumulated text.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        Ok(self.text)
    }
}
/// Collector used by `ConvertToString` to flatten a struct into text.
///
/// Each field is rendered as `name value`, and consecutive fields are
/// separated by a single space.
pub struct StructConvertToString {
    text: String,
}

impl ser::SerializeStruct for StructConvertToString {
    type Ok = String;
    type Error = SerializerError;

    /// Appends `key value` for one field.
    ///
    /// The value is rendered first, so a field that cannot be rendered
    /// returns its error without leaving a dangling name in the text.
    fn serialize_field<T: ?Sized>(
        &mut self,
        key: &'static str,
        value: &T,
    ) -> Result<(), Self::Error>
    where
        T: ser::Serialize,
    {
        let value = value.serialize(ConvertToString)?;
        // Keep the previous field's value apart from this field's name;
        // without the separator two fields would be fused as `a 1b 2`.
        if !self.text.is_empty() {
            self.text.push(' ');
        }
        self.text.push_str(key);
        self.text.push(' ');
        self.text.push_str(&value);
        Ok(())
    }

    /// Returns the accumulated text.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        Ok(self.text)
    }
}
/// Collector used by `ConvertToString` to flatten a sequence into text.
///
/// Every element is rendered and followed by one space, so the final text
/// ends with a trailing space whenever the sequence is not empty.
pub struct SeqConvertToString {
    text: String,
}

impl ser::SerializeSeq for SeqConvertToString {
    type Ok = String;
    type Error = SerializerError;

    /// Renders one element and appends it, followed by a space.
    fn serialize_element<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
    where
        T: ser::Serialize,
    {
        let rendered = key.serialize(ConvertToString)?;
        self.text += &rendered;
        self.text.push(' ');
        Ok(())
    }

    /// Returns the accumulated text.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        let Self { text } = self;
        Ok(text)
    }
}

View File

@ -1,310 +0,0 @@
use std::hash::{Hash, Hasher};
use crate::DocumentId;
use serde::{ser, Serialize};
use serde_json::{Value, Number};
use siphasher::sip::SipHasher;
use super::{ConvertToString, SerializerError};
/// Looks up the `primary_key` field of `document` and derives its id.
///
/// The document is driven through the `ExtractDocumentId` serializer, whose
/// result is returned untouched: `Ok(None)` when no entry matching
/// `primary_key` was seen, `Ok(Some(id))` when one was found and accepted,
/// and an error otherwise.
pub fn extract_document_id<D>(
    primary_key: &str,
    document: &D,
) -> Result<Option<DocumentId>, SerializerError>
where
    D: serde::Serialize,
{
    document.serialize(ExtractDocumentId { primary_key })
}
/// Renders a JSON number as text, rejecting numbers reported as `f64`.
fn validate_number(value: &Number) -> Option<String> {
    if value.is_f64() {
        None
    } else {
        Some(value.to_string())
    }
}
/// Returns an owned copy of `value` when it is made only of ASCII
/// alphanumeric characters, hyphens (`-`) and underscores (`_`).
///
/// Any other character makes the whole string invalid and yields `None`.
/// An empty string contains no offending character and is therefore accepted.
fn validate_string(value: &str) -> Option<String> {
    let is_forbidden = |c: char| !(c.is_ascii_alphanumeric() || c == '-' || c == '_');
    if value.chars().any(is_forbidden) {
        return None;
    }
    Some(value.to_owned())
}
/// Converts a JSON value into the text form used as a document id.
///
/// Only numbers and strings can qualify, and each goes through its own
/// validator (`validate_number`, `validate_string`). Null, booleans, arrays
/// and objects always yield `None`.
pub fn value_to_string(value: &Value) -> Option<String> {
    match value {
        Value::Number(number) => validate_number(number),
        Value::String(string) => validate_string(string),
        Value::Null | Value::Bool(_) | Value::Array(_) | Value::Object(_) => None,
    }
}
/// Hashes `t` with a freshly created `SipHasher` and wraps the resulting
/// 64-bit digest in a `DocumentId`.
pub fn compute_document_id<H: Hash>(t: H) -> DocumentId {
    let mut hasher = SipHasher::new();
    t.hash(&mut hasher);
    DocumentId(hasher.finish())
}
/// Serializer that searches a document for the entry named `primary_key`
/// and turns its value into a `DocumentId`.
///
/// Only maps and structs can be inspected (newtype structs are unwrapped
/// first); every other shape is refused with
/// `SerializerError::UnserializableType`.
struct ExtractDocumentId<'a> {
    primary_key: &'a str,
}

impl<'a> ExtractDocumentId<'a> {
    /// Builds the error returned for every shape this serializer refuses.
    fn refuse<T>(type_name: &'static str) -> Result<T, SerializerError> {
        Err(SerializerError::UnserializableType { type_name })
    }
}

impl<'a> ser::Serializer for ExtractDocumentId<'a> {
    type Ok = Option<DocumentId>;
    type Error = SerializerError;
    type SerializeSeq = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTuple = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleStruct = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeTupleVariant = ser::Impossible<Self::Ok, Self::Error>;
    type SerializeMap = ExtractDocumentIdMapSerializer<'a>;
    type SerializeStruct = ExtractDocumentIdStructSerializer<'a>;
    type SerializeStructVariant = ser::Impossible<Self::Ok, Self::Error>;

    // Scalars cannot carry a primary key: all of them are refused.
    forward_to_unserializable_type! {
        bool => serialize_bool,
        char => serialize_char,
        i8 => serialize_i8,
        i16 => serialize_i16,
        i32 => serialize_i32,
        i64 => serialize_i64,
        u8 => serialize_u8,
        u16 => serialize_u16,
        u32 => serialize_u32,
        u64 => serialize_u64,
        f32 => serialize_f32,
        f64 => serialize_f64,
    }

    fn serialize_str(self, _value: &str) -> Result<Self::Ok, Self::Error> {
        Self::refuse("str")
    }

    fn serialize_bytes(self, _value: &[u8]) -> Result<Self::Ok, Self::Error> {
        Self::refuse("&[u8]")
    }

    // Both `None` and `Some(_)` are refused under the same type name.
    fn serialize_none(self) -> Result<Self::Ok, Self::Error> {
        Self::refuse("Option")
    }

    fn serialize_some<T: ?Sized>(self, _value: &T) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        Self::refuse("Option")
    }

    fn serialize_unit(self) -> Result<Self::Ok, Self::Error> {
        Self::refuse("()")
    }

    fn serialize_unit_struct(self, _name: &'static str) -> Result<Self::Ok, Self::Error> {
        Self::refuse("unit struct")
    }

    fn serialize_unit_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
    ) -> Result<Self::Ok, Self::Error> {
        Self::refuse("unit variant")
    }

    // A newtype struct is transparent: the search continues in its content.
    fn serialize_newtype_struct<T: ?Sized>(
        self,
        _name: &'static str,
        value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        value.serialize(self)
    }

    fn serialize_newtype_variant<T: ?Sized>(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _value: &T,
    ) -> Result<Self::Ok, Self::Error>
    where
        T: Serialize,
    {
        Self::refuse("newtype variant")
    }

    fn serialize_seq(self, _len: Option<usize>) -> Result<Self::SerializeSeq, Self::Error> {
        Self::refuse("sequence")
    }

    fn serialize_tuple(self, _len: usize) -> Result<Self::SerializeTuple, Self::Error> {
        Self::refuse("tuple")
    }

    fn serialize_tuple_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleStruct, Self::Error> {
        Self::refuse("tuple struct")
    }

    fn serialize_tuple_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeTupleVariant, Self::Error> {
        Self::refuse("tuple variant")
    }

    // Maps are scanned entry by entry, starting with no id and no pending key.
    fn serialize_map(self, _len: Option<usize>) -> Result<Self::SerializeMap, Self::Error> {
        Ok(ExtractDocumentIdMapSerializer {
            primary_key: self.primary_key,
            document_id: None,
            current_key_name: None,
        })
    }

    // Structs are scanned field by field, starting with no id.
    fn serialize_struct(
        self,
        _name: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStruct, Self::Error> {
        Ok(ExtractDocumentIdStructSerializer {
            primary_key: self.primary_key,
            document_id: None,
        })
    }

    fn serialize_struct_variant(
        self,
        _name: &'static str,
        _variant_index: u32,
        _variant: &'static str,
        _len: usize,
    ) -> Result<Self::SerializeStructVariant, Self::Error> {
        Self::refuse("struct variant")
    }
}
pub struct ExtractDocumentIdMapSerializer<'a> {
primary_key: &'a str,
document_id: Option<DocumentId>,
current_key_name: Option<String>,
}
impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
type Ok = Option<DocumentId>;
type Error = SerializerError;
fn serialize_key<T: ?Sized>(&mut self, key: &T) -> Result<(), Self::Error>
where
T: Serialize,
{
let key = key.serialize(ConvertToString)?;
self.current_key_name = Some(key);
Ok(())
}
fn serialize_value<T: ?Sized>(&mut self, value: &T) -> Result<(), Self::Error>
where
T: Serialize,
{
let key = self.current_key_name.take().unwrap();
self.serialize_entry(&key, value)
}
fn serialize_entry<K: ?Sized, V: ?Sized>(
&mut self,
key: &K,
value: &V,
) -> Result<(), Self::Error>
where
K: Serialize,
V: Serialize,
{
let key = key.serialize(ConvertToString)?;
if self.primary_key == key {
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
match value_to_string(&value).map(|s| compute_document_id(&s)) {
Some(document_id) => self.document_id = Some(document_id),
None => return Err(SerializerError::InvalidDocumentIdType),
}
}
Ok(())
}
fn end(self) -> Result<Self::Ok, Self::Error> {
Ok(self.document_id)
}
}
/// Struct collector of `ExtractDocumentId`.
///
/// It watches the fields of a struct and, when one is named `primary_key`,
/// converts its value into a `DocumentId`.
pub struct ExtractDocumentIdStructSerializer<'a> {
    primary_key: &'a str,
    document_id: Option<DocumentId>,
}

impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> {
    type Ok = Option<DocumentId>;
    type Error = SerializerError;

    /// Records the document id when `key` is the primary key.
    ///
    /// Fields with any other name are ignored. Returns
    /// `SerializerError::InvalidDocumentIdType` when the primary-key value is
    /// rejected by `value_to_string`, or the converted `serde_json` error
    /// when the value cannot be turned into a JSON value.
    fn serialize_field<T: ?Sized>(
        &mut self,
        key: &'static str,
        value: &T,
    ) -> Result<(), Self::Error>
    where
        T: Serialize,
    {
        if self.primary_key != key {
            return Ok(());
        }

        // Build the JSON value directly instead of writing JSON text and
        // parsing it back.
        let value = serde_json::to_value(value)?;
        let document_id = value_to_string(&value)
            .map(compute_document_id)
            .ok_or(SerializerError::InvalidDocumentIdType)?;
        self.document_id = Some(document_id);
        Ok(())
    }

    /// Returns the id found while scanning, or `None` when the primary key
    /// never showed up.
    fn end(self) -> Result<Self::Ok, Self::Error> {
        Ok(self.document_id)
    }
}

View File

@ -1,20 +1,6 @@
// Generates, for each `type => method` pair, a serializer method that
// refuses its input with `SerializerError::UnserializableType`.
//
// It must be invoked inside an impl where `Self::Ok`, `Self::Error` and
// `SerializerError` are in scope.
macro_rules! forward_to_unserializable_type {
    ($($ty:ident => $se_method:ident,)*) => {
        $(
            fn $se_method(self, _v: $ty) -> Result<Self::Ok, Self::Error> {
                // Metavariables are not expanded inside string literals, so
                // `"$ty"` would report the literal text `$ty`; `stringify!`
                // yields the real type name instead.
                Err(SerializerError::UnserializableType { type_name: stringify!($ty) })
            }
        )*
    }
}
mod convert_to_string;
mod deserializer;
mod extract_document_id;
pub use self::convert_to_string::ConvertToString;
pub use self::deserializer::{Deserializer, DeserializerError};
pub use self::extract_document_id::{compute_document_id, extract_document_id, value_to_string};
use std::{error::Error, fmt};
@ -27,7 +13,7 @@ use crate::ParseNumberError;
#[derive(Debug)]
pub enum SerializerError {
DocumentIdNotFound,
InvalidDocumentIdType,
InvalidDocumentIdFormat,
Zlmdb(heed::Error),
SerdeJson(SerdeJsonError),
ParseNumber(ParseNumberError),
@ -50,7 +36,7 @@ impl fmt::Display for SerializerError {
SerializerError::DocumentIdNotFound => {
f.write_str("serialized document does not have an id according to the schema")
}
SerializerError::InvalidDocumentIdType => {
SerializerError::InvalidDocumentIdFormat => {
f.write_str("a document primary key can be of type integer or string only composed of alphanumeric characters, hyphens (-) and underscores (_).")
}
SerializerError::Zlmdb(e) => write!(f, "heed related error: {}", e),

View File

@ -1,11 +1,13 @@
use std::collections::HashMap;
use std::fmt::Write as _;
use std::hash::{Hash, Hasher};
use fst::{set::OpBuilder, SetBuilder};
use indexmap::IndexMap;
use sdset::{duo::Union, SetOperation};
use serde::Deserialize;
use serde_json::Value;
use siphasher::sip::SipHasher;
use meilisearch_types::DocumentId;
use meilisearch_schema::IndexedPos;
@ -14,7 +16,7 @@ use crate::database::{MainT, UpdateT};
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::facets;
use crate::raw_indexer::RawIndexer;
use crate::serde::{extract_document_id, Deserializer};
use crate::serde::{Deserializer, SerializerError};
use crate::store;
use crate::update::{apply_documents_deletion, compute_short_prefixes, next_update_id, Update};
use crate::{Error, Number, MResult, RankedMap};
@ -148,7 +150,7 @@ fn index_value(
}
// TODO: move these helper functions elsewhere
fn value_to_string(value: &Value) -> String {
pub fn value_to_string(value: &Value) -> String {
fn internal_value_to_string(string: &mut String, value: &Value) {
match value {
Value::Null => (),
@ -191,6 +193,39 @@ fn value_to_number(value: &Value) -> Option<Number> {
}
}
// TODO: move these helper functions elsewhere
/// Derives a `DocumentId` from any hashable value.
///
/// The value is fed to a freshly created `SipHasher` and the resulting
/// 64-bit digest becomes the id.
pub fn compute_document_id<H: Hash>(t: H) -> DocumentId {
    let mut state = SipHasher::new();
    t.hash(&mut state);
    let digest = state.finish();
    DocumentId(digest)
}
// TODO: move these helper functions elsewhere
/// Reads the `primary_key` entry of `document` and derives its `DocumentId`.
///
/// The entry must be a JSON number or string whose text form contains only
/// ASCII alphanumeric characters, hyphens (`-`) and underscores (`_`).
///
/// # Errors
///
/// - `SerializerError::DocumentIdNotFound` when the document has no entry
///   named `primary_key`.
/// - `SerializerError::InvalidDocumentIdFormat` when the entry is neither a
///   number nor a string, or when its text form contains another character.
pub fn extract_document_id(primary_key: &str, document: &IndexMap<String, Value>) -> Result<DocumentId, SerializerError> {
    let value = document
        .get(primary_key)
        .ok_or(SerializerError::DocumentIdNotFound)?;

    let candidate = match value {
        Value::Number(number) => number.to_string(),
        Value::String(string) => string.clone(),
        _ => return Err(SerializerError::InvalidDocumentIdFormat),
    };

    let is_well_formed = candidate
        .chars()
        .all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_');
    if !is_well_formed {
        return Err(SerializerError::InvalidDocumentIdFormat);
    }

    Ok(compute_document_id(candidate))
}
pub fn apply_addition<'a, 'b>(
writer: &'a mut heed::RwTxn<'b, MainT>,
index: &store::Index,
@ -208,10 +243,7 @@ pub fn apply_addition<'a, 'b>(
// 1. store documents ids for future deletion
for mut document in addition {
let document_id = match extract_document_id(&primary_key, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
let document_id = extract_document_id(&primary_key, &document)?;
if partial {
let mut deserializer = Deserializer {

View File

@ -1,13 +1,11 @@
use std::collections::{BTreeSet, HashMap, HashSet};
use fst::{SetBuilder, Streamer};
use meilisearch_schema::Schema;
use sdset::{duo::DifferenceByKey, SetBuf, SetOperation};
use crate::database::{MainT, UpdateT};
use crate::database::{UpdateEvent, UpdateEventsEmitter};
use crate::facets;
use crate::serde::extract_document_id;
use crate::store;
use crate::update::{next_update_id, compute_short_prefixes, Update};
use crate::{DocumentId, Error, MResult, RankedMap};
@ -37,21 +35,6 @@ impl DocumentsDeletion {
self.documents.push(document_id);
}
pub fn delete_document<D>(&mut self, schema: &Schema, document: D) -> MResult<()>
where
D: serde::Serialize,
{
let primary_key = schema.primary_key().ok_or(Error::MissingPrimaryKey)?;
let document_id = match extract_document_id(&primary_key, &document)? {
Some(id) => id,
None => return Err(Error::MissingDocumentId),
};
self.delete_document_by_id(document_id);
Ok(())
}
pub fn finalize(self, writer: &mut heed::RwTxn<UpdateT>) -> MResult<u64> {
let _ = self.updates_notifier.send(UpdateEvent::NewUpdate);
let update_id = push_documents_deletion(

View File

@ -8,6 +8,7 @@ pub use self::clear_all::{apply_clear_all, push_clear_all};
pub use self::customs_update::{apply_customs_update, push_customs_update};
pub use self::documents_addition::{
apply_documents_addition, apply_documents_partial_addition, DocumentsAddition,
value_to_string, compute_document_id, extract_document_id,
};
pub use self::documents_deletion::{apply_documents_deletion, DocumentsDeletion};
pub use self::settings_update::{apply_settings_update, push_settings_update};

View File

@ -42,7 +42,7 @@ async fn get_document(
.open_index(&path.index_uid)
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
let document_id = meilisearch_core::serde::compute_document_id(&path.document_id);
let document_id = meilisearch_core::update::compute_document_id(&path.document_id);
let reader = data.db.main_read_txn()?;
@ -65,7 +65,7 @@ async fn delete_document(
.db
.open_index(&path.index_uid)
.ok_or(ResponseError::index_not_found(&path.index_uid))?;
let document_id = meilisearch_core::serde::compute_document_id(&path.document_id);
let document_id = meilisearch_core::update::compute_document_id(&path.document_id);
let mut update_writer = data.db.update_write_txn()?;
@ -237,10 +237,9 @@ async fn delete_documents(
let mut documents_deletion = index.documents_deletion();
for document_id in body.into_inner() {
if let Some(document_id) = meilisearch_core::serde::value_to_string(&document_id) {
documents_deletion
.delete_document_by_id(meilisearch_core::serde::compute_document_id(document_id));
}
let document_id_string = meilisearch_core::update::value_to_string(&document_id);
let document_id = meilisearch_core::update::compute_document_id(document_id_string);
documents_deletion.delete_document_by_id(document_id);
}
let update_id = documents_deletion.finalize(&mut writer)?;