Merge pull request #171 from meilisearch/stringify-document-id

Transform identifiers fields into a string before hashing it
This commit is contained in:
Clément Renault 2019-08-29 13:42:46 +02:00 committed by GitHub
commit 6f34dccc89
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 39 additions and 12 deletions

View File

@ -15,7 +15,8 @@ meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
ordered-float = { version = "1.0.2", features = ["serde"] }
rocksdb = { version = "0.12.2", default-features = false }
sdset = "0.3.2"
serde = { version = "1.0.91", features = ["derive"] }
serde = { version = "1.0.99", features = ["derive"] }
serde_json = "1.0.40"
siphasher = "0.3.0"
zerocopy = "0.2.2"
@ -29,4 +30,3 @@ branch = "arc-byte-slice"
[dev-dependencies]
tempfile = "3.0.7"
serde_json = "1.0.39"

View File

@ -2,6 +2,7 @@ use std::hash::{Hash, Hasher};
use meilidb_core::DocumentId;
use serde::{ser, Serialize};
use serde_json::Value;
use siphasher::sip::SipHasher;
use super::{SerializerError, ConvertToString};
@ -16,7 +17,18 @@ where D: serde::Serialize,
document.serialize(serializer)
}
pub fn compute_document_id<T: Hash>(t: &T) -> DocumentId {
pub fn value_to_string(value: &Value) -> Option<String> {
match value {
Value::Null => None,
Value::Bool(_) => None,
Value::Number(value) => Some(value.to_string()),
Value::String(value) => Some(value.to_string()),
Value::Array(_) => None,
Value::Object(_) => None,
}
}
pub fn compute_document_id<H: Hash>(t: H) -> DocumentId {
let mut s = SipHasher::new();
t.hash(&mut s);
let hash = s.finish();
@ -213,10 +225,11 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
let key = key.serialize(ConvertToString)?;
if self.identifier == key {
// TODO is it possible to have multiple ids?
let id = bincode::serialize(value).unwrap();
let document_id = compute_document_id(&id);
self.document_id = Some(document_id);
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
match value_to_string(&value).map(|s| compute_document_id(&s)) {
Some(document_id) => self.document_id = Some(document_id),
None => return Err(SerializerError::InvalidDocumentIdType),
}
}
Ok(())
@ -244,10 +257,11 @@ impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> {
where T: Serialize,
{
if self.identifier == key {
// TODO can it be possible to have multiple ids?
let id = bincode::serialize(value).unwrap();
let document_id = compute_document_id(&id);
self.document_id = Some(document_id);
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
match value_to_string(&value).map(compute_document_id) {
Some(document_id) => self.document_id = Some(document_id),
None => return Err(SerializerError::InvalidDocumentIdType),
}
}
Ok(())

View File

@ -28,6 +28,7 @@ use std::{fmt, error::Error};
use meilidb_core::DocumentId;
use meilidb_schema::SchemaAttr;
use rmp_serde::encode::Error as RmpError;
use serde_json::Error as SerdeJsonError;
use serde::ser;
use crate::number::ParseNumberError;
@ -35,7 +36,9 @@ use crate::number::ParseNumberError;
#[derive(Debug)]
pub enum SerializerError {
DocumentIdNotFound,
InvalidDocumentIdType,
RmpError(RmpError),
SerdeJsonError(SerdeJsonError),
RocksdbError(rocksdb::Error),
ParseNumberError(ParseNumberError),
UnserializableType { type_name: &'static str },
@ -55,8 +58,12 @@ impl fmt::Display for SerializerError {
match self {
SerializerError::DocumentIdNotFound => {
write!(f, "serialized document does not have an id according to the schema")
}
},
SerializerError::InvalidDocumentIdType => {
write!(f, "document identifier can only be of type string or number")
},
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e),
SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e),
SerializerError::ParseNumberError(e) => {
write!(f, "error while trying to parse a number: {}", e)
@ -89,6 +96,12 @@ impl From<RmpError> for SerializerError {
}
}
impl From<SerdeJsonError> for SerializerError {
fn from(error: SerdeJsonError) -> SerializerError {
SerializerError::SerdeJsonError(error)
}
}
impl From<rocksdb::Error> for SerializerError {
fn from(error: rocksdb::Error) -> SerializerError {
SerializerError::RocksdbError(error)