mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Merge pull request #171 from meilisearch/stringify-document-id
Transform identifiers fields into a string before hashing it
This commit is contained in:
commit
6f34dccc89
@ -15,7 +15,8 @@ meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
||||
ordered-float = { version = "1.0.2", features = ["serde"] }
|
||||
rocksdb = { version = "0.12.2", default-features = false }
|
||||
sdset = "0.3.2"
|
||||
serde = { version = "1.0.91", features = ["derive"] }
|
||||
serde = { version = "1.0.99", features = ["derive"] }
|
||||
serde_json = "1.0.40"
|
||||
siphasher = "0.3.0"
|
||||
zerocopy = "0.2.2"
|
||||
|
||||
@ -29,4 +30,3 @@ branch = "arc-byte-slice"
|
||||
|
||||
[dev-dependencies]
|
||||
tempfile = "3.0.7"
|
||||
serde_json = "1.0.39"
|
||||
|
@ -2,6 +2,7 @@ use std::hash::{Hash, Hasher};
|
||||
|
||||
use meilidb_core::DocumentId;
|
||||
use serde::{ser, Serialize};
|
||||
use serde_json::Value;
|
||||
use siphasher::sip::SipHasher;
|
||||
|
||||
use super::{SerializerError, ConvertToString};
|
||||
@ -16,7 +17,18 @@ where D: serde::Serialize,
|
||||
document.serialize(serializer)
|
||||
}
|
||||
|
||||
pub fn compute_document_id<T: Hash>(t: &T) -> DocumentId {
|
||||
pub fn value_to_string(value: &Value) -> Option<String> {
|
||||
match value {
|
||||
Value::Null => None,
|
||||
Value::Bool(_) => None,
|
||||
Value::Number(value) => Some(value.to_string()),
|
||||
Value::String(value) => Some(value.to_string()),
|
||||
Value::Array(_) => None,
|
||||
Value::Object(_) => None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn compute_document_id<H: Hash>(t: H) -> DocumentId {
|
||||
let mut s = SipHasher::new();
|
||||
t.hash(&mut s);
|
||||
let hash = s.finish();
|
||||
@ -213,10 +225,11 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
|
||||
let key = key.serialize(ConvertToString)?;
|
||||
|
||||
if self.identifier == key {
|
||||
// TODO is it possible to have multiple ids?
|
||||
let id = bincode::serialize(value).unwrap();
|
||||
let document_id = compute_document_id(&id);
|
||||
self.document_id = Some(document_id);
|
||||
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
|
||||
match value_to_string(&value).map(|s| compute_document_id(&s)) {
|
||||
Some(document_id) => self.document_id = Some(document_id),
|
||||
None => return Err(SerializerError::InvalidDocumentIdType),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@ -244,10 +257,11 @@ impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> {
|
||||
where T: Serialize,
|
||||
{
|
||||
if self.identifier == key {
|
||||
// TODO can it be possible to have multiple ids?
|
||||
let id = bincode::serialize(value).unwrap();
|
||||
let document_id = compute_document_id(&id);
|
||||
self.document_id = Some(document_id);
|
||||
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
|
||||
match value_to_string(&value).map(compute_document_id) {
|
||||
Some(document_id) => self.document_id = Some(document_id),
|
||||
None => return Err(SerializerError::InvalidDocumentIdType),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
@ -28,6 +28,7 @@ use std::{fmt, error::Error};
|
||||
use meilidb_core::DocumentId;
|
||||
use meilidb_schema::SchemaAttr;
|
||||
use rmp_serde::encode::Error as RmpError;
|
||||
use serde_json::Error as SerdeJsonError;
|
||||
use serde::ser;
|
||||
|
||||
use crate::number::ParseNumberError;
|
||||
@ -35,7 +36,9 @@ use crate::number::ParseNumberError;
|
||||
#[derive(Debug)]
|
||||
pub enum SerializerError {
|
||||
DocumentIdNotFound,
|
||||
InvalidDocumentIdType,
|
||||
RmpError(RmpError),
|
||||
SerdeJsonError(SerdeJsonError),
|
||||
RocksdbError(rocksdb::Error),
|
||||
ParseNumberError(ParseNumberError),
|
||||
UnserializableType { type_name: &'static str },
|
||||
@ -55,8 +58,12 @@ impl fmt::Display for SerializerError {
|
||||
match self {
|
||||
SerializerError::DocumentIdNotFound => {
|
||||
write!(f, "serialized document does not have an id according to the schema")
|
||||
}
|
||||
},
|
||||
SerializerError::InvalidDocumentIdType => {
|
||||
write!(f, "document identifier can only be of type string or number")
|
||||
},
|
||||
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
|
||||
SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e),
|
||||
SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e),
|
||||
SerializerError::ParseNumberError(e) => {
|
||||
write!(f, "error while trying to parse a number: {}", e)
|
||||
@ -89,6 +96,12 @@ impl From<RmpError> for SerializerError {
|
||||
}
|
||||
}
|
||||
|
||||
impl From<SerdeJsonError> for SerializerError {
|
||||
fn from(error: SerdeJsonError) -> SerializerError {
|
||||
SerializerError::SerdeJsonError(error)
|
||||
}
|
||||
}
|
||||
|
||||
impl From<rocksdb::Error> for SerializerError {
|
||||
fn from(error: rocksdb::Error) -> SerializerError {
|
||||
SerializerError::RocksdbError(error)
|
||||
|
Loading…
Reference in New Issue
Block a user