mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Merge pull request #171 from meilisearch/stringify-document-id
Transform identifiers fields into a string before hashing it
This commit is contained in:
commit
6f34dccc89
@ -15,7 +15,8 @@ meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.1.0" }
|
|||||||
ordered-float = { version = "1.0.2", features = ["serde"] }
|
ordered-float = { version = "1.0.2", features = ["serde"] }
|
||||||
rocksdb = { version = "0.12.2", default-features = false }
|
rocksdb = { version = "0.12.2", default-features = false }
|
||||||
sdset = "0.3.2"
|
sdset = "0.3.2"
|
||||||
serde = { version = "1.0.91", features = ["derive"] }
|
serde = { version = "1.0.99", features = ["derive"] }
|
||||||
|
serde_json = "1.0.40"
|
||||||
siphasher = "0.3.0"
|
siphasher = "0.3.0"
|
||||||
zerocopy = "0.2.2"
|
zerocopy = "0.2.2"
|
||||||
|
|
||||||
@ -29,4 +30,3 @@ branch = "arc-byte-slice"
|
|||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tempfile = "3.0.7"
|
tempfile = "3.0.7"
|
||||||
serde_json = "1.0.39"
|
|
||||||
|
@ -2,6 +2,7 @@ use std::hash::{Hash, Hasher};
|
|||||||
|
|
||||||
use meilidb_core::DocumentId;
|
use meilidb_core::DocumentId;
|
||||||
use serde::{ser, Serialize};
|
use serde::{ser, Serialize};
|
||||||
|
use serde_json::Value;
|
||||||
use siphasher::sip::SipHasher;
|
use siphasher::sip::SipHasher;
|
||||||
|
|
||||||
use super::{SerializerError, ConvertToString};
|
use super::{SerializerError, ConvertToString};
|
||||||
@ -16,7 +17,18 @@ where D: serde::Serialize,
|
|||||||
document.serialize(serializer)
|
document.serialize(serializer)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn compute_document_id<T: Hash>(t: &T) -> DocumentId {
|
pub fn value_to_string(value: &Value) -> Option<String> {
|
||||||
|
match value {
|
||||||
|
Value::Null => None,
|
||||||
|
Value::Bool(_) => None,
|
||||||
|
Value::Number(value) => Some(value.to_string()),
|
||||||
|
Value::String(value) => Some(value.to_string()),
|
||||||
|
Value::Array(_) => None,
|
||||||
|
Value::Object(_) => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn compute_document_id<H: Hash>(t: H) -> DocumentId {
|
||||||
let mut s = SipHasher::new();
|
let mut s = SipHasher::new();
|
||||||
t.hash(&mut s);
|
t.hash(&mut s);
|
||||||
let hash = s.finish();
|
let hash = s.finish();
|
||||||
@ -213,10 +225,11 @@ impl<'a> ser::SerializeMap for ExtractDocumentIdMapSerializer<'a> {
|
|||||||
let key = key.serialize(ConvertToString)?;
|
let key = key.serialize(ConvertToString)?;
|
||||||
|
|
||||||
if self.identifier == key {
|
if self.identifier == key {
|
||||||
// TODO is it possible to have multiple ids?
|
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
|
||||||
let id = bincode::serialize(value).unwrap();
|
match value_to_string(&value).map(|s| compute_document_id(&s)) {
|
||||||
let document_id = compute_document_id(&id);
|
Some(document_id) => self.document_id = Some(document_id),
|
||||||
self.document_id = Some(document_id);
|
None => return Err(SerializerError::InvalidDocumentIdType),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -244,10 +257,11 @@ impl<'a> ser::SerializeStruct for ExtractDocumentIdStructSerializer<'a> {
|
|||||||
where T: Serialize,
|
where T: Serialize,
|
||||||
{
|
{
|
||||||
if self.identifier == key {
|
if self.identifier == key {
|
||||||
// TODO can it be possible to have multiple ids?
|
let value = serde_json::to_string(value).and_then(|s| serde_json::from_str(&s))?;
|
||||||
let id = bincode::serialize(value).unwrap();
|
match value_to_string(&value).map(compute_document_id) {
|
||||||
let document_id = compute_document_id(&id);
|
Some(document_id) => self.document_id = Some(document_id),
|
||||||
self.document_id = Some(document_id);
|
None => return Err(SerializerError::InvalidDocumentIdType),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -28,6 +28,7 @@ use std::{fmt, error::Error};
|
|||||||
use meilidb_core::DocumentId;
|
use meilidb_core::DocumentId;
|
||||||
use meilidb_schema::SchemaAttr;
|
use meilidb_schema::SchemaAttr;
|
||||||
use rmp_serde::encode::Error as RmpError;
|
use rmp_serde::encode::Error as RmpError;
|
||||||
|
use serde_json::Error as SerdeJsonError;
|
||||||
use serde::ser;
|
use serde::ser;
|
||||||
|
|
||||||
use crate::number::ParseNumberError;
|
use crate::number::ParseNumberError;
|
||||||
@ -35,7 +36,9 @@ use crate::number::ParseNumberError;
|
|||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub enum SerializerError {
|
pub enum SerializerError {
|
||||||
DocumentIdNotFound,
|
DocumentIdNotFound,
|
||||||
|
InvalidDocumentIdType,
|
||||||
RmpError(RmpError),
|
RmpError(RmpError),
|
||||||
|
SerdeJsonError(SerdeJsonError),
|
||||||
RocksdbError(rocksdb::Error),
|
RocksdbError(rocksdb::Error),
|
||||||
ParseNumberError(ParseNumberError),
|
ParseNumberError(ParseNumberError),
|
||||||
UnserializableType { type_name: &'static str },
|
UnserializableType { type_name: &'static str },
|
||||||
@ -55,8 +58,12 @@ impl fmt::Display for SerializerError {
|
|||||||
match self {
|
match self {
|
||||||
SerializerError::DocumentIdNotFound => {
|
SerializerError::DocumentIdNotFound => {
|
||||||
write!(f, "serialized document does not have an id according to the schema")
|
write!(f, "serialized document does not have an id according to the schema")
|
||||||
}
|
},
|
||||||
|
SerializerError::InvalidDocumentIdType => {
|
||||||
|
write!(f, "document identifier can only be of type string or number")
|
||||||
|
},
|
||||||
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
|
SerializerError::RmpError(e) => write!(f, "rmp serde related error: {}", e),
|
||||||
|
SerializerError::SerdeJsonError(e) => write!(f, "serde json error: {}", e),
|
||||||
SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e),
|
SerializerError::RocksdbError(e) => write!(f, "RocksDB related error: {}", e),
|
||||||
SerializerError::ParseNumberError(e) => {
|
SerializerError::ParseNumberError(e) => {
|
||||||
write!(f, "error while trying to parse a number: {}", e)
|
write!(f, "error while trying to parse a number: {}", e)
|
||||||
@ -89,6 +96,12 @@ impl From<RmpError> for SerializerError {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl From<SerdeJsonError> for SerializerError {
|
||||||
|
fn from(error: SerdeJsonError) -> SerializerError {
|
||||||
|
SerializerError::SerdeJsonError(error)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
impl From<rocksdb::Error> for SerializerError {
|
impl From<rocksdb::Error> for SerializerError {
|
||||||
fn from(error: rocksdb::Error) -> SerializerError {
|
fn from(error: rocksdb::Error) -> SerializerError {
|
||||||
SerializerError::RocksdbError(error)
|
SerializerError::RocksdbError(error)
|
||||||
|
Loading…
Reference in New Issue
Block a user