From db6210c7ee7b0dc9adf544ed7c53b41e235d1acf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 11 Feb 2019 16:58:44 +0100 Subject: [PATCH] feat: Introduce the Number type --- src/database/mod.rs | 10 +- src/database/number.rs | 98 +++++++++++++++++++ src/database/serde/mod.rs | 2 +- src/database/serde/serializer.rs | 8 +- .../{value_to_i64.rs => value_to_number.rs} | 39 +++++--- src/database/update.rs | 6 +- src/database/view.rs | 1 - 7 files changed, 135 insertions(+), 29 deletions(-) create mode 100644 src/database/number.rs rename src/database/serde/{value_to_i64.rs => value_to_number.rs} (85%) diff --git a/src/database/mod.rs b/src/database/mod.rs index 3e11b1b81..8097dd726 100644 --- a/src/database/mod.rs +++ b/src/database/mod.rs @@ -8,13 +8,13 @@ use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicBool, Ordering}; use std::ops::{Deref, DerefMut}; -use crossbeam::atomic::ArcCell; -use log::{info, error, warn}; -use rocksdb::rocksdb::{Writable, Snapshot}; use rocksdb::rocksdb_options::{DBOptions, ColumnFamilyOptions}; +use rocksdb::rocksdb::{Writable, Snapshot}; use rocksdb::{DB, MergeOperands}; +use crossbeam::atomic::ArcCell; use lockfree::map::Map; use hashbrown::HashMap; +use log::{info, error, warn}; pub use self::document_key::{DocumentKey, DocumentKeyAttr}; pub use self::view::{DatabaseView, DocumentIter}; @@ -22,8 +22,9 @@ pub use self::update::Update; pub use self::serde::SerializerError; pub use self::schema::Schema; pub use self::index::Index; +pub use self::number::{Number, ParseNumberError}; -pub type RankedMap = HashMap<(DocumentId, SchemaAttr), i64>; +pub type RankedMap = HashMap<(DocumentId, SchemaAttr), Number>; const DATA_INDEX: &[u8] = b"data-index"; const DATA_RANKED_MAP: &[u8] = b"data-ranked-map"; @@ -31,6 +32,7 @@ const DATA_SCHEMA: &[u8] = b"data-schema"; pub mod schema; pub(crate) mod index; +mod number; mod document_key; mod serde; mod update; diff --git a/src/database/number.rs b/src/database/number.rs new file mode 100644 index 000000000..b2c4c9a88 --- /dev/null +++ b/src/database/number.rs @@ -0,0 +1,98 @@ +use std::cmp::Ordering; +use std::str::FromStr; +use std::fmt; + +use serde_derive::{Serialize, Deserialize}; + +#[derive(Serialize, Deserialize)] +#[derive(Debug, Copy, Clone)] +pub enum Number { + Unsigned(u64), + Signed(i64), + Float(f64), +} + +impl FromStr for Number { + type Err = ParseNumberError; + + fn from_str(s: &str) -> Result { + if let Ok(unsigned) = u64::from_str(s) { + return Ok(Number::Unsigned(unsigned)) + } + + if let Ok(signed) = i64::from_str(s) { + return Ok(Number::Signed(signed)) + } + + if let Ok(float) = f64::from_str(s) { + if float == 0.0 || float.is_normal() { + return Ok(Number::Float(float)) + } + } + + Err(ParseNumberError) + } +} + +impl PartialOrd for Number { + fn partial_cmp(&self, other: &Number) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for Number { + fn cmp(&self, other: &Number) -> Ordering { + use Number::*; + match (self, other) { + (Unsigned(s), Unsigned(o)) => s.cmp(o), + (Unsigned(s), Signed(o)) => { + let s = i128::from(*s); + let o = i128::from(*o); + s.cmp(&o) + }, + (Unsigned(s), Float(o)) => { + let s = *s as f64; + s.partial_cmp(&o).unwrap_or(Ordering::Equal) + }, + + (Signed(s), Unsigned(o)) => { + let s = i128::from(*s); + let o = i128::from(*o); + s.cmp(&o) + }, + (Signed(s), Signed(o)) => s.cmp(o), + (Signed(s), Float(o)) => { + let s = *s as f64; + s.partial_cmp(o).unwrap_or(Ordering::Equal) + }, + + (Float(s), Unsigned(o)) => { + let o = *o as f64; + s.partial_cmp(&o).unwrap_or(Ordering::Equal) + }, + (Float(s), Signed(o)) => { + let o = *o as f64; + s.partial_cmp(&o).unwrap_or(Ordering::Equal) + }, + (Float(s), Float(o)) => { + s.partial_cmp(o).unwrap_or(Ordering::Equal) + }, + } + } +} + +impl PartialEq for Number { + fn eq(&self, other: &Number) -> bool { + self.cmp(other) == Ordering::Equal + } +} + +impl Eq for Number { } + +pub struct ParseNumberError; + +impl fmt::Display for ParseNumberError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + f.write_str("can not parse number") + } +} diff --git a/src/database/serde/mod.rs b/src/database/serde/mod.rs index 50a3c619e..493124f7e 100644 --- a/src/database/serde/mod.rs +++ b/src/database/serde/mod.rs @@ -17,7 +17,7 @@ macro_rules! forward_to_unserializable_type { pub mod find_id; pub mod key_to_string; -pub mod value_to_i64; +pub mod value_to_number; pub mod serializer; pub mod indexer_serializer; pub mod deserializer; diff --git a/src/database/serde/serializer.rs b/src/database/serde/serializer.rs index bc8b4d1ab..2f41bb82c 100644 --- a/src/database/serde/serializer.rs +++ b/src/database/serde/serializer.rs @@ -5,7 +5,7 @@ use serde::ser; use crate::database::serde::indexer_serializer::IndexerSerializer; use crate::database::serde::key_to_string::KeyToStringSerializer; -use crate::database::serde::value_to_i64::ValueToI64Serializer; +use crate::database::serde::value_to_number::ValueToNumberSerializer; use crate::database::update::DocumentUpdate; use crate::database::serde::SerializerError; use crate::tokenizer::TokenizerBuilder; @@ -231,8 +231,8 @@ where B: TokenizerBuilder value.serialize(serializer)?; } if props.is_ranked() { - let integer = value.serialize(ValueToI64Serializer)?; - self.update.register_ranked_attribute(attr, integer)?; + let number = value.serialize(ValueToNumberSerializer)?; + self.update.register_ranked_attribute(attr, number)?; } } @@ -282,7 +282,7 @@ where B: TokenizerBuilder value.serialize(serializer)?; } if props.is_ranked() { - let integer = value.serialize(ValueToI64Serializer)?; + let integer = value.serialize(ValueToNumberSerializer)?; self.update.register_ranked_attribute(attr, integer)?; } } diff --git a/src/database/serde/value_to_i64.rs b/src/database/serde/value_to_number.rs similarity index 85% rename from src/database/serde/value_to_i64.rs rename to src/database/serde/value_to_number.rs index 9c046d391..a70b92fc4 100644 --- a/src/database/serde/value_to_i64.rs +++ b/src/database/serde/value_to_number.rs @@ -1,12 +1,15 @@ +use std::str::FromStr; + use serde::Serialize; use serde::{ser, ser::Error}; use crate::database::serde::SerializerError; +use crate::database::Number; -pub struct ValueToI64Serializer; +pub struct ValueToNumberSerializer; -impl ser::Serializer for ValueToI64Serializer { - type Ok = i64; +impl ser::Serializer for ValueToNumberSerializer { + type Ok = Number; type Error = SerializerError; type SerializeSeq = ser::Impossible; type SerializeTuple = ser::Impossible; @@ -19,46 +22,50 @@ impl ser::Serializer for ValueToI64Serializer { forward_to_unserializable_type! { bool => serialize_bool, char => serialize_char, - - f32 => serialize_f32, - f64 => serialize_f64, } fn serialize_i8(self, value: i8) -> Result { - Ok(i64::from(value)) + Ok(Number::Signed(value as i64)) } fn serialize_i16(self, value: i16) -> Result { - Ok(i64::from(value)) + Ok(Number::Signed(value as i64)) } fn serialize_i32(self, value: i32) -> Result { - Ok(i64::from(value)) + Ok(Number::Signed(value as i64)) } fn serialize_i64(self, value: i64) -> Result { - Ok(i64::from(value)) + Ok(Number::Signed(value as i64)) } fn serialize_u8(self, value: u8) -> Result { - Ok(i64::from(value)) + Ok(Number::Unsigned(value as u64)) } fn serialize_u16(self, value: u16) -> Result { - Ok(i64::from(value)) + Ok(Number::Unsigned(value as u64)) } fn serialize_u32(self, value: u32) -> Result { - Ok(i64::from(value)) + Ok(Number::Unsigned(value as u64)) } fn serialize_u64(self, value: u64) -> Result { - // Ok(i64::from(value)) - unimplemented!() + Ok(Number::Unsigned(value as u64)) + } + + fn serialize_f32(self, value: f32) -> Result { + Ok(Number::Float(value as f64)) + } + + fn serialize_f64(self, value: f64) -> Result { + Ok(Number::Float(value)) } fn serialize_str(self, value: &str) -> Result { - i64::from_str_radix(value, 10).map_err(SerializerError::custom) + Number::from_str(value).map_err(SerializerError::custom) } fn serialize_bytes(self, _v: &[u8]) -> Result { diff --git a/src/database/update.rs b/src/database/update.rs index 5961b2ec8..e37576e6d 100644 --- a/src/database/update.rs +++ b/src/database/update.rs @@ -16,8 +16,8 @@ use crate::tokenizer::TokenizerBuilder; use crate::data::{DocIds, DocIndexes}; use crate::database::schema::Schema; use crate::database::index::Index; -use crate::database::RankedMap; use crate::database::{DATA_INDEX, DATA_RANKED_MAP}; +use crate::database::{RankedMap, Number}; use crate::{DocumentId, DocIndex}; pub type Token = Vec; // TODO could be replaced by a SmallVec @@ -205,7 +205,7 @@ impl<'a> DocumentUpdate<'a> { pub fn register_ranked_attribute( &mut self, attr: SchemaAttr, - integer: i64, + number: Number, ) -> Result<(), SerializerError> { use serde::ser::Error; @@ -216,7 +216,7 @@ impl<'a> DocumentUpdate<'a> { )); } - self.inner.documents_ranked_fields.insert((self.document_id, attr), integer); + self.inner.documents_ranked_fields.insert((self.document_id, attr), number); Ok(()) } diff --git a/src/database/view.rs b/src/database/view.rs index e757b6021..74e4ef002 100644 --- a/src/database/view.rs +++ b/src/database/view.rs @@ -11,7 +11,6 @@ use crate::database::{retrieve_data_schema, retrieve_data_index, retrieve_data_r use crate::database::serde::deserializer::Deserializer; use crate::database::{DocumentKey, DocumentKeyAttr}; use crate::rank::{QueryBuilder, FilterFunc}; -use crate::database::schema::SchemaAttr; use crate::database::schema::Schema; use crate::database::index::Index; use crate::database::RankedMap;