From 8ae988895943a52b1352811204dbbc562cdec31c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 13 Nov 2020 14:04:24 +0100 Subject: [PATCH] Store the field id instead of the field name in the facets database --- src/heed_codec/facet/facet_value_f64_codec.rs | 24 ++++++++-------- src/heed_codec/facet/facet_value_i64_codec.rs | 22 +++++++-------- .../facet/facet_value_string_codec.rs | 18 ++++++------ src/heed_codec/mod.rs | 4 +-- src/heed_codec/str_bytes_codec.rs | 28 ------------------- 5 files changed, 33 insertions(+), 63 deletions(-) delete mode 100644 src/heed_codec/str_bytes_codec.rs diff --git a/src/heed_codec/facet/facet_value_f64_codec.rs b/src/heed_codec/facet/facet_value_f64_codec.rs index fdacb1f08..228514de5 100644 --- a/src/heed_codec/facet/facet_value_f64_codec.rs +++ b/src/heed_codec/facet/facet_value_f64_codec.rs @@ -1,26 +1,24 @@ use std::borrow::Cow; use std::convert::TryInto; -use std::str; -use crate::heed_codec::StrBytesCodec; use crate::facet::value_encoding::f64_into_bytes; pub struct FacetValueF64Codec; impl<'a> heed::BytesDecode<'a> for FacetValueF64Codec { - type DItem = (&'a str, f64); + type DItem = (u8, f64); fn bytes_decode(bytes: &'a [u8]) -> Option { - let (name, buffer) = StrBytesCodec::bytes_decode(bytes)?; + let (field_id, buffer) = bytes.split_first()?; let value = buffer[8..].try_into().ok().map(f64::from_be_bytes)?; - Some((name, value)) + Some((*field_id, value)) } } -impl<'a> heed::BytesEncode<'a> for FacetValueF64Codec { - type EItem = (&'a str, f64); +impl heed::BytesEncode<'_> for FacetValueF64Codec { + type EItem = (u8, f64); - fn bytes_encode((name, value): &Self::EItem) -> Option> { + fn bytes_encode((field_id, value): &Self::EItem) -> Option> { let mut buffer = [0u8; 16]; // Write the globally ordered float. @@ -31,8 +29,10 @@ impl<'a> heed::BytesEncode<'a> for FacetValueF64Codec { let bytes = value.to_be_bytes(); buffer[8..].copy_from_slice(&bytes[..]); - let tuple = (*name, &buffer[..]); - StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned) + let mut bytes = Vec::with_capacity(buffer.len() + 1); + bytes.push(*field_id); + bytes.extend_from_slice(&buffer[..]); + Some(Cow::Owned(bytes)) } } @@ -43,8 +43,8 @@ mod tests { #[test] fn globally_ordered_f64() { - let bytes = FacetValueF64Codec::bytes_encode(&("hello", -32.0)).unwrap(); + let bytes = FacetValueF64Codec::bytes_encode(&(3, -32.0)).unwrap(); let (name, value) = FacetValueF64Codec::bytes_decode(&bytes).unwrap(); - assert_eq!((name, value), ("hello", -32.0)); + assert_eq!((name, value), (3, -32.0)); } } diff --git a/src/heed_codec/facet/facet_value_i64_codec.rs b/src/heed_codec/facet/facet_value_i64_codec.rs index e3c333883..f99b8a3ea 100644 --- a/src/heed_codec/facet/facet_value_i64_codec.rs +++ b/src/heed_codec/facet/facet_value_i64_codec.rs @@ -1,28 +1,28 @@ use std::borrow::Cow; use std::convert::TryInto; -use std::str; -use crate::heed_codec::StrBytesCodec; use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes}; pub struct FacetValueI64Codec; impl<'a> heed::BytesDecode<'a> for FacetValueI64Codec { - type DItem = (&'a str, i64); + type DItem = (u8, i64); fn bytes_decode(bytes: &'a [u8]) -> Option { - let (name, bytes) = StrBytesCodec::bytes_decode(bytes)?; - let value = bytes.try_into().map(i64_from_bytes).ok()?; - Some((name, value)) + let (field_id, buffer) = bytes.split_first()?; + let value = buffer.try_into().map(i64_from_bytes).ok()?; + Some((*field_id, value)) } } -impl<'a> heed::BytesEncode<'a> for FacetValueI64Codec { - type EItem = (&'a str, i64); +impl heed::BytesEncode<'_> for FacetValueI64Codec { + type EItem = (u8, i64); - fn bytes_encode((name, value): &Self::EItem) -> Option> { + fn bytes_encode((field_id, value): &Self::EItem) -> Option> { let value = i64_into_bytes(*value); - let tuple = (*name, &value[..]); - StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned) + let mut bytes = Vec::with_capacity(value.len() + 1); + bytes.push(*field_id); + bytes.extend_from_slice(&value[..]); + Some(Cow::Owned(bytes)) } } diff --git a/src/heed_codec/facet/facet_value_string_codec.rs b/src/heed_codec/facet/facet_value_string_codec.rs index 8b046192a..faa8b407b 100644 --- a/src/heed_codec/facet/facet_value_string_codec.rs +++ b/src/heed_codec/facet/facet_value_string_codec.rs @@ -1,25 +1,25 @@ use std::borrow::Cow; use std::str; -use crate::heed_codec::StrBytesCodec; - pub struct FacetValueStringCodec; impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec { - type DItem = (&'a str, &'a str); + type DItem = (u8, &'a str); fn bytes_decode(bytes: &'a [u8]) -> Option { - let (name, bytes) = StrBytesCodec::bytes_decode(bytes)?; + let (field_id, bytes) = bytes.split_first()?; let value = str::from_utf8(bytes).ok()?; - Some((name, value)) + Some((*field_id, value)) } } impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec { - type EItem = (&'a str, &'a str); + type EItem = (u8, &'a str); - fn bytes_encode((name, value): &Self::EItem) -> Option> { - let tuple = (*name, value.as_bytes()); - StrBytesCodec::bytes_encode(&tuple).map(Cow::into_owned).map(Cow::Owned) + fn bytes_encode((field_id, value): &Self::EItem) -> Option> { + let mut bytes = Vec::with_capacity(value.len() + 1); + bytes.push(*field_id); + bytes.extend_from_slice(value.as_bytes()); + Some(Cow::Owned(bytes)) } } diff --git a/src/heed_codec/mod.rs b/src/heed_codec/mod.rs index 260e79c4b..e7b8cf256 100644 --- a/src/heed_codec/mod.rs +++ b/src/heed_codec/mod.rs @@ -1,16 +1,14 @@ mod beu32_str_codec; mod bo_roaring_bitmap_codec; mod cbo_roaring_bitmap_codec; -mod facet; mod obkv_codec; mod roaring_bitmap_codec; -mod str_bytes_codec; mod str_str_u8_codec; +pub mod facet; pub use self::beu32_str_codec::BEU32StrCodec; pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec; pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec; pub use self::obkv_codec::ObkvCodec; pub use self::roaring_bitmap_codec::RoaringBitmapCodec; -pub use self::str_bytes_codec::StrBytesCodec; pub use self::str_str_u8_codec::StrStrU8Codec; diff --git a/src/heed_codec/str_bytes_codec.rs b/src/heed_codec/str_bytes_codec.rs deleted file mode 100644 index 1a864c1fb..000000000 --- a/src/heed_codec/str_bytes_codec.rs +++ /dev/null @@ -1,28 +0,0 @@ -use std::borrow::Cow; -use std::str; - -pub struct StrBytesCodec; - -impl<'a> heed::BytesDecode<'a> for StrBytesCodec { - type DItem = (&'a str, &'a [u8]); - - fn bytes_decode(bytes: &'a [u8]) -> Option { - let s1_end = bytes.iter().position(|b| *b == 0)?; - let (s1_bytes, s2_bytes) = bytes.split_at(s1_end); - let s1 = str::from_utf8(s1_bytes).ok()?; - let s2 = &s2_bytes[1..]; - Some((s1, s2)) - } -} - -impl<'a> heed::BytesEncode<'a> for StrBytesCodec { - type EItem = (&'a str, &'a [u8]); - - fn bytes_encode((s1, s2): &Self::EItem) -> Option> { - let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1); - bytes.extend_from_slice(s1.as_bytes()); - bytes.push(0); - bytes.extend_from_slice(s2); - Some(Cow::Owned(bytes)) - } -}