mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-10 23:18:55 +01:00
Increase the CboRoaringBitmapCodec threshold
This commit is contained in:
parent
e41a3822a6
commit
6cc6addc2f
@ -5,13 +5,18 @@ use std::mem::size_of;
|
|||||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
/// This is the limit where using a byteorder became less size efficient
|
||||||
|
/// than using a direct roaring encoding, it is also the point where we are able
|
||||||
|
/// to determine the encoding used only by using the array of bytes length.
|
||||||
|
const THRESHOLD: usize = 7;
|
||||||
|
|
||||||
/// A conditionnal codec that either use the RoaringBitmap
|
/// A conditionnal codec that either use the RoaringBitmap
|
||||||
/// or a lighter ByteOrder en/decoding method.
|
/// or a lighter ByteOrder en/decoding method.
|
||||||
pub struct CboRoaringBitmapCodec;
|
pub struct CboRoaringBitmapCodec;
|
||||||
|
|
||||||
impl CboRoaringBitmapCodec {
|
impl CboRoaringBitmapCodec {
|
||||||
pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
|
pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
|
||||||
if roaring.len() <= 4 {
|
if roaring.len() <= THRESHOLD as u64 {
|
||||||
roaring.len() as usize * size_of::<u32>()
|
roaring.len() as usize * size_of::<u32>()
|
||||||
} else {
|
} else {
|
||||||
roaring.serialized_size()
|
roaring.serialized_size()
|
||||||
@ -19,8 +24,8 @@ impl CboRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) -> io::Result<()> {
|
pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) -> io::Result<()> {
|
||||||
if roaring.len() <= 4 {
|
if roaring.len() <= THRESHOLD as u64 {
|
||||||
// If the number of items (u32s) to encode is less than or equal to 4
|
// If the number of items (u32s) to encode is less than or equal to the threshold
|
||||||
// it means that it would weigh the same or less than the RoaringBitmap
|
// it means that it would weigh the same or less than the RoaringBitmap
|
||||||
// header, so we directly encode them using ByteOrder instead.
|
// header, so we directly encode them using ByteOrder instead.
|
||||||
for integer in roaring {
|
for integer in roaring {
|
||||||
@ -34,8 +39,8 @@ impl CboRoaringBitmapCodec {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
|
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
|
||||||
if bytes.len() <= 4 * size_of::<u32>() {
|
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||||
// If there is 4 or less than 4 integers that can fit into this array
|
// If there is threshold or less than threshold integers that can fit into this array
|
||||||
// of bytes it means that we used the ByteOrder codec serializer.
|
// of bytes it means that we used the ByteOrder codec serializer.
|
||||||
let mut bitmap = RoaringBitmap::new();
|
let mut bitmap = RoaringBitmap::new();
|
||||||
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
|
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
|
||||||
@ -44,7 +49,7 @@ impl CboRoaringBitmapCodec {
|
|||||||
Ok(bitmap)
|
Ok(bitmap)
|
||||||
} else {
|
} else {
|
||||||
// Otherwise, it means we used the classic RoaringBitmapCodec and
|
// Otherwise, it means we used the classic RoaringBitmapCodec and
|
||||||
// that the header takes 4 integers.
|
// that the header takes threshold integers.
|
||||||
RoaringBitmap::deserialize_from(bytes)
|
RoaringBitmap::deserialize_from(bytes)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -75,10 +80,29 @@ mod tests {
|
|||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn limit_four() {
|
fn verify_encoding_decoding() {
|
||||||
let input = RoaringBitmap::from_iter(vec![0, 1, 2, 3]);
|
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
|
||||||
let bytes = CboRoaringBitmapCodec::bytes_encode(&input).unwrap();
|
let bytes = CboRoaringBitmapCodec::bytes_encode(&input).unwrap();
|
||||||
let output = CboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
|
let output = CboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
|
||||||
assert_eq!(input, output);
|
assert_eq!(input, output);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn verify_threshold() {
|
||||||
|
let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
|
||||||
|
|
||||||
|
// use roaring bitmap
|
||||||
|
let mut bytes = Vec::new();
|
||||||
|
input.serialize_into(&mut bytes).unwrap();
|
||||||
|
let roaring_size = bytes.len();
|
||||||
|
|
||||||
|
// use byteorder directly
|
||||||
|
let mut bytes = Vec::new();
|
||||||
|
for integer in input {
|
||||||
|
bytes.write_u32::<NativeEndian>(integer).unwrap();
|
||||||
|
}
|
||||||
|
let bo_size = bytes.len();
|
||||||
|
|
||||||
|
assert!(roaring_size > bo_size);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user