From 4ca4a3f954b2a2df17a6aaf0ccf77fafccd65d7e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Wed, 5 Jun 2024 15:06:57 -0400
Subject: [PATCH] Make the CboRoaringBitmapCodec support intersection on deserialization

---
 .../cbo_roaring_bitmap_codec.rs               | 20 ++++++++++++++++++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
index 1db518c7d..a04698019 100644
--- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
+++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
@@ -1,5 +1,5 @@
 use std::borrow::Cow;
-use std::io;
+use std::io::{self, Cursor};
 use std::mem::size_of;
 
 use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
@@ -57,6 +57,24 @@ impl CboRoaringBitmapCodec {
         }
     }
 
+    pub fn intersection_with_serialized(
+        mut bytes: &[u8],
+        other: &RoaringBitmap,
+    ) -> io::Result<RoaringBitmap> {
+        // See above `deserialize_from` method for implementation details.
+        if bytes.len() <= THRESHOLD * size_of::<u32>() {
+            let mut bitmap = RoaringBitmap::new();
+            while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
+                if other.contains(integer) {
+                    bitmap.insert(integer);
+                }
+            }
+            Ok(bitmap)
+        } else {
+            other.intersection_with_serialized_unchecked(Cursor::new(bytes))
+        }
+    }
+
     /// Merge serialized CboRoaringBitmaps in a buffer.
     ///
     /// if the merged values length is under the threshold, values are directly