From 8d82e37ec03efa7679037f5829771a82a038ec1d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Thu, 22 Oct 2020 17:41:22 +0200
Subject: [PATCH] Introduce the AvailableDocumentsIds iterator

---
 src/available_documents_ids.rs | 67 ++++++++++++++++++++++++++++++++++
 src/lib.rs                     |  2 +-
 2 files changed, 68 insertions(+), 1 deletion(-)
 create mode 100644 src/available_documents_ids.rs

diff --git a/src/available_documents_ids.rs b/src/available_documents_ids.rs
new file mode 100644
index 000000000..34ff743f0
--- /dev/null
+++ b/src/available_documents_ids.rs
@@ -0,0 +1,67 @@
+use std::iter::{Chain, FromIterator};
+use std::ops::RangeInclusive;
+use roaring::bitmap::{RoaringBitmap, IntoIter};
+
+pub struct AvailableDocumentsIds {
+    iter: Chain<IntoIter, RangeInclusive<u32>>,
+}
+
+impl AvailableDocumentsIds {
+    pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds {
+        match docids.max() {
+            Some(last_id) => {
+                let mut available = RoaringBitmap::from_iter(0..last_id);
+                available.difference_with(&docids);
+
+                let iter = match last_id.checked_add(1) {
+                    Some(id) => id..=u32::max_value(),
+                    None => 1..=0, // empty range iterator
+                };
+
+                AvailableDocumentsIds {
+                    iter: available.into_iter().chain(iter),
+                }
+            },
+            None => {
+                let empty = RoaringBitmap::new().into_iter();
+                AvailableDocumentsIds {
+                    iter: empty.chain(0..=u32::max_value()),
+                }
+            },
+        }
+    }
+}
+
+impl Iterator for AvailableDocumentsIds {
+    type Item = u32;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        self.iter.next()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn empty() {
+        let base = RoaringBitmap::new();
+        let left = AvailableDocumentsIds::from_documents_ids(&base);
+        let right = 0..=u32::max_value();
+        left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
+    }
+
+    #[test]
+    fn scattered() {
+        let mut base = RoaringBitmap::new();
+        base.insert(0);
+        base.insert(10);
+        base.insert(100);
+        base.insert(405);
+
+        let left = AvailableDocumentsIds::from_documents_ids(&base);
+        let right = (0..=u32::max_value()).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
+        left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 2d03b4ddf..366bf5428 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,4 @@
+mod available_documents_ids;
 mod criterion;
 mod fields_ids_map;
 mod index;
@@ -34,4 +35,3 @@ pub type BEU64 = heed::zerocopy::U64;
 pub type DocumentId = u32;
 pub type Attribute = u32;
 pub type Position = u32;
-