From e6a7521610fcad26483c5fa20ec23c57982dbb22 Mon Sep 17 00:00:00 2001
From: Kerollmops
Date: Mon, 18 May 2020 18:56:21 +0200
Subject: [PATCH] Introduce the DiscoverIds and DocumentsIds types

---
Reviewer notes (commentary only; `git am` ignores the text between the
`---` line and the diffstat):

* `DocumentsIds` is a heed codec whose encode/decode methods simply
  delegate to `CowSet`.
* `DiscoverIds` iterates over the ids that are absent from the given set:
  first `0..first_used_id`, then every gap between two consecutive used
  ids, and finally everything after the last used id. Every range is
  end-exclusive with an upper bound of `u64::max_value()`, so that value
  itself is never yielded.
* The gap detection presumably relies on the set iterating in strictly
  ascending order; confirm against the guarantees of sdset's `Set`.

 meilisearch-core/src/store/documents_ids.rs | 75 +++++++++++++++++++++
 meilisearch-core/src/store/mod.rs           |  7 +-
 2 files changed, 79 insertions(+), 3 deletions(-)
 create mode 100644 meilisearch-core/src/store/documents_ids.rs

diff --git a/meilisearch-core/src/store/documents_ids.rs b/meilisearch-core/src/store/documents_ids.rs
new file mode 100644
index 000000000..d9d80d33c
--- /dev/null
+++ b/meilisearch-core/src/store/documents_ids.rs
@@ -0,0 +1,75 @@
+use std::borrow::Cow;
+
+use heed::{BytesDecode, BytesEncode};
+use sdset::Set;
+
+use crate::DocumentId;
+use super::cow_set::CowSet;
+
+pub struct DocumentsIds;
+
+impl BytesEncode<'_> for DocumentsIds {
+    type EItem = Set<DocumentId>;
+
+    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
+        CowSet::bytes_encode(item)
+    }
+}
+
+impl<'a> BytesDecode<'a> for DocumentsIds {
+    type DItem = Cow<'a, Set<DocumentId>>;
+
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
+        CowSet::bytes_decode(bytes)
+    }
+}
+
+pub struct DiscoverIds<'a> {
+    ids_iter: std::slice::Iter<'a, DocumentId>,
+    left_id: Option<u64>,
+    right_id: Option<u64>,
+    available_range: std::ops::Range<u64>,
+}
+
+impl DiscoverIds<'_> {
+    pub fn new(ids: &Set<DocumentId>) -> DiscoverIds {
+        let mut ids_iter = ids.iter();
+        let right_id = ids_iter.next().map(|id| id.0);
+        let available_range = 0..right_id.unwrap_or(u64::max_value());
+        DiscoverIds { ids_iter, left_id: None, right_id, available_range }
+    }
+}
+
+impl Iterator for DiscoverIds<'_> {
+    type Item = DocumentId;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            match self.available_range.next() {
+                // The available range gives us a new id, we return it.
+                Some(id) => return Some(DocumentId(id)),
+                // The available range is exhausted, we need to find the next one.
+                None if self.available_range.end == u64::max_value() => return None,
+                None => loop {
+                    self.left_id = self.right_id.take();
+                    self.right_id = self.ids_iter.next().map(|id| id.0);
+                    match (self.left_id, self.right_id) {
+                        // We found a gap in the used ids, we can yield all ids
+                        // until the end of the gap
+                        (Some(l), Some(r)) => if l.saturating_add(1) != r {
+                            self.available_range = (l + 1)..r;
+                            break;
+                        },
+                        // The last used id has been reached, we can use all ids
+                        // until u64 MAX
+                        (Some(l), None) => {
+                            self.available_range = l.saturating_add(1)..u64::max_value();
+                            break;
+                        },
+                        _ => (),
+                    }
+                },
+            }
+        }
+    }
+}
diff --git a/meilisearch-core/src/store/mod.rs b/meilisearch-core/src/store/mod.rs
index 6448a3441..c172d3204 100644
--- a/meilisearch-core/src/store/mod.rs
+++ b/meilisearch-core/src/store/mod.rs
@@ -1,15 +1,16 @@
 mod cow_set;
 mod docs_words;
-mod prefix_documents_cache;
-mod prefix_postings_lists_cache;
+mod documents_ids;
 mod documents_fields;
 mod documents_fields_counts;
+mod facets;
 mod main;
 mod postings_lists;
+mod prefix_documents_cache;
+mod prefix_postings_lists_cache;
 mod synonyms;
 mod updates;
 mod updates_results;
-mod facets;
 
 pub use self::docs_words::DocsWords;
 pub use self::facets::Facets;