Introduce the DiscoverIds and DocumentsIds types

2025-07-04 20:37:15 +02:00 · 2020-05-18 18:56:21 +02:00 · 2020-05-18 18:56:21 +02:00 · e6a7521610
commit e6a7521610
parent 3e84f916b6
2 changed files with 79 additions and 3 deletions
--- a/meilisearch-core/src/store/documents_ids.rs
+++ b/meilisearch-core/src/store/documents_ids.rs
@ -0,0 +1,75 @@
+use std::borrow::Cow;
+
+use heed::{BytesDecode, BytesEncode};
+use sdset::Set;
+
+use crate::DocumentId;
+use super::cow_set::CowSet;
+
+pub struct DocumentsIds; // Unit struct used as the heed `BytesEncode`/`BytesDecode` codec for `Set<DocumentId>`.
+
+impl BytesEncode<'_> for DocumentsIds { // Encoding half of the `DocumentsIds` codec.
+    type EItem = Set<DocumentId>; // The value that gets encoded: a `Set` of `DocumentId`s.
+
+    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> { // Returns whatever `CowSet` produces.
+        CowSet::bytes_encode(item) // No logic of its own: the `CowSet` codec does all the work.
+    }
+}
+
+impl<'a> BytesDecode<'a> for DocumentsIds { // Decoding half of the `DocumentsIds` codec.
+    type DItem = Cow<'a, Set<DocumentId>>; // Decoded set, in a `Cow` bound to the lifetime of the input bytes.
+
+    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> { // Returns whatever `CowSet` produces.
+        CowSet::bytes_decode(bytes) // No logic of its own: the `CowSet` codec does all the work.
+    }
+}
+
+pub struct DiscoverIds<'a> { // Iterator state: walks a set of used ids and hands out the ids in between.
+    ids_iter: std::slice::Iter<'a, DocumentId>, // Ids of the set given to `new` that were not inspected yet.
+    left_id: Option<u64>, // Used id on the lower side of the gap being looked for.
+    right_id: Option<u64>, // Used id on the upper side of that gap; `None` when `ids_iter` had no more id.
+    available_range: std::ops::Range<u64>, // Range of ids that `next` is currently yielding from.
+}
+
+impl DiscoverIds<'_> {
+    pub fn new(ids: &Set<DocumentId>) -> DiscoverIds { // Builds the iterator; the first range lies below the first id.
+        let mut ids_iter = ids.iter();
+        let right_id = ids_iter.next().map(|id| id.0); // First id of the set, `None` when the set is empty.
+        let available_range = 0..right_id.unwrap_or(u64::max_value()); // Empty set: 0..u64::MAX (end excluded).
+        DiscoverIds { ids_iter, left_id: None, right_id, available_range }
+    }
+}
+
+impl Iterator for DiscoverIds<'_> { // Yields ids that lie before, between and after the ids of the set.
+    type Item = DocumentId;
+
+    fn next(&mut self) -> Option<Self::Item> { // NOTE(review): gap logic assumes the set iterates in ascending order — confirm.
+        loop {
+            match self.available_range.next() {
+                // The current range still holds an id: wrap it and yield it.
+                Some(id) => return Some(DocumentId(id)),
+                // The range is exhausted: either we are done, or we search for the next gap.
+                None if self.available_range.end == u64::max_value() => return None, // The final range was consumed.
+                None => loop { // Scan pairs of consecutive used ids until a non-empty gap is found.
+                    self.left_id = self.right_id.take(); // Slide the (left, right) window one id forward.
+                    self.right_id = self.ids_iter.next().map(|id| id.0);
+                    match (self.left_id, self.right_id) {
+                        // Two used ids in a row: when they are not adjacent, every id
+                        // strictly between them is free; adjacent ids keep the scan going.
+                        (Some(l), Some(r)) => if l.saturating_add(1) != r {
+                            self.available_range = (l + 1)..r;
+                            break;
+                        },
+                        // No used id remains after `l`: everything from `l + 1` up to
+                        // u64::MAX (excluded, the range is half-open) can be yielded.
+                        (Some(l), None) => {
+                            self.available_range = l.saturating_add(1)..u64::max_value();
+                            break;
+                        },
+                        _ => (), // NOTE(review): `left_id` is `None` here; looks unreachable — confirm.
+                    }
+                },
+            }
+        }
+    }
+}
--- a/meilisearch-core/src/store/mod.rs
+++ b/meilisearch-core/src/store/mod.rs
@ -1,15 +1,16 @@
 mod cow_set;
 mod docs_words;
-mod prefix_documents_cache;
-mod prefix_postings_lists_cache;
+mod documents_ids;
 mod documents_fields;
 mod documents_fields_counts;
+mod facets;
 mod main;
 mod postings_lists;
+mod prefix_documents_cache;
+mod prefix_postings_lists_cache;
 mod synonyms;
 mod updates;
 mod updates_results;
-mod facets;

 pub use self::docs_words::DocsWords;
 pub use self::facets::Facets;