Introduce the DiscoverIds and DocumentsIds types

2025-05-25 09:03:59 +02:00 · 2020-05-18 18:56:21 +02:00 · 2020-05-18 18:56:21 +02:00 · e6a7521610
commit e6a7521610
parent 3e84f916b6
2 changed files with 79 additions and 3 deletions
--- a/meilisearch-core/src/store/documents_ids.rs
+++ b/meilisearch-core/src/store/documents_ids.rs
@ -0,0 +1,75 @@
 use std::borrow::Cow;
 use heed::{BytesDecode, BytesEncode};
 use sdset::Set;
 use crate::DocumentId;
 use super::cow_set::CowSet;
 /// Marker type implementing the heed `BytesEncode`/`BytesDecode` traits for a
 /// `Set<DocumentId>`; both directions are delegated to `CowSet`.
 pub struct DocumentsIds;
 /// Encoding half of the codec: turns a borrowed `Set<DocumentId>` into bytes.
 impl BytesEncode<'_> for DocumentsIds {
    /// Type of the values accepted for encoding.
    type EItem = Set<DocumentId>;
    /// Encodes `item` by forwarding to `CowSet::bytes_encode`; whatever that
    /// call returns (including `None`) is returned unchanged.
    fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
        CowSet::bytes_encode(item)
    }
 }
 /// Decoding half of the codec: rebuilds a `Cow<Set<DocumentId>>` from bytes.
 impl<'a> BytesDecode<'a> for DocumentsIds {
    /// Type of the decoded values; its lifetime is tied to the input bytes.
    type DItem = Cow<'a, Set<DocumentId>>;
    /// Decodes `bytes` by forwarding to `CowSet::bytes_decode`; whatever that
    /// call returns (including `None`) is returned unchanged.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        CowSet::bytes_decode(bytes)
    }
 }
 /// Iterator state used to enumerate the `u64` ids that lie between (and
 /// after) the ids of a set of already used `DocumentId`s.
 ///
 /// Values are built with `DiscoverIds::new` and consumed through the
 /// `Iterator` implementation.
 pub struct DiscoverIds<'a> {
    /// Used ids that have not been examined yet.
    ids_iter: std::slice::Iter<'a, DocumentId>,
    /// Used id on the lower side of the gap being searched; `None` until the
    /// iterator advances past the first used id.
    left_id: Option<u64>,
    /// Used id on the upper side of the gap being searched; `None` when
    /// `ids_iter` had no more id to give.
    right_id: Option<u64>,
    /// Half-open range of free ids currently being yielded.
    available_range: std::ops::Range<u64>,
 }
 impl DiscoverIds<'_> {
    /// Builds an iterator over the ids that are not present in `ids`.
    ///
    /// The first candidate range starts at `0` and stops right before the
    /// first used id, or before `u64::MAX` when `ids` is empty.
    ///
    /// NOTE(review): the gap search presumably relies on `ids` being sorted
    /// in ascending order without duplicates (the usual `sdset::Set`
    /// contract) — confirm against the `sdset` documentation.
    pub fn new(ids: &Set<DocumentId>) -> DiscoverIds {
        let mut remaining = ids.iter();

        // The first used id closes the initial range of free ids.
        let first_used = remaining.next().map(|id| id.0);
        let upper_bound = match first_used {
            Some(id) => id,
            None => u64::max_value(),
        };

        DiscoverIds {
            ids_iter: remaining,
            left_id: None,
            right_id: first_used,
            available_range: 0..upper_bound,
        }
    }
 }
 impl Iterator for DiscoverIds<'_> {
    type Item = DocumentId;

    /// Returns the next free id, or `None` once a range ending at `u64::MAX`
    /// has been drained.
    ///
    /// Ranges are half-open, so `u64::MAX` itself is never yielded.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            // The current range still holds a free id: hand it out.
            if let Some(free_id) = self.available_range.next() {
                return Some(DocumentId(free_id));
            }

            // A drained range that ends at u64::MAX is always the last one.
            if self.available_range.end == u64::max_value() {
                return None;
            }

            // Slide the (left, right) window over the used ids until it
            // frames a range worth trying.
            loop {
                self.left_id = self.right_id.take();
                self.right_id = self.ids_iter.next().map(|used| used.0);

                let gap = match (self.left_id, self.right_id) {
                    // The two used ids are not adjacent: everything strictly
                    // between them is free.
                    (Some(lower), Some(upper)) if lower.saturating_add(1) != upper => {
                        (lower + 1)..upper
                    }
                    // No used id remains after `lower`: everything up to
                    // u64::MAX (excluded) is free.
                    (Some(lower), None) => lower.saturating_add(1)..u64::max_value(),
                    // Adjacent ids (or no left bound): keep sliding.
                    _ => continue,
                };

                self.available_range = gap;
                break;
            }
        }
    }
 }
--- a/meilisearch-core/src/store/mod.rs
+++ b/meilisearch-core/src/store/mod.rs
@ -1,15 +1,16 @@
 mod cow_set;
 mod docs_words;
-mod prefix_documents_cache;
+mod documents_ids;
 mod prefix_postings_lists_cache;
 mod documents_fields;
 mod documents_fields_counts;
 mod facets;
 mod main;
 mod postings_lists;
 mod prefix_documents_cache;
 mod prefix_postings_lists_cache;
 mod synonyms;
 mod updates;
 mod updates_results;
 mod facets;
 pub use self::docs_words::DocsWords;
 pub use self::facets::Facets;