Add the raw document IDs to the postings lists

2025-07-04 04:17:10 +02:00 · 2020-01-08 15:30:43 +01:00 · 2020-01-08 15:30:43 +01:00 · 81c573ec92
commit 81c573ec92
parent 9420edadf4
7 changed files with 54 additions and 59 deletions
--- a/meilisearch-core/src/store/mod.rs
+++ b/meilisearch-core/src/store/mod.rs
@@ -59,13 +59,13 @@ impl DocumentAttrKey {
    }
 }

-#[derive(Debug)]
+#[derive(Default, Debug)]
 pub struct Postings<'a> {
    pub docids: Cow<'a, Set<DocumentId>>,
    pub matches: Cow<'a, Set<DocIndex>>,
 }

-struct PostingsCodec;
+pub struct PostingsCodec;

 impl<'a> BytesEncode<'a> for PostingsCodec {
    type EItem = Postings<'a>;
@@ -125,7 +125,6 @@ impl<'a> BytesDecode<'a> for PostingsCodec {
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let u64_size = mem::size_of::<u64>();
        let docid_size = mem::size_of::<DocumentId>();
-        let docindex_size = mem::size_of::<DocIndex>();

        let (len_bytes, bytes) = bytes.split_at(u64_size);
        let docids_len = len_bytes.try_into().ok().map(u64::from_be_bytes)? as usize;
--- a/meilisearch-core/src/store/postings_lists.rs
+++ b/meilisearch-core/src/store/postings_lists.rs
@@ -1,14 +1,12 @@
 use std::borrow::Cow;
-use std::convert::TryInto;
-use std::{mem, ptr};

 use heed::Result as ZResult;
-use heed::types::{ByteSlice, CowSlice};
+use heed::types::ByteSlice;
 use sdset::{Set, SetBuf};
 use slice_group_by::GroupBy;

 use crate::database::MainT;
-use crate::{DocIndex, DocumentId};
+use crate::DocIndex;
 use crate::store::{Postings, PostingsCodec};

 #[derive(Copy, Clone)]
--- a/meilisearch-core/src/store/prefix_postings_lists_cache.rs
+++ b/meilisearch-core/src/store/prefix_postings_lists_cache.rs
@@ -1,15 +1,17 @@
 use std::borrow::Cow;

 use heed::Result as ZResult;
-use heed::types::{OwnedType, CowSlice};
+use heed::types::OwnedType;
 use sdset::{Set, SetBuf};
+use slice_group_by::GroupBy;

-use crate::DocIndex;
 use crate::database::MainT;
+use crate::DocIndex;
+use crate::store::{PostingsCodec, Postings};

 #[derive(Copy, Clone)]
 pub struct PrefixPostingsListsCache {
-    pub(crate) prefix_postings_lists_cache: heed::Database<OwnedType<[u8; 4]>, CowSlice<DocIndex>>,
+    pub(crate) prefix_postings_lists_cache: heed::Database<OwnedType<[u8; 4]>, PostingsCodec>,
 }

 impl PrefixPostingsListsCache {
@@ -17,10 +19,15 @@ impl PrefixPostingsListsCache {
        self,
        writer: &mut heed::RwTxn<MainT>,
        prefix: [u8; 4],
-        postings_list: &Set<DocIndex>,
+        matches: &Set<DocIndex>,
    ) -> ZResult<()>
    {
-        self.prefix_postings_lists_cache.put(writer, &prefix, postings_list)
+        let docids = matches.linear_group_by_key(|m| m.document_id).map(|g| g[0].document_id).collect();
+        let docids = Cow::Owned(SetBuf::new_unchecked(docids));
+        let matches = Cow::Borrowed(matches);
+        let postings = Postings { docids, matches };
+
+        self.prefix_postings_lists_cache.put(writer, &prefix, &postings)
    }

    pub fn clear(self, writer: &mut heed::RwTxn<MainT>) -> ZResult<()> {
@@ -31,12 +38,8 @@ impl PrefixPostingsListsCache {
        self,
        reader: &'txn heed::RoTxn<MainT>,
        prefix: [u8; 4],
-    ) -> ZResult<Option<Cow<'txn, Set<DocIndex>>>>
+    ) -> ZResult<Option<Postings<'txn>>>
    {
-        match self.prefix_postings_lists_cache.get(reader, &prefix)? {
-            Some(Cow::Owned(vec)) => Ok(Some(Cow::Owned(SetBuf::new_unchecked(vec)))),
-            Some(Cow::Borrowed(slice)) => Ok(Some(Cow::Borrowed(Set::new_unchecked(slice)))),
-            None => Ok(None),
-        }
+        self.prefix_postings_lists_cache.get(reader, &prefix)
    }
 }