diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 54d30f8fb..91d108c72 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -441,7 +441,11 @@ where let word_docids_iter = word_docids_builder.build().into_stream_merger_iter()?; // Run the word prefix docids update operation. - let mut builder = WordPrefixDocids::new(self.wtxn, self.index); + let mut builder = WordPrefixDocids::new( + self.wtxn, + self.index.word_docids.clone(), + self.index.word_prefix_docids.clone(), + ); builder.chunk_compression_type = self.indexer_config.chunk_compression_type; builder.chunk_compression_level = self.indexer_config.chunk_compression_level; builder.max_nb_chunks = self.indexer_config.max_nb_chunks; diff --git a/milli/src/update/word_prefix_docids.rs b/milli/src/update/word_prefix_docids.rs index 4114f8baf..b166812a5 100644 --- a/milli/src/update/word_prefix_docids.rs +++ b/milli/src/update/word_prefix_docids.rs @@ -1,16 +1,18 @@ use std::collections::{HashMap, HashSet}; use grenad::CompressionType; -use heed::types::ByteSlice; +use heed::types::{ByteSlice, Str}; +use heed::Database; use crate::update::index_documents::{ create_sorter, merge_roaring_bitmaps, sorter_into_lmdb_database, CursorClonableMmap, MergeFn, }; -use crate::{Index, Result}; +use crate::{Result, RoaringBitmapCodec}; pub struct WordPrefixDocids<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'i, 'u>, - index: &'i Index, + word_docids: Database, + word_prefix_docids: Database, pub(crate) chunk_compression_type: CompressionType, pub(crate) chunk_compression_level: Option, pub(crate) max_nb_chunks: Option, @@ -20,11 +22,13 @@ pub struct WordPrefixDocids<'t, 'u, 'i> { impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { pub fn new( wtxn: &'t mut heed::RwTxn<'i, 'u>, - index: &'i Index, + word_docids: Database, + word_prefixes_docids: Database, ) -> WordPrefixDocids<'t, 'u, 'i> { WordPrefixDocids { wtxn, - index, + word_docids, + word_prefix_docids: word_prefixes_docids, chunk_compression_type: CompressionType::None, chunk_compression_level: None, max_nb_chunks: None, @@ -83,7 +87,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { } // We fetch the docids associated to the newly added word prefix fst only. - let db = self.index.word_docids.remap_data_type::(); + let db = self.word_docids.remap_data_type::(); for prefix in new_prefix_fst_words { let prefix = std::str::from_utf8(prefix.as_bytes())?; for result in db.prefix_iter(self.wtxn, prefix)? { @@ -93,7 +97,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { } // We remove all the entries that are no more required in this word prefix docids database. - let mut iter = self.index.word_prefix_docids.iter_mut(self.wtxn)?.lazily_decode_data(); + let mut iter = self.word_prefix_docids.iter_mut(self.wtxn)?.lazily_decode_data(); while let Some((prefix, _)) = iter.next().transpose()? { if del_prefix_fst_words.contains(prefix.as_bytes()) { unsafe { iter.del_current()? }; @@ -105,7 +109,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> { // We finally write the word prefix docids into the LMDB database. sorter_into_lmdb_database( self.wtxn, - *self.index.word_prefix_docids.as_polymorph(), + *self.word_prefix_docids.as_polymorph(), prefix_docids_sorter, merge_roaring_bitmaps, )?;