Compute chunk size based on the input data size and the number of indexing threads

This commit is contained in:
ManyTheFish 2024-01-22 16:23:12 +01:00 committed by Louis Dureuil
parent 023c2d755f
commit be1b054b05
No known key found for this signature in database
13 changed files with 991 additions and 795 deletions

View file

@@ -52,7 +52,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
)]
pub fn execute(
self,
new_word_integer_docids: grenad::Reader<CursorClonableMmap>,
new_word_integer_docids: grenad::Merger<CursorClonableMmap, MergeFn>,
new_prefix_fst_words: &[String],
common_prefix_fst_words: &[&[String]],
del_prefix_fst_words: &HashSet<Vec<u8>>,
@@ -69,14 +69,14 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
self.max_memory,
);
let mut new_word_integer_docids_iter = new_word_integer_docids.into_cursor()?;
if !common_prefix_fst_words.is_empty() {
// We fetch all the new common prefixes between the previous and new prefix fst.
let mut buffer = Vec::new();
let mut current_prefixes: Option<&&[String]> = None;
let mut prefixes_cache = HashMap::new();
while let Some((key, data)) = new_word_integer_docids_iter.move_on_next()? {
let mut new_word_integer_docids_iter =
new_word_integer_docids.into_stream_merger_iter()?;
while let Some((key, data)) = new_word_integer_docids_iter.next()? {
let (word, pos) =
StrBEU16Codec::bytes_decode(key).map_err(heed::Error::Decoding)?;