From 739c52a3cdc420f929e45ce6189f18d624dc904f Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 4 Dec 2024 16:16:48 +0100 Subject: [PATCH] Replace HashSets by BTreeSets for the prefixes --- .../milli/src/update/new/word_fst_builder.rs | 12 +++---- .../src/update/new/words_prefix_docids.rs | 36 +++++++++---------- 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/crates/milli/src/update/new/word_fst_builder.rs b/crates/milli/src/update/new/word_fst_builder.rs index 6bc72d91d..a9a5222be 100644 --- a/crates/milli/src/update/new/word_fst_builder.rs +++ b/crates/milli/src/update/new/word_fst_builder.rs @@ -1,4 +1,4 @@ -use std::collections::HashSet; +use std::collections::BTreeSet; use std::io::BufWriter; use fst::{Set, SetBuilder, Streamer}; @@ -75,8 +75,8 @@ pub struct PrefixData { #[derive(Debug)] pub struct PrefixDelta { - pub modified: HashSet, - pub deleted: HashSet, + pub modified: BTreeSet, + pub deleted: BTreeSet, } struct PrefixFstBuilder { @@ -86,7 +86,7 @@ struct PrefixFstBuilder { prefix_fst_builders: Vec>>, current_prefix: Vec, current_prefix_count: Vec, - modified_prefixes: HashSet, + modified_prefixes: BTreeSet, current_prefix_is_modified: Vec, } @@ -110,7 +110,7 @@ impl PrefixFstBuilder { prefix_fst_builders, current_prefix: vec![Prefix::new(); max_prefix_length], current_prefix_count: vec![0; max_prefix_length], - modified_prefixes: HashSet::new(), + modified_prefixes: BTreeSet::new(), current_prefix_is_modified: vec![false; max_prefix_length], }) } @@ -180,7 +180,7 @@ impl PrefixFstBuilder { let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? }; let new_prefix_fst = Set::new(&prefix_fst_mmap)?; let old_prefix_fst = index.words_prefixes_fst(rtxn)?; - let mut deleted_prefixes = HashSet::new(); + let mut deleted_prefixes = BTreeSet::new(); { let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference(); while let Some(prefix) = deleted_prefixes_stream.next() { diff --git a/crates/milli/src/update/new/words_prefix_docids.rs b/crates/milli/src/update/new/words_prefix_docids.rs index 7e56beeae..bf64049c3 100644 --- a/crates/milli/src/update/new/words_prefix_docids.rs +++ b/crates/milli/src/update/new/words_prefix_docids.rs @@ -1,5 +1,5 @@ use std::cell::RefCell; -use std::collections::HashSet; +use std::collections::BTreeSet; use std::io::{BufReader, BufWriter, Read, Seek, Write}; use hashbrown::HashMap; @@ -37,8 +37,8 @@ impl WordPrefixDocids { fn execute( self, wtxn: &mut heed::RwTxn, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, ) -> Result<()> { delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?; self.recompute_modified_prefixes(wtxn, prefix_to_compute) @@ -48,7 +48,7 @@ impl WordPrefixDocids { fn recompute_modified_prefixes( &self, wtxn: &mut RwTxn, - prefixes: &HashSet, + prefixes: &BTreeSet, ) -> Result<()> { // We fetch the docids associated to the newly added word prefix fst only. // And collect the CboRoaringBitmaps pointers in an HashMap. @@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> { pub fn from_prefixes( database: Database, rtxn: &'rtxn RoTxn, - prefixes: &'a HashSet, + prefixes: &'a BTreeSet, ) -> heed::Result { let database = database.remap_data_type::(); @@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids { fn execute( self, wtxn: &mut heed::RwTxn, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, ) -> Result<()> { delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?; self.recompute_modified_prefixes(wtxn, prefix_to_compute) @@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids { fn recompute_modified_prefixes( &self, wtxn: &mut RwTxn, - prefixes: &HashSet, + prefixes: &BTreeSet, ) -> Result<()> { // We fetch the docids associated to the newly added word prefix fst only. // And collect the CboRoaringBitmaps pointers in an HashMap. @@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> { pub fn from_prefixes( database: Database, rtxn: &'rtxn RoTxn, - prefixes: &'a HashSet, + prefixes: &'a BTreeSet, ) -> heed::Result { let database = database.remap_data_type::(); @@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {} fn delete_prefixes( wtxn: &mut RwTxn, prefix_database: &Database, - prefixes: &HashSet, + prefixes: &BTreeSet, ) -> Result<()> { // We remove all the entries that are no more required in this word prefix docids database. for prefix in prefixes { @@ -309,8 +309,8 @@ fn delete_prefixes( pub fn compute_word_prefix_docids( wtxn: &mut RwTxn, index: &Index, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, grenad_parameters: GrenadParameters, ) -> Result<()> { WordPrefixDocids::new( @@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids( pub fn compute_exact_word_prefix_docids( wtxn: &mut RwTxn, index: &Index, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, grenad_parameters: GrenadParameters, ) -> Result<()> { WordPrefixDocids::new( @@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids( pub fn compute_word_prefix_fid_docids( wtxn: &mut RwTxn, index: &Index, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, grenad_parameters: GrenadParameters, ) -> Result<()> { WordPrefixIntegerDocids::new( @@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids( pub fn compute_word_prefix_position_docids( wtxn: &mut RwTxn, index: &Index, - prefix_to_compute: &HashSet, - prefix_to_delete: &HashSet, + prefix_to_compute: &BTreeSet, + prefix_to_delete: &BTreeSet, grenad_parameters: GrenadParameters, ) -> Result<()> { WordPrefixIntegerDocids::new(