Replace HashSets with BTreeSets for the prefixes

This commit is contained in:
ManyTheFish 2024-12-04 16:16:48 +01:00
parent 5b8cd68abe
commit 739c52a3cd
2 changed files with 24 additions and 24 deletions

View File

@ -1,4 +1,4 @@
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::io::BufWriter;
use fst::{Set, SetBuilder, Streamer};
@ -75,8 +75,8 @@ pub struct PrefixData {
#[derive(Debug)]
pub struct PrefixDelta {
pub modified: HashSet<Prefix>,
pub deleted: HashSet<Prefix>,
pub modified: BTreeSet<Prefix>,
pub deleted: BTreeSet<Prefix>,
}
struct PrefixFstBuilder {
@ -86,7 +86,7 @@ struct PrefixFstBuilder {
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
current_prefix: Vec<Prefix>,
current_prefix_count: Vec<usize>,
modified_prefixes: HashSet<Prefix>,
modified_prefixes: BTreeSet<Prefix>,
current_prefix_is_modified: Vec<bool>,
}
@ -110,7 +110,7 @@ impl PrefixFstBuilder {
prefix_fst_builders,
current_prefix: vec![Prefix::new(); max_prefix_length],
current_prefix_count: vec![0; max_prefix_length],
modified_prefixes: HashSet::new(),
modified_prefixes: BTreeSet::new(),
current_prefix_is_modified: vec![false; max_prefix_length],
})
}
@ -180,7 +180,7 @@ impl PrefixFstBuilder {
let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
let mut deleted_prefixes = HashSet::new();
let mut deleted_prefixes = BTreeSet::new();
{
let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
while let Some(prefix) = deleted_prefixes_stream.next() {

View File

@ -1,5 +1,5 @@
use std::cell::RefCell;
use std::collections::HashSet;
use std::collections::BTreeSet;
use std::io::{BufReader, BufWriter, Read, Seek, Write};
use hashbrown::HashMap;
@ -37,8 +37,8 @@ impl WordPrefixDocids {
fn execute(
self,
wtxn: &mut heed::RwTxn,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@ -48,7 +48,7 @@ impl WordPrefixDocids {
fn recompute_modified_prefixes(
&self,
wtxn: &mut RwTxn,
prefixes: &HashSet<Prefix>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only.
// And collect the CboRoaringBitmaps pointers in a HashMap.
@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
pub fn from_prefixes(
database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn,
prefixes: &'a HashSet<Prefix>,
prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> {
let database = database.remap_data_type::<Bytes>();
@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids {
fn execute(
self,
wtxn: &mut heed::RwTxn,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids {
fn recompute_modified_prefixes(
&self,
wtxn: &mut RwTxn,
prefixes: &HashSet<Prefix>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only.
// And collect the CboRoaringBitmaps pointers in a HashMap.
@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
pub fn from_prefixes(
database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn,
prefixes: &'a HashSet<Prefix>,
prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> {
let database = database.remap_data_type::<Bytes>();
@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {}
fn delete_prefixes(
wtxn: &mut RwTxn,
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
prefixes: &HashSet<Prefix>,
prefixes: &BTreeSet<Prefix>,
) -> Result<()> {
// We remove all the entries that are no longer required in this word prefix docids database.
for prefix in prefixes {
@ -309,8 +309,8 @@ fn delete_prefixes(
pub fn compute_word_prefix_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixDocids::new(
@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids(
pub fn compute_exact_word_prefix_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixDocids::new(
@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids(
pub fn compute_word_prefix_fid_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixIntegerDocids::new(
@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids(
pub fn compute_word_prefix_position_docids(
wtxn: &mut RwTxn,
index: &Index,
prefix_to_compute: &HashSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>,
prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters,
) -> Result<()> {
WordPrefixIntegerDocids::new(