Replace HashSets with BTreeSets for the prefixes

This commit is contained in:
ManyTheFish 2024-12-04 16:16:48 +01:00
parent 5b8cd68abe
commit 739c52a3cd
2 changed files with 24 additions and 24 deletions

View File

@ -1,4 +1,4 @@
use std::collections::HashSet; use std::collections::BTreeSet;
use std::io::BufWriter; use std::io::BufWriter;
use fst::{Set, SetBuilder, Streamer}; use fst::{Set, SetBuilder, Streamer};
@ -75,8 +75,8 @@ pub struct PrefixData {
#[derive(Debug)] #[derive(Debug)]
pub struct PrefixDelta { pub struct PrefixDelta {
pub modified: HashSet<Prefix>, pub modified: BTreeSet<Prefix>,
pub deleted: HashSet<Prefix>, pub deleted: BTreeSet<Prefix>,
} }
struct PrefixFstBuilder { struct PrefixFstBuilder {
@ -86,7 +86,7 @@ struct PrefixFstBuilder {
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>, prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
current_prefix: Vec<Prefix>, current_prefix: Vec<Prefix>,
current_prefix_count: Vec<usize>, current_prefix_count: Vec<usize>,
modified_prefixes: HashSet<Prefix>, modified_prefixes: BTreeSet<Prefix>,
current_prefix_is_modified: Vec<bool>, current_prefix_is_modified: Vec<bool>,
} }
@ -110,7 +110,7 @@ impl PrefixFstBuilder {
prefix_fst_builders, prefix_fst_builders,
current_prefix: vec![Prefix::new(); max_prefix_length], current_prefix: vec![Prefix::new(); max_prefix_length],
current_prefix_count: vec![0; max_prefix_length], current_prefix_count: vec![0; max_prefix_length],
modified_prefixes: HashSet::new(), modified_prefixes: BTreeSet::new(),
current_prefix_is_modified: vec![false; max_prefix_length], current_prefix_is_modified: vec![false; max_prefix_length],
}) })
} }
@ -180,7 +180,7 @@ impl PrefixFstBuilder {
let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? }; let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
let new_prefix_fst = Set::new(&prefix_fst_mmap)?; let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
let old_prefix_fst = index.words_prefixes_fst(rtxn)?; let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
let mut deleted_prefixes = HashSet::new(); let mut deleted_prefixes = BTreeSet::new();
{ {
let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference(); let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
while let Some(prefix) = deleted_prefixes_stream.next() { while let Some(prefix) = deleted_prefixes_stream.next() {

View File

@ -1,5 +1,5 @@
use std::cell::RefCell; use std::cell::RefCell;
use std::collections::HashSet; use std::collections::BTreeSet;
use std::io::{BufReader, BufWriter, Read, Seek, Write}; use std::io::{BufReader, BufWriter, Read, Seek, Write};
use hashbrown::HashMap; use hashbrown::HashMap;
@ -37,8 +37,8 @@ impl WordPrefixDocids {
fn execute( fn execute(
self, self,
wtxn: &mut heed::RwTxn, wtxn: &mut heed::RwTxn,
prefix_to_compute: &HashSet<Prefix>, prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>, prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> { ) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?; delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute) self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@ -48,7 +48,7 @@ impl WordPrefixDocids {
fn recompute_modified_prefixes( fn recompute_modified_prefixes(
&self, &self,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
prefixes: &HashSet<Prefix>, prefixes: &BTreeSet<Prefix>,
) -> Result<()> { ) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only. // We fetch the docids associated with the newly added word prefix fst only.
// And collect the CboRoaringBitmaps pointers in a HashMap. // And collect the CboRoaringBitmaps pointers in a HashMap.
@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
pub fn from_prefixes( pub fn from_prefixes(
database: Database<Bytes, CboRoaringBitmapCodec>, database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn, rtxn: &'rtxn RoTxn,
prefixes: &'a HashSet<Prefix>, prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> { ) -> heed::Result<Self> {
let database = database.remap_data_type::<Bytes>(); let database = database.remap_data_type::<Bytes>();
@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids {
fn execute( fn execute(
self, self,
wtxn: &mut heed::RwTxn, wtxn: &mut heed::RwTxn,
prefix_to_compute: &HashSet<Prefix>, prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>, prefix_to_delete: &BTreeSet<Prefix>,
) -> Result<()> { ) -> Result<()> {
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?; delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
self.recompute_modified_prefixes(wtxn, prefix_to_compute) self.recompute_modified_prefixes(wtxn, prefix_to_compute)
@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids {
fn recompute_modified_prefixes( fn recompute_modified_prefixes(
&self, &self,
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
prefixes: &HashSet<Prefix>, prefixes: &BTreeSet<Prefix>,
) -> Result<()> { ) -> Result<()> {
// We fetch the docids associated with the newly added word prefix fst only. // We fetch the docids associated with the newly added word prefix fst only.
// And collect the CboRoaringBitmaps pointers in a HashMap. // And collect the CboRoaringBitmaps pointers in a HashMap.
@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
pub fn from_prefixes( pub fn from_prefixes(
database: Database<Bytes, CboRoaringBitmapCodec>, database: Database<Bytes, CboRoaringBitmapCodec>,
rtxn: &'rtxn RoTxn, rtxn: &'rtxn RoTxn,
prefixes: &'a HashSet<Prefix>, prefixes: &'a BTreeSet<Prefix>,
) -> heed::Result<Self> { ) -> heed::Result<Self> {
let database = database.remap_data_type::<Bytes>(); let database = database.remap_data_type::<Bytes>();
@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {}
fn delete_prefixes( fn delete_prefixes(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>, prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
prefixes: &HashSet<Prefix>, prefixes: &BTreeSet<Prefix>,
) -> Result<()> { ) -> Result<()> {
// We remove all the entries that are no longer required in this word prefix docids database. // We remove all the entries that are no longer required in this word prefix docids database.
for prefix in prefixes { for prefix in prefixes {
@ -309,8 +309,8 @@ fn delete_prefixes(
pub fn compute_word_prefix_docids( pub fn compute_word_prefix_docids(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
index: &Index, index: &Index,
prefix_to_compute: &HashSet<Prefix>, prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>, prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters, grenad_parameters: GrenadParameters,
) -> Result<()> { ) -> Result<()> {
WordPrefixDocids::new( WordPrefixDocids::new(
@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids(
pub fn compute_exact_word_prefix_docids( pub fn compute_exact_word_prefix_docids(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
index: &Index, index: &Index,
prefix_to_compute: &HashSet<Prefix>, prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>, prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters, grenad_parameters: GrenadParameters,
) -> Result<()> { ) -> Result<()> {
WordPrefixDocids::new( WordPrefixDocids::new(
@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids(
pub fn compute_word_prefix_fid_docids( pub fn compute_word_prefix_fid_docids(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
index: &Index, index: &Index,
prefix_to_compute: &HashSet<Prefix>, prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>, prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters, grenad_parameters: GrenadParameters,
) -> Result<()> { ) -> Result<()> {
WordPrefixIntegerDocids::new( WordPrefixIntegerDocids::new(
@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids(
pub fn compute_word_prefix_position_docids( pub fn compute_word_prefix_position_docids(
wtxn: &mut RwTxn, wtxn: &mut RwTxn,
index: &Index, index: &Index,
prefix_to_compute: &HashSet<Prefix>, prefix_to_compute: &BTreeSet<Prefix>,
prefix_to_delete: &HashSet<Prefix>, prefix_to_delete: &BTreeSet<Prefix>,
grenad_parameters: GrenadParameters, grenad_parameters: GrenadParameters,
) -> Result<()> { ) -> Result<()> {
WordPrefixIntegerDocids::new( WordPrefixIntegerDocids::new(