mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 21:44:34 +01:00
Replace HashSets by BTreeSets for the prefixes
This commit is contained in:
parent
5b8cd68abe
commit
739c52a3cd
@ -1,4 +1,4 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::BTreeSet;
|
||||||
use std::io::BufWriter;
|
use std::io::BufWriter;
|
||||||
|
|
||||||
use fst::{Set, SetBuilder, Streamer};
|
use fst::{Set, SetBuilder, Streamer};
|
||||||
@ -75,8 +75,8 @@ pub struct PrefixData {
|
|||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct PrefixDelta {
|
pub struct PrefixDelta {
|
||||||
pub modified: HashSet<Prefix>,
|
pub modified: BTreeSet<Prefix>,
|
||||||
pub deleted: HashSet<Prefix>,
|
pub deleted: BTreeSet<Prefix>,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct PrefixFstBuilder {
|
struct PrefixFstBuilder {
|
||||||
@ -86,7 +86,7 @@ struct PrefixFstBuilder {
|
|||||||
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
|
prefix_fst_builders: Vec<SetBuilder<Vec<u8>>>,
|
||||||
current_prefix: Vec<Prefix>,
|
current_prefix: Vec<Prefix>,
|
||||||
current_prefix_count: Vec<usize>,
|
current_prefix_count: Vec<usize>,
|
||||||
modified_prefixes: HashSet<Prefix>,
|
modified_prefixes: BTreeSet<Prefix>,
|
||||||
current_prefix_is_modified: Vec<bool>,
|
current_prefix_is_modified: Vec<bool>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,7 +110,7 @@ impl PrefixFstBuilder {
|
|||||||
prefix_fst_builders,
|
prefix_fst_builders,
|
||||||
current_prefix: vec![Prefix::new(); max_prefix_length],
|
current_prefix: vec![Prefix::new(); max_prefix_length],
|
||||||
current_prefix_count: vec![0; max_prefix_length],
|
current_prefix_count: vec![0; max_prefix_length],
|
||||||
modified_prefixes: HashSet::new(),
|
modified_prefixes: BTreeSet::new(),
|
||||||
current_prefix_is_modified: vec![false; max_prefix_length],
|
current_prefix_is_modified: vec![false; max_prefix_length],
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -180,7 +180,7 @@ impl PrefixFstBuilder {
|
|||||||
let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
|
let prefix_fst_mmap = unsafe { Mmap::map(&prefix_fst_file)? };
|
||||||
let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
|
let new_prefix_fst = Set::new(&prefix_fst_mmap)?;
|
||||||
let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
|
let old_prefix_fst = index.words_prefixes_fst(rtxn)?;
|
||||||
let mut deleted_prefixes = HashSet::new();
|
let mut deleted_prefixes = BTreeSet::new();
|
||||||
{
|
{
|
||||||
let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
|
let mut deleted_prefixes_stream = old_prefix_fst.op().add(&new_prefix_fst).difference();
|
||||||
while let Some(prefix) = deleted_prefixes_stream.next() {
|
while let Some(prefix) = deleted_prefixes_stream.next() {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
use std::collections::HashSet;
|
use std::collections::BTreeSet;
|
||||||
use std::io::{BufReader, BufWriter, Read, Seek, Write};
|
use std::io::{BufReader, BufWriter, Read, Seek, Write};
|
||||||
|
|
||||||
use hashbrown::HashMap;
|
use hashbrown::HashMap;
|
||||||
@ -37,8 +37,8 @@ impl WordPrefixDocids {
|
|||||||
fn execute(
|
fn execute(
|
||||||
self,
|
self,
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
||||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||||
@ -48,7 +48,7 @@ impl WordPrefixDocids {
|
|||||||
fn recompute_modified_prefixes(
|
fn recompute_modified_prefixes(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
prefixes: &HashSet<Prefix>,
|
prefixes: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// We fetch the docids associated to the newly added word prefix fst only.
|
// We fetch the docids associated to the newly added word prefix fst only.
|
||||||
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
||||||
@ -127,7 +127,7 @@ impl<'a, 'rtxn> FrozenPrefixBitmaps<'a, 'rtxn> {
|
|||||||
pub fn from_prefixes(
|
pub fn from_prefixes(
|
||||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||||
rtxn: &'rtxn RoTxn,
|
rtxn: &'rtxn RoTxn,
|
||||||
prefixes: &'a HashSet<Prefix>,
|
prefixes: &'a BTreeSet<Prefix>,
|
||||||
) -> heed::Result<Self> {
|
) -> heed::Result<Self> {
|
||||||
let database = database.remap_data_type::<Bytes>();
|
let database = database.remap_data_type::<Bytes>();
|
||||||
|
|
||||||
@ -173,8 +173,8 @@ impl WordPrefixIntegerDocids {
|
|||||||
fn execute(
|
fn execute(
|
||||||
self,
|
self,
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
delete_prefixes(wtxn, &self.prefix_database, prefix_to_delete)?;
|
||||||
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
self.recompute_modified_prefixes(wtxn, prefix_to_compute)
|
||||||
@ -184,7 +184,7 @@ impl WordPrefixIntegerDocids {
|
|||||||
fn recompute_modified_prefixes(
|
fn recompute_modified_prefixes(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
prefixes: &HashSet<Prefix>,
|
prefixes: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// We fetch the docids associated to the newly added word prefix fst only.
|
// We fetch the docids associated to the newly added word prefix fst only.
|
||||||
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
// And collect the CboRoaringBitmaps pointers in an HashMap.
|
||||||
@ -262,7 +262,7 @@ impl<'a, 'rtxn> FrozenPrefixIntegerBitmaps<'a, 'rtxn> {
|
|||||||
pub fn from_prefixes(
|
pub fn from_prefixes(
|
||||||
database: Database<Bytes, CboRoaringBitmapCodec>,
|
database: Database<Bytes, CboRoaringBitmapCodec>,
|
||||||
rtxn: &'rtxn RoTxn,
|
rtxn: &'rtxn RoTxn,
|
||||||
prefixes: &'a HashSet<Prefix>,
|
prefixes: &'a BTreeSet<Prefix>,
|
||||||
) -> heed::Result<Self> {
|
) -> heed::Result<Self> {
|
||||||
let database = database.remap_data_type::<Bytes>();
|
let database = database.remap_data_type::<Bytes>();
|
||||||
|
|
||||||
@ -291,7 +291,7 @@ unsafe impl<'a, 'rtxn> Sync for FrozenPrefixIntegerBitmaps<'a, 'rtxn> {}
|
|||||||
fn delete_prefixes(
|
fn delete_prefixes(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
|
prefix_database: &Database<Bytes, CboRoaringBitmapCodec>,
|
||||||
prefixes: &HashSet<Prefix>,
|
prefixes: &BTreeSet<Prefix>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// We remove all the entries that are no more required in this word prefix docids database.
|
// We remove all the entries that are no more required in this word prefix docids database.
|
||||||
for prefix in prefixes {
|
for prefix in prefixes {
|
||||||
@ -309,8 +309,8 @@ fn delete_prefixes(
|
|||||||
pub fn compute_word_prefix_docids(
|
pub fn compute_word_prefix_docids(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
WordPrefixDocids::new(
|
WordPrefixDocids::new(
|
||||||
@ -325,8 +325,8 @@ pub fn compute_word_prefix_docids(
|
|||||||
pub fn compute_exact_word_prefix_docids(
|
pub fn compute_exact_word_prefix_docids(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
WordPrefixDocids::new(
|
WordPrefixDocids::new(
|
||||||
@ -341,8 +341,8 @@ pub fn compute_exact_word_prefix_docids(
|
|||||||
pub fn compute_word_prefix_fid_docids(
|
pub fn compute_word_prefix_fid_docids(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
WordPrefixIntegerDocids::new(
|
WordPrefixIntegerDocids::new(
|
||||||
@ -357,8 +357,8 @@ pub fn compute_word_prefix_fid_docids(
|
|||||||
pub fn compute_word_prefix_position_docids(
|
pub fn compute_word_prefix_position_docids(
|
||||||
wtxn: &mut RwTxn,
|
wtxn: &mut RwTxn,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
prefix_to_compute: &HashSet<Prefix>,
|
prefix_to_compute: &BTreeSet<Prefix>,
|
||||||
prefix_to_delete: &HashSet<Prefix>,
|
prefix_to_delete: &BTreeSet<Prefix>,
|
||||||
grenad_parameters: GrenadParameters,
|
grenad_parameters: GrenadParameters,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
WordPrefixIntegerDocids::new(
|
WordPrefixIntegerDocids::new(
|
||||||
|
Loading…
x
Reference in New Issue
Block a user