Dumpless upgrade

This commit is contained in:
ManyTheFish 2025-04-29 15:14:30 +02:00
parent 0cb2bf34a5
commit 8b23eddc10
4 changed files with 55 additions and 2 deletions

View File

@ -9,6 +9,7 @@ pub use document_operation::{DocumentOperation, PayloadStats};
use hashbrown::HashMap;
use heed::RwTxn;
pub use partial_dump::PartialDump;
pub use post_processing::recompute_word_fst_from_word_docids_database;
pub use update_by_function::UpdateByFunction;
pub use write::ChannelCongestion;
use write::{build_vectors, update_index, write_to_db};

View File

@ -131,6 +131,20 @@ fn compute_word_fst(
}
}
/// Rebuilds the words FST of `index` using the keys of its `word_docids` database.
///
/// Every key read from `word_docids` is registered as an addition on a
/// [`WordFstBuilder`] seeded with an empty FST. The built FST is then written
/// to the `main` database under `WORDS_FST_KEY`.
///
/// # Errors
///
/// Returns the first error raised while iterating over `word_docids`,
/// registering a word, building the FST, or writing it to the `main` database.
pub fn recompute_word_fst_from_word_docids_database(index: &Index, wtxn: &mut RwTxn) -> Result<()> {
    // The builder is seeded with an empty set: every word it ends up holding
    // is one registered from the database below.
    let empty_fst = fst::Set::default().map_data(std::borrow::Cow::Owned)?;
    let mut builder = WordFstBuilder::new(&empty_fst)?;

    // Only the keys (the words) are needed, so the associated values are ignored.
    for entry in index.word_docids.iter(wtxn)?.remap_data_type::<DecodeIgnore>() {
        let (word, _) = entry?;
        builder.register_word(DelAdd::Addition, word.as_ref())?;
    }

    let (words_fst, _) = builder.build(index, wtxn)?;
    let main_db = index.main.remap_types::<Str, Bytes>();
    main_db.put(wtxn, WORDS_FST_KEY, &words_fst)?;

    Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")]
fn compute_facet_search_database(
index: &Index,

View File

@ -1,11 +1,12 @@
mod v1_12;
mod v1_13;
mod v1_14;
mod v1_15;
use heed::RwTxn;
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
use v1_14::Latest_V1_13_To_Latest_V1_14;
use v1_15::Latest_V1_14_To_Latest_V1_15;
use crate::progress::{Progress, VariableNameStep};
use crate::{Index, InternalError, Result};
@ -36,6 +37,7 @@ pub fn upgrade(
&V1_13_0_To_V1_13_1 {},
&V1_13_1_To_Latest_V1_13 {},
&Latest_V1_13_To_Latest_V1_14 {},
&Latest_V1_14_To_Latest_V1_15 {},
];
let start = match from {
@ -43,8 +45,9 @@ pub fn upgrade(
(1, 12, 3..) => 1,
(1, 13, 0) => 2,
(1, 13, _) => 4,
(1, 14, _) => 5,
// We must handle the current version in the match because, in case of a failure, some indexes may have been upgraded but not others.
(1, 14, _) => 4,
(1, 15, _) => 5,
(major, minor, patch) => {
return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into())
}

View File

@ -0,0 +1,35 @@
use heed::RwTxn;
use super::UpgradeIndex;
use crate::progress::Progress;
use crate::update::new::indexer::recompute_word_fst_from_word_docids_database;
use crate::{make_enum_progress, Index, Result};
/// Index upgrade step whose target version is v1.15.0.
///
/// Its `UpgradeIndex` implementation recomputes the word FST from the
/// word docids database.
// The identifier spells out the version transition, hence the lint exemption.
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_14_To_Latest_V1_15();
impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 {
    /// Version reached by an index once this step has been applied.
    fn target_version(&self) -> (u32, u32, u32) {
        (1, 15, 0)
    }

    /// Recomputes the word FST of `index` from its word docids database.
    ///
    /// The single progress step `RecomputeWordFst` is reported through
    /// `progress` before the recomputation starts. `_original` is unused.
    ///
    /// Always returns `Ok(false)` on success.
    // NOTE(review): the meaning of the returned flag is defined by the
    // `UpgradeIndex` trait, which is not visible here — confirm there.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by
    /// `recompute_word_fst_from_word_docids_database`.
    fn upgrade(
        &self,
        wtxn: &mut RwTxn,
        index: &Index,
        _original: (u32, u32, u32),
        progress: Progress,
    ) -> Result<bool> {
        // Progress steps reported by this upgrade.
        make_enum_progress! {
            enum TypoTolerance {
                RecomputeWordFst,
            }
        };

        let step = TypoTolerance::RecomputeWordFst;
        progress.update_progress(step);

        // Recompute the word FST from the word docids database.
        recompute_word_fst_from_word_docids_database(index, wtxn)?;

        Ok(false)
    }
}