diff --git a/crates/milli/src/update/new/indexer/mod.rs b/crates/milli/src/update/new/indexer/mod.rs index d2a88f4ff..2ea3c787e 100644 --- a/crates/milli/src/update/new/indexer/mod.rs +++ b/crates/milli/src/update/new/indexer/mod.rs @@ -9,6 +9,7 @@ pub use document_operation::{DocumentOperation, PayloadStats}; use hashbrown::HashMap; use heed::RwTxn; pub use partial_dump::PartialDump; +pub use post_processing::recompute_word_fst_from_word_docids_database; pub use update_by_function::UpdateByFunction; pub use write::ChannelCongestion; use write::{build_vectors, update_index, write_to_db}; diff --git a/crates/milli/src/update/new/indexer/post_processing.rs b/crates/milli/src/update/new/indexer/post_processing.rs index aace70cff..b5c89d0d9 100644 --- a/crates/milli/src/update/new/indexer/post_processing.rs +++ b/crates/milli/src/update/new/indexer/post_processing.rs @@ -131,6 +131,20 @@ fn compute_word_fst( } } +pub fn recompute_word_fst_from_word_docids_database(index: &Index, wtxn: &mut RwTxn) -> Result<()> { + let fst = fst::Set::default().map_data(std::borrow::Cow::Owned)?; + let mut word_fst_builder = WordFstBuilder::new(&fst)?; + let words = index.word_docids.iter(wtxn)?.remap_data_type::(); + for res in words { + let (word, _) = res?; + word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?; + } + let (word_fst_mmap, _) = word_fst_builder.build(index, wtxn)?; + index.main.remap_types::().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?; + + Ok(()) +} + #[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")] fn compute_facet_search_database( index: &Index, diff --git a/crates/milli/src/update/upgrade/mod.rs b/crates/milli/src/update/upgrade/mod.rs index 7c8dcf64a..d471107ec 100644 --- a/crates/milli/src/update/upgrade/mod.rs +++ b/crates/milli/src/update/upgrade/mod.rs @@ -1,11 +1,12 @@ mod v1_12; mod v1_13; mod v1_14; - +mod v1_15; use heed::RwTxn; use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3}; use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13}; use v1_14::Latest_V1_13_To_Latest_V1_14; +use v1_15::Latest_V1_14_To_Latest_V1_15; use crate::progress::{Progress, VariableNameStep}; use crate::{Index, InternalError, Result}; @@ -36,6 +37,7 @@ pub fn upgrade( &V1_13_0_To_V1_13_1 {}, &V1_13_1_To_Latest_V1_13 {}, &Latest_V1_13_To_Latest_V1_14 {}, + &Latest_V1_14_To_Latest_V1_15 {}, ]; let start = match from { @@ -43,8 +45,9 @@ pub fn upgrade( (1, 12, 3..) => 1, (1, 13, 0) => 2, (1, 13, _) => 4, + (1, 14, _) => 5, // We must handle the current version in the match because in case of a failure some index may have been upgraded but not other. - (1, 14, _) => 4, + (1, 15, _) => 5, (major, minor, patch) => { return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into()) } diff --git a/crates/milli/src/update/upgrade/v1_15.rs b/crates/milli/src/update/upgrade/v1_15.rs new file mode 100644 index 000000000..2c3cff355 --- /dev/null +++ b/crates/milli/src/update/upgrade/v1_15.rs @@ -0,0 +1,35 @@ +use heed::RwTxn; + +use super::UpgradeIndex; +use crate::progress::Progress; +use crate::update::new::indexer::recompute_word_fst_from_word_docids_database; +use crate::{make_enum_progress, Index, Result}; + +#[allow(non_camel_case_types)] +pub(super) struct Latest_V1_14_To_Latest_V1_15(); + +impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 { + fn upgrade( + &self, + wtxn: &mut RwTxn, + index: &Index, + _original: (u32, u32, u32), + progress: Progress, + ) -> Result { + // Recompute the word FST from the word docids database. + make_enum_progress! { + enum TypoTolerance { + RecomputeWordFst, + } + }; + + progress.update_progress(TypoTolerance::RecomputeWordFst); + recompute_word_fst_from_word_docids_database(index, wtxn)?; + + Ok(false) + } + + fn target_version(&self) -> (u32, u32, u32) { + (1, 15, 0) + } +}