Merge branch 'main' into all-cpus-in-import-dump

nnethercott 2025-05-12 21:48:12 +02:00
commit 75a7e40a27
74 changed files with 2402 additions and 1726 deletions


@@ -1,6 +1,13 @@
pub static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
pub const VERSION_MAJOR: u32 = parse_u32(env!("CARGO_PKG_VERSION_MAJOR"));
pub const VERSION_MINOR: u32 = parse_u32(env!("CARGO_PKG_VERSION_MINOR"));
pub const VERSION_PATCH: u32 = parse_u32(env!("CARGO_PKG_VERSION_PATCH"));
const fn parse_u32(s: &str) -> u32 {
match u32::from_str_radix(s, 10) {
Ok(version) => version,
Err(_) => panic!("could not parse as u32"),
}
}
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
pub const RESERVED_GEO_FIELD_NAME: &str = "_geo";
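Aside: a minimal standalone sketch of what this compile-time parsing enables, assuming Rust 1.82+ (where `u32::from_str_radix` is callable in a `const fn`). The `needs_upgrade` helper is illustrative and not part of this diff; the point is that numeric constants allow cheap `(major, minor, patch)` tuple comparisons like the ones the upgrade code later in this diff relies on.

const fn parse_u32(s: &str) -> u32 {
    match u32::from_str_radix(s, 10) {
        Ok(version) => version,
        Err(_) => panic!("could not parse as u32"),
    }
}

const VERSION_MAJOR: u32 = parse_u32(env!("CARGO_PKG_VERSION_MAJOR"));
const VERSION_MINOR: u32 = parse_u32(env!("CARGO_PKG_VERSION_MINOR"));
const VERSION_PATCH: u32 = parse_u32(env!("CARGO_PKG_VERSION_PATCH"));

// Illustrative helper: tuple comparison is lexicographic, so this is all
// that is needed to decide whether an index predates the current binary.
fn needs_upgrade(db_version: (u32, u32, u32)) -> bool {
    db_version < (VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}

fn main() {
    println!("current: {VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_PATCH}");
    println!("needs upgrade from 1.14.0: {}", needs_upgrade((1, 14, 0)));
}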


@@ -0,0 +1,50 @@
use heed::{
types::{SerdeJson, Str},
RoTxn, RwTxn,
};
use serde::{Deserialize, Serialize};
use crate::{index::main_key, Index};
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub struct DisabledTyposTerms {
pub disable_on_numbers: bool,
}
impl Index {
pub fn disabled_typos_terms(&self, txn: &RoTxn<'_>) -> heed::Result<DisabledTyposTerms> {
self.main
.remap_types::<Str, SerdeJson<DisabledTyposTerms>>()
.get(txn, main_key::DISABLED_TYPOS_TERMS)
.map(|option| option.unwrap_or_default())
}
pub(crate) fn put_disabled_typos_terms(
&self,
txn: &mut RwTxn<'_>,
disabled_typos_terms: &DisabledTyposTerms,
) -> heed::Result<()> {
self.main.remap_types::<Str, SerdeJson<DisabledTyposTerms>>().put(
txn,
main_key::DISABLED_TYPOS_TERMS,
disabled_typos_terms,
)?;
Ok(())
}
pub(crate) fn delete_disabled_typos_terms(&self, txn: &mut RwTxn<'_>) -> heed::Result<()> {
self.main
.remap_types::<Str, SerdeJson<DisabledTyposTerms>>()
.delete(txn, main_key::DISABLED_TYPOS_TERMS)?;
Ok(())
}
}
impl DisabledTyposTerms {
pub fn is_exact(&self, word: &str) -> bool {
// If disable_on_numbers is true, a word made of only numbers or punctuation is considered exact (typo tolerance is disabled for it)
self.disable_on_numbers && word.chars().all(|c| c.is_numeric() || c.is_ascii_punctuation())
}
}
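To make the rule above concrete, a standalone sketch (hypothetical test, not part of this diff): with `disable_on_numbers` enabled, tokens made only of digits and/or punctuation are reported as exact, so no typo-tolerant matching is attempted on them.

#[derive(Debug, Default)]
struct DisabledTyposTerms {
    disable_on_numbers: bool,
}

impl DisabledTyposTerms {
    fn is_exact(&self, word: &str) -> bool {
        self.disable_on_numbers
            && word.chars().all(|c| c.is_numeric() || c.is_ascii_punctuation())
    }
}

fn main() {
    let terms = DisabledTyposTerms { disable_on_numbers: true };
    assert!(terms.is_exact("2025"));   // digits only
    assert!(terms.is_exact("3.14"));   // digits plus punctuation
    assert!(!terms.is_exact("v2"));    // contains a letter: typos still allowed
    assert!(!DisabledTyposTerms::default().is_exact("2025")); // feature disabled
}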

File diff suppressed because it is too large.


@@ -12,6 +12,7 @@ mod asc_desc;
mod attribute_patterns;
mod criterion;
pub mod database_stats;
pub mod disabled_typos_terms;
mod error;
mod external_documents_ids;
pub mod facet;


@@ -1,10 +1,11 @@
use std::collections::BTreeSet;
use std::fmt::{Debug, Display};
use std::ops::Bound::{self, Excluded, Included};
use std::ops::Bound::{self, Excluded, Included, Unbounded};
use either::Either;
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Token};
use heed::types::LazyDecode;
use heed::BytesEncode;
use memchr::memmem::Finder;
use roaring::{MultiOps, RoaringBitmap};
use serde_json::Value;
@@ -14,7 +15,7 @@ use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::error::{Error, UserError};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec, OrderedF64Codec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::index::db_name::FACET_ID_STRING_DOCIDS;
use crate::{
@@ -271,7 +272,7 @@ impl<'a> Filter<'a> {
// as the facets values are all in the same database and prefixed by the
// field id and the level.
let (left, right) = match operator {
let (number_bounds, (left_str, right_str)) = match operator {
// return an error if the filter is not allowed for this field
Condition::GreaterThan(_)
| Condition::GreaterThanOrEqual(_)
@@ -305,17 +306,37 @@
));
}
Condition::GreaterThan(val) => {
(Excluded(val.parse_finite_float()?), Included(f64::MAX))
let number = val.parse_finite_float().ok();
let number_bounds = number.map(|number| (Excluded(number), Included(f64::MAX)));
let str_bounds = (Excluded(val.value()), Unbounded);
(number_bounds, str_bounds)
}
Condition::GreaterThanOrEqual(val) => {
(Included(val.parse_finite_float()?), Included(f64::MAX))
let number = val.parse_finite_float().ok();
let number_bounds = number.map(|number| (Included(number), Included(f64::MAX)));
let str_bounds = (Included(val.value()), Unbounded);
(number_bounds, str_bounds)
}
Condition::LowerThan(val) => {
let number = val.parse_finite_float().ok();
let number_bounds = number.map(|number| (Included(f64::MIN), Excluded(number)));
let str_bounds = (Unbounded, Excluded(val.value()));
(number_bounds, str_bounds)
}
Condition::LowerThan(val) => (Included(f64::MIN), Excluded(val.parse_finite_float()?)),
Condition::LowerThanOrEqual(val) => {
(Included(f64::MIN), Included(val.parse_finite_float()?))
let number = val.parse_finite_float().ok();
let number_bounds = number.map(|number| (Included(f64::MIN), Included(number)));
let str_bounds = (Unbounded, Included(val.value()));
(number_bounds, str_bounds)
}
Condition::Between { from, to } => {
(Included(from.parse_finite_float()?), Included(to.parse_finite_float()?))
let from_number = from.parse_finite_float().ok();
let to_number = to.parse_finite_float().ok();
let number_bounds =
from_number.zip(to_number).map(|(from, to)| (Included(from), Included(to)));
let str_bounds = (Included(from.value()), Included(to.value()));
(number_bounds, str_bounds)
}
Condition::Null => {
let is_null = index.null_faceted_documents_ids(rtxn, field_id)?;
@@ -415,29 +436,47 @@ impl<'a> Filter<'a> {
};
let mut output = RoaringBitmap::new();
Self::explore_facet_number_levels(
if let Some((left_number, right_number)) = number_bounds {
Self::explore_facet_levels(
rtxn,
numbers_db,
field_id,
&left_number,
&right_number,
universe,
&mut output,
)?;
}
Self::explore_facet_levels(
rtxn,
numbers_db,
strings_db,
field_id,
left,
right,
&left_str,
&right_str,
universe,
&mut output,
)?;
Ok(output)
}
/// Aggregates the document ids that are part of the specified range, automatically
/// going deeper through the facet levels.
fn explore_facet_number_levels(
rtxn: &heed::RoTxn<'_>,
db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
fn explore_facet_levels<'data, BoundCodec>(
rtxn: &'data heed::RoTxn<'data>,
db: heed::Database<FacetGroupKeyCodec<BoundCodec>, FacetGroupValueCodec>,
field_id: FieldId,
left: Bound<f64>,
right: Bound<f64>,
left: &'data Bound<<BoundCodec as heed::BytesEncode<'data>>::EItem>,
right: &'data Bound<<BoundCodec as heed::BytesEncode<'data>>::EItem>,
universe: Option<&RoaringBitmap>,
output: &mut RoaringBitmap,
) -> Result<()> {
) -> Result<()>
where
BoundCodec: for<'b> BytesEncode<'b>,
for<'b> <BoundCodec as BytesEncode<'b>>::EItem: Sized + PartialOrd,
{
match (left, right) {
// lower TO upper when lower > upper must return no result
(Included(l), Included(r)) if l > r => return Ok(()),
@@ -446,8 +485,8 @@ impl<'a> Filter<'a> {
(Excluded(l), Included(r)) if l >= r => return Ok(()),
(_, _) => (),
}
facet_range_search::find_docids_of_facet_within_bounds::<OrderedF64Codec>(
rtxn, db, field_id, &left, &right, universe, output,
facet_range_search::find_docids_of_facet_within_bounds::<BoundCodec>(
rtxn, db, field_id, left, right, universe, output,
)?;
Ok(())
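The behavioral change in this file: a comparison filter now produces an optional numeric range (None when the operand does not parse as a finite float, where the old code returned a hard error) plus an always-present lexicographic string range, and the document ids found in both facet databases are unioned into `output`. A standalone sketch of the bound-building half, with `parse_finite_float` approximated by `parse::<f64>` plus an `is_finite` check (hypothetical helper, simplified from the code above):

use std::ops::Bound::{self, Excluded, Included, Unbounded};

// Build both ranges for a `field > value` filter.
fn greater_than(raw: &str) -> (Option<(Bound<f64>, Bound<f64>)>, (Bound<&str>, Bound<&str>)) {
    let number = raw.parse::<f64>().ok().filter(|n| n.is_finite());
    let number_bounds = number.map(|n| (Excluded(n), Included(f64::MAX)));
    let str_bounds = (Excluded(raw), Unbounded);
    (number_bounds, str_bounds)
}

fn main() {
    let (nums, _strs) = greater_than("10");
    assert!(nums.is_some()); // numeric facet values > 10 are searched too

    let (nums, _strs) = greater_than("inf");
    assert!(nums.is_none()); // no numeric range: only the string range applies
}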
@@ -1249,28 +1288,24 @@ mod tests {
let result = filter.evaluate(&rtxn, &index).unwrap();
assert!(result.contains(0));
let filter = Filter::from_str("price < inf").unwrap().unwrap();
assert!(matches!(
filter.evaluate(&rtxn, &index),
Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
));
let result = filter.evaluate(&rtxn, &index).unwrap();
// this is now allowed because the comparison also matches string values
assert!(result.contains(1));
let filter = Filter::from_str("price = NaN").unwrap().unwrap();
let result = filter.evaluate(&rtxn, &index).unwrap();
assert!(result.is_empty());
let filter = Filter::from_str("price < NaN").unwrap().unwrap();
assert!(matches!(
filter.evaluate(&rtxn, &index),
Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
));
let result = filter.evaluate(&rtxn, &index).unwrap();
assert!(result.contains(1));
let filter = Filter::from_str("price = infinity").unwrap().unwrap();
let result = filter.evaluate(&rtxn, &index).unwrap();
assert!(result.contains(2));
let filter = Filter::from_str("price < infinity").unwrap().unwrap();
assert!(matches!(
filter.evaluate(&rtxn, &index),
Err(crate::Error::UserError(crate::error::UserError::InvalidFilter(_)))
));
let result = filter.evaluate(&rtxn, &index).unwrap();
assert!(result.contains(0));
assert!(result.contains(1));
}
#[test]


@@ -8,7 +8,7 @@ use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult};
use self::new::{execute_vector_search, PartialSearchResult, VectorStoreStats};
use crate::filterable_attributes_rules::{filtered_matching_patterns, matching_features};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder;
@@ -269,6 +269,12 @@ impl<'a> Search<'a> {
)?,
};
if let Some(VectorStoreStats { total_time, total_queries, total_results }) =
ctx.vector_store_stats
{
tracing::debug!("Vector store stats: total_time={total_time:.02?}, total_queries={total_queries}, total_results={total_results}");
}
// consume context and located_query_terms to build MatchingWords.
let matching_words = match located_query_terms {
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),


@@ -22,6 +22,8 @@ mod vector_sort;
mod tests;
use std::collections::HashSet;
use std::ops::AddAssign;
use std::time::Duration;
use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::{Language, TokenizerBuilder};
@@ -72,6 +74,7 @@ pub struct SearchContext<'ctx> {
pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<RestrictedFids>,
pub prefix_search: PrefixSearch,
pub vector_store_stats: Option<VectorStoreStats>,
}
impl<'ctx> SearchContext<'ctx> {
@@ -101,6 +104,7 @@ impl<'ctx> SearchContext<'ctx> {
phrase_docids: <_>::default(),
restricted_fids: None,
prefix_search,
vector_store_stats: None,
})
}
@@ -166,6 +170,25 @@ impl<'ctx> SearchContext<'ctx> {
}
}
#[derive(Debug, Default)]
pub struct VectorStoreStats {
/// The total time spent on vector search.
pub total_time: Duration,
/// The number of searches performed.
pub total_queries: usize,
/// The number of nearest neighbors found.
pub total_results: usize,
}
impl AddAssign for VectorStoreStats {
fn add_assign(&mut self, other: Self) {
let Self { total_time, total_queries, total_results } = self;
*total_time += other.total_time;
*total_queries += other.total_queries;
*total_results += other.total_results;
}
}
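The `AddAssign` impl is what lets every vector-store call fold its measurements into a running total with `+=`. A standalone sketch of that accumulation pattern, mirroring the `get_or_insert_default` usage in the vector sort code below (assumes Rust 1.83+ for `Option::get_or_insert_default`; the numbers are made up):

use std::ops::AddAssign;
use std::time::Duration;

#[derive(Debug, Default)]
struct VectorStoreStats {
    total_time: Duration,
    total_queries: usize,
    total_results: usize,
}

impl AddAssign for VectorStoreStats {
    fn add_assign(&mut self, other: Self) {
        self.total_time += other.total_time;
        self.total_queries += other.total_queries;
        self.total_results += other.total_results;
    }
}

fn main() {
    let mut stats: Option<VectorStoreStats> = None; // as in SearchContext
    for results in [10usize, 7] {
        *stats.get_or_insert_default() += VectorStoreStats {
            total_time: Duration::from_millis(3),
            total_queries: 1,
            total_results: results,
        };
    }
    let stats = stats.unwrap();
    assert_eq!(stats.total_queries, 2);
    assert_eq!(stats.total_results, 17);
}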
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
pub enum Word {
Original(Interned<String>),


@@ -1,8 +1,10 @@
use std::iter::FromIterator;
use std::time::Instant;
use roaring::RoaringBitmap;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use super::VectorStoreStats;
use crate::score_details::{self, ScoreDetails};
use crate::vector::{ArroyWrapper, DistributionShift, Embedder};
use crate::{DocumentId, Result, SearchContext, SearchLogger};
@@ -53,9 +55,15 @@ impl<Q: RankingRuleQueryTrait> VectorSort<Q> {
) -> Result<()> {
let target = &self.target;
let before = Instant::now();
let reader = ArroyWrapper::new(ctx.index.vector_arroy, self.embedder_index, self.quantized);
let results = reader.nns_by_vector(ctx.txn, target, self.limit, Some(vector_candidates))?;
self.cached_sorted_docids = results.into_iter();
*ctx.vector_store_stats.get_or_insert_default() += VectorStoreStats {
total_time: before.elapsed(),
total_queries: 1,
total_results: self.cached_sorted_docids.len(),
};
Ok(())
}


@@ -1,4 +0,0 @@
---
source: milli/src/index.rs
---
[0, ]


@@ -0,0 +1,4 @@
---
source: crates/milli/src/test_index.rs
---
[0, ]


@@ -0,0 +1,4 @@
---
source: crates/milli/src/test_index.rs
---
[]

File diff suppressed because it is too large.


@@ -127,7 +127,8 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
// merge all deletions
let obkv = KvReaderDelAdd::from_slice(value);
if let Some(value) = obkv.get(DelAdd::Deletion) {
let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid);
let delete_from_exact = settings_diff.old.exact_attributes.contains(&fid)
|| settings_diff.old.disabled_typos_terms.is_exact(w);
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
obkv.insert(DelAdd::Deletion, value)?;
@@ -139,7 +140,8 @@ }
}
// merge all additions
if let Some(value) = obkv.get(DelAdd::Addition) {
let add_in_exact = settings_diff.new.exact_attributes.contains(&fid);
let add_in_exact = settings_diff.new.exact_attributes.contains(&fid)
|| settings_diff.new.disabled_typos_terms.is_exact(w);
buffer.clear();
let mut obkv = KvWriterDelAdd::new(&mut buffer);
obkv.insert(DelAdd::Addition, value)?;


@@ -273,14 +273,11 @@ pub(crate) fn write_typed_chunk_into_index(
unreachable!();
};
let clonable_word_docids = unsafe { as_cloneable_grenad(&word_docids_reader) }?;
let clonable_exact_word_docids =
unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?;
word_docids_builder.push(word_docids_reader.into_cursor()?);
exact_word_docids_builder.push(exact_word_docids_reader.into_cursor()?);
word_fid_docids_builder.push(word_fid_docids_reader.into_cursor()?);
fst_merger_builder.push(clonable_word_docids.into_cursor()?);
fst_merger_builder.push(clonable_exact_word_docids.into_cursor()?);
}
let word_docids_merger = word_docids_builder.build();


@@ -319,8 +319,11 @@ impl WordDocidsExtractors {
let doc_alloc = &context.doc_alloc;
let exact_attributes = index.exact_attributes(rtxn)?;
let is_exact_attribute =
|fname: &str| exact_attributes.iter().any(|attr| contained_in(fname, attr));
let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
let is_exact = |fname: &str, word: &str| {
exact_attributes.iter().any(|attr| contained_in(fname, attr))
|| disabled_typos_terms.is_exact(word)
};
match document_change {
DocumentChange::Deletion(inner) => {
let mut token_fn = |fname: &str, fid, pos, word: &str| {
@@ -328,7 +331,7 @@ fid,
fid,
pos,
word,
is_exact_attribute(fname),
is_exact(fname, word),
inner.docid(),
doc_alloc,
)
@@ -356,7 +359,7 @@ fid,
fid,
pos,
word,
is_exact_attribute(fname),
is_exact(fname, word),
inner.docid(),
doc_alloc,
)
@@ -372,7 +375,7 @@ fid,
fid,
pos,
word,
is_exact_attribute(fname),
is_exact(fname, word),
inner.docid(),
doc_alloc,
)
@@ -389,7 +392,7 @@ fid,
fid,
pos,
word,
is_exact_attribute(fname),
is_exact(fname, word),
inner.docid(),
doc_alloc,
)


@@ -9,6 +9,7 @@ pub use document_operation::{DocumentOperation, PayloadStats};
use hashbrown::HashMap;
use heed::RwTxn;
pub use partial_dump::PartialDump;
pub use post_processing::recompute_word_fst_from_word_docids_database;
pub use update_by_function::UpdateByFunction;
pub use write::ChannelCongestion;
use write::{build_vectors, update_index, write_to_db};


@@ -131,6 +131,20 @@ fn compute_word_fst(
}
}
pub fn recompute_word_fst_from_word_docids_database(index: &Index, wtxn: &mut RwTxn) -> Result<()> {
let fst = fst::Set::default().map_data(std::borrow::Cow::Owned)?;
let mut word_fst_builder = WordFstBuilder::new(&fst)?;
let words = index.word_docids.iter(wtxn)?.remap_data_type::<DecodeIgnore>();
for res in words {
let (word, _) = res?;
word_fst_builder.register_word(DelAdd::Addition, word.as_ref())?;
}
let (word_fst_mmap, _) = word_fst_builder.build(index, wtxn)?;
index.main.remap_types::<Str, Bytes>().put(wtxn, WORDS_FST_KEY, &word_fst_mmap)?;
Ok(())
}
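This helper walks every key of `word_docids` in lexicographic order and feeds it to a fresh FST builder. A standalone sketch of the same rebuild idea using the `fst` crate (0.4 API) directly, with a hypothetical word list; milli's `WordFstBuilder` layers del/add merging on top of this:

use fst::SetBuilder;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Keys must be inserted in lexicographic byte order, which is exactly
    // the order an LMDB/heed iteration over `word_docids` yields them in.
    let words = ["1234", "kefir", "zebra"];
    let mut builder = SetBuilder::memory();
    for word in words {
        builder.insert(word)?;
    }
    let set = fst::Set::new(builder.into_inner()?)?;
    assert!(set.contains("kefir"));
    assert!(!set.contains("missing"));
    Ok(())
}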
#[tracing::instrument(level = "trace", skip_all, target = "indexing::facet_search")]
fn compute_facet_search_database(
index: &Index,


@@ -17,6 +17,7 @@ use super::IndexerConfig;
use crate::attribute_patterns::PatternMatch;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::criterion::Criterion;
use crate::disabled_typos_terms::DisabledTyposTerms;
use crate::error::UserError;
use crate::fields_ids_map::metadata::{FieldIdMapWithMetadata, MetadataBuilder};
use crate::filterable_attributes_rules::match_faceted_field;
@@ -169,6 +170,7 @@ pub struct Settings<'a, 't, 'i> {
synonyms: Setting<BTreeMap<String, Vec<String>>>,
primary_key: Setting<String>,
authorize_typos: Setting<bool>,
disable_on_numbers: Setting<bool>,
min_word_len_two_typos: Setting<u8>,
min_word_len_one_typo: Setting<u8>,
exact_words: Setting<BTreeSet<String>>,
@@ -207,6 +209,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
synonyms: Setting::NotSet,
primary_key: Setting::NotSet,
authorize_typos: Setting::NotSet,
disable_on_numbers: Setting::NotSet,
exact_words: Setting::NotSet,
min_word_len_two_typos: Setting::NotSet,
min_word_len_one_typo: Setting::NotSet,
@@ -354,6 +357,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.min_word_len_one_typo = Setting::Reset;
}
pub fn set_disable_on_numbers(&mut self, disable_on_numbers: bool) {
self.disable_on_numbers = Setting::Set(disable_on_numbers);
}
pub fn reset_disable_on_numbers(&mut self) {
self.disable_on_numbers = Setting::Reset;
}
pub fn set_exact_words(&mut self, words: BTreeSet<String>) {
self.exact_words = Setting::Set(words);
}
@@ -866,6 +877,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(())
}
fn update_disabled_typos_terms(&mut self) -> Result<()> {
let mut disabled_typos_terms = self.index.disabled_typos_terms(self.wtxn)?;
match self.disable_on_numbers {
Setting::Set(disable_on_numbers) => {
disabled_typos_terms.disable_on_numbers = disable_on_numbers;
}
Setting::Reset => {
self.index.delete_disabled_typos_terms(self.wtxn)?;
disabled_typos_terms.disable_on_numbers =
DisabledTyposTerms::default().disable_on_numbers;
}
Setting::NotSet => (),
}
self.index.put_disabled_typos_terms(self.wtxn, &disabled_typos_terms)?;
Ok(())
}
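The update follows milli's usual `Setting` tri-state (`Set`/`Reset`/`NotSet`). A standalone sketch of that merge logic in isolation (names mirror the code above; the `apply` helper is illustrative, not part of this diff):

enum Setting<T> {
    Set(T),
    Reset,
    NotSet,
}

#[derive(Default)]
struct DisabledTyposTerms {
    disable_on_numbers: bool,
}

fn apply(current: &mut DisabledTyposTerms, update: Setting<bool>) {
    match update {
        Setting::Set(v) => current.disable_on_numbers = v, // user-provided value
        Setting::Reset => *current = DisabledTyposTerms::default(), // back to default
        Setting::NotSet => (), // leave the stored value untouched
    }
}

fn main() {
    let mut terms = DisabledTyposTerms::default();
    apply(&mut terms, Setting::Set(true));
    assert!(terms.disable_on_numbers);
    apply(&mut terms, Setting::NotSet);
    assert!(terms.disable_on_numbers); // NotSet preserves the previous value
    apply(&mut terms, Setting::Reset);
    assert!(!terms.disable_on_numbers);
}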
fn update_exact_words(&mut self) -> Result<()> {
match self.exact_words {
Setting::Set(ref mut words) => {
@@ -1246,6 +1275,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.update_prefix_search()?;
self.update_facet_search()?;
self.update_localized_attributes_rules()?;
self.update_disabled_typos_terms()?;
let embedding_config_updates = self.update_embedding_configs()?;
@@ -1327,6 +1357,7 @@ impl InnerIndexSettingsDiff {
|| old_settings.prefix_search != new_settings.prefix_search
|| old_settings.localized_attributes_rules
!= new_settings.localized_attributes_rules
|| old_settings.disabled_typos_terms != new_settings.disabled_typos_terms
};
let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
@@ -1526,6 +1557,7 @@ pub(crate) struct InnerIndexSettings {
pub user_defined_searchable_attributes: Option<Vec<String>>,
pub sortable_fields: HashSet<String>,
pub exact_attributes: HashSet<FieldId>,
pub disabled_typos_terms: DisabledTyposTerms,
pub proximity_precision: ProximityPrecision,
pub embedding_configs: EmbeddingConfigs,
pub geo_fields_ids: Option<(FieldId, FieldId)>,
@@ -1574,7 +1606,7 @@ impl InnerIndexSettings {
.map(|fields| fields.into_iter().map(|f| f.to_string()).collect());
let builder = MetadataBuilder::from_index(index, rtxn)?;
let fields_ids_map = FieldIdMapWithMetadata::new(fields_ids_map, builder);
let disabled_typos_terms = index.disabled_typos_terms(rtxn)?;
Ok(Self {
stop_words,
allowed_separators,
@@ -1592,6 +1624,7 @@
geo_fields_ids,
prefix_search,
facet_search,
disabled_typos_terms,
})
}


@@ -896,6 +896,7 @@ fn test_correct_settings_init() {
localized_attributes_rules,
prefix_search,
facet_search,
disable_on_numbers,
} = settings;
assert!(matches!(searchable_fields, Setting::NotSet));
assert!(matches!(displayed_fields, Setting::NotSet));
@@ -923,6 +924,7 @@
assert!(matches!(localized_attributes_rules, Setting::NotSet));
assert!(matches!(prefix_search, Setting::NotSet));
assert!(matches!(facet_search, Setting::NotSet));
assert!(matches!(disable_on_numbers, Setting::NotSet));
})
.unwrap();
}


@@ -1,12 +1,14 @@
mod v1_12;
mod v1_13;
mod v1_14;
mod v1_15;
use heed::RwTxn;
use v1_12::{V1_12_3_To_V1_13_0, V1_12_To_V1_12_3};
use v1_13::{V1_13_0_To_V1_13_1, V1_13_1_To_Latest_V1_13};
use v1_14::Latest_V1_13_To_Latest_V1_14;
use v1_15::Latest_V1_14_To_Latest_V1_15;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::{Progress, VariableNameStep};
use crate::{Index, InternalError, Result};
@@ -23,12 +25,16 @@ trait UpgradeIndex {
}
/// Return true if the cached stats of the index must be regenerated
pub fn upgrade(
pub fn upgrade<MSP>(
wtxn: &mut RwTxn,
index: &Index,
db_version: (u32, u32, u32),
must_stop_processing: MSP,
progress: Progress,
) -> Result<bool> {
) -> Result<bool>
where
MSP: Fn() -> bool + Sync,
{
let from = index.get_version(wtxn)?.unwrap_or(db_version);
let upgrade_functions: &[&dyn UpgradeIndex] = &[
&V1_12_To_V1_12_3 {},
@@ -36,6 +42,10 @@ pub fn upgrade(
&V1_13_0_To_V1_13_1 {},
&V1_13_1_To_Latest_V1_13 {},
&Latest_V1_13_To_Latest_V1_14 {},
&Latest_V1_14_To_Latest_V1_15 {},
// This is the last upgrade function; it will be called when the index is up to date.
// Any other upgrade function should be added before this one.
&ToCurrentNoOp {},
];
let start = match from {
@@ -43,8 +53,9 @@ pub fn upgrade(
(1, 12, 3..) => 1,
(1, 13, 0) => 2,
(1, 13, _) => 4,
(1, 14, _) => 5,
// We must handle the current version in the match because in case of a failure some indexes may have been upgraded but not others.
(1, 14, _) => 4,
(1, 15, _) => 6,
(major, minor, patch) => {
return Err(InternalError::CannotUpgradeToVersion(major, minor, patch).into())
}
@@ -56,6 +67,9 @@ pub fn upgrade(
let mut current_version = from;
let mut regenerate_stats = false;
for (i, upgrade) in upgrade_path.iter().enumerate() {
if (must_stop_processing)() {
return Err(crate::Error::InternalError(InternalError::AbortedIndexation));
}
let target = upgrade.target_version();
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
format!(
@@ -77,3 +91,22 @@ pub fn upgrade(
Ok(regenerate_stats)
}
#[allow(non_camel_case_types)]
struct ToCurrentNoOp {}
impl UpgradeIndex for ToCurrentNoOp {
fn upgrade(
&self,
_wtxn: &mut RwTxn,
_index: &Index,
_original: (u32, u32, u32),
_progress: Progress,
) -> Result<bool> {
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)
}
}
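The new `MSP` bound threads the scheduler's abort flag into the upgrade loop so a long migration can be interrupted between steps. A standalone sketch of that pattern (the step names and simplified error type are made up; milli returns `InternalError::AbortedIndexation` here):

use std::sync::atomic::{AtomicBool, Ordering};

fn run_steps<MSP>(steps: &[&str], must_stop_processing: MSP) -> Result<(), String>
where
    MSP: Fn() -> bool + Sync,
{
    for step in steps {
        // Checked before every step, exactly like the loop above.
        if (must_stop_processing)() {
            return Err("aborted indexation".into());
        }
        println!("upgrading: {step}");
    }
    Ok(())
}

fn main() {
    let stop = AtomicBool::new(false); // a scheduler thread would flip this on shutdown
    run_steps(&["1.14 -> 1.15", "noop to current"], || stop.load(Ordering::Relaxed)).unwrap();
}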


@@ -1,7 +1,6 @@
use heed::RwTxn;
use super::UpgradeIndex;
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::database_stats::DatabaseStats;
use crate::progress::Progress;
use crate::{make_enum_progress, Index, Result};
@@ -51,10 +50,6 @@ impl UpgradeIndex for V1_13_1_To_Latest_V1_13 {
}
fn target_version(&self) -> (u32, u32, u32) {
(
VERSION_MAJOR.parse().unwrap(),
VERSION_MINOR.parse().unwrap(),
VERSION_PATCH.parse().unwrap(),
)
(1, 13, 3)
}
}


@@ -0,0 +1,35 @@
use heed::RwTxn;
use super::UpgradeIndex;
use crate::progress::Progress;
use crate::update::new::indexer::recompute_word_fst_from_word_docids_database;
use crate::{make_enum_progress, Index, Result};
#[allow(non_camel_case_types)]
pub(super) struct Latest_V1_14_To_Latest_V1_15();
impl UpgradeIndex for Latest_V1_14_To_Latest_V1_15 {
fn upgrade(
&self,
wtxn: &mut RwTxn,
index: &Index,
_original: (u32, u32, u32),
progress: Progress,
) -> Result<bool> {
// Recompute the word FST from the word docids database.
make_enum_progress! {
enum TypoTolerance {
RecomputeWordFst,
}
};
progress.update_progress(TypoTolerance::RecomputeWordFst);
recompute_word_fst_from_word_docids_database(index, wtxn)?;
Ok(false)
}
fn target_version(&self) -> (u32, u32, u32) {
(1, 15, 0)
}
}