Add the new tasks with most of the job done

This commit is contained in:
Tamo 2025-01-14 14:54:00 +01:00 committed by Louis Dureuil
parent b15de68831
commit d3654906bf
No known key found for this signature in database
38 changed files with 572 additions and 204 deletions

View file

@ -1,2 +1,6 @@
/// Major component of this crate's version, captured at compile time from the
/// `CARGO_PKG_VERSION_MAJOR` environment variable (kept as a string).
pub static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
/// Minor component of this crate's version, captured at compile time from the
/// `CARGO_PKG_VERSION_MINOR` environment variable (kept as a string).
pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
/// Patch component of this crate's version, captured at compile time from the
/// `CARGO_PKG_VERSION_PATCH` environment variable (kept as a string).
pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
/// Name of the reserved `_vectors` document field.
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
/// Name of the reserved `_geo` document field.
pub const RESERVED_GEO_FIELD_NAME: &str = "_geo";

View file

@ -10,7 +10,7 @@ use rhai::EvalAltResult;
use serde_json::Value;
use thiserror::Error;
use crate::constants::RESERVED_GEO_FIELD_NAME;
use crate::constants::{RESERVED_GEO_FIELD_NAME, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::documents::{self, DocumentsBatchCursorError};
use crate::thread_pool_no_abort::PanicCatched;
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
@ -288,6 +288,12 @@ and can not be more than 511 bytes.", .document_id.to_string()
DocumentEditionCompilationError(rhai::ParseError),
#[error("{0}")]
DocumentEmbeddingError(String),
#[error("Upgrade could not be processed because v{0}.{1}.{2} of the database is too old. Please re-open the v{0}.{1}.{2} and use a dump to upgrade your version. The oldest version meilisearch can upgrade from is v1.12.0.")]
TooOldForUpgrade(u32, u32, u32),
#[error("Upgrade could not be processed because the database version (v{0}.{1}.{2}) is newer than the targeted version (v{}.{}.{})", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)]
CannotDowngrade(u32, u32, u32),
#[error("Cannot upgrade to unknown version v{0}.{1}.{2}.")]
CannotUpgradeToUnknownVersion(u32, u32, u32),
}
impl From<crate::vector::Error> for Error {

View file

@ -10,6 +10,7 @@ mod roaring_bitmap_length;
mod str_beu32_codec;
mod str_ref;
mod str_str_u8_codec;
pub mod version;
pub use byte_slice_ref::BytesRefCodec;
use heed::BoxedError;

View file

@ -0,0 +1,44 @@
use std::mem::size_of;
use std::{borrow::Cow, mem::size_of_val};
use byteorder::{BigEndian, ByteOrder};
use heed::{BoxedError, BytesDecode, BytesEncode};
/// Size in bytes of an encoded version: three `u32` values.
const VERSION_SIZE: usize = std::mem::size_of::<u32>() * 3;
/// Error produced when the bytes handed to the version decoder do not have the
/// expected length. The wrapped value is the number of bytes actually received.
#[derive(thiserror::Error, Debug)]
#[error(
"Could not decode the version: Expected {} bytes but instead received {0} bytes",
VERSION_SIZE
)]
pub struct DecodeVersionError(usize);
/// Codec marker type for a `(major, minor, patch)` version made of three `u32`.
pub struct VersionCodec;
impl<'a> BytesEncode<'a> for VersionCodec {
    type EItem = (u32, u32, u32);

    /// Serializes a `(major, minor, patch)` triple as three consecutive
    /// big-endian `u32` values, in that order.
    fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let (major, minor, patch) = *item;
        let mut encoded = Vec::with_capacity(size_of::<u32>() * 3);
        for component in [major, minor, patch] {
            encoded.extend_from_slice(&component.to_be_bytes());
        }
        Ok(Cow::Owned(encoded))
    }
}
impl<'a> BytesDecode<'a> for VersionCodec {
type DItem = (u32, u32, u32);
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
if bytes.len() != VERSION_SIZE {
Err(Box::new(DecodeVersionError(bytes.len())))
} else {
let major = BigEndian::read_u32(bytes);
let bytes = &bytes[size_of_val(&major)..];
let minor = BigEndian::read_u32(bytes);
let bytes = &bytes[size_of_val(&major)..];
let patch = BigEndian::read_u32(bytes);
Ok((major, minor, patch))
}
}
}

View file

@ -10,7 +10,7 @@ use roaring::RoaringBitmap;
use rstar::RTree;
use serde::{Deserialize, Serialize};
use crate::constants::RESERVED_VECTORS_FIELD_NAME;
use crate::constants::{self, RESERVED_VECTORS_FIELD_NAME};
use crate::documents::PrimaryKey;
use crate::error::{InternalError, UserError};
use crate::fields_ids_map::FieldsIdsMap;
@ -18,6 +18,7 @@ use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::version::VersionCodec;
use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
use crate::order_by_map::OrderByMap;
use crate::proximity::ProximityPrecision;
@ -33,6 +34,7 @@ pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
pub mod main_key {
pub const VERSION_KEY: &str = "version";
pub const CRITERIA_KEY: &str = "criteria";
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
@ -223,12 +225,9 @@ impl Index {
let vector_arroy = env.create_database(&mut wtxn, Some(VECTOR_ARROY))?;
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
wtxn.commit()?;
Index::set_creation_dates(&env, main, created_at, updated_at)?;
Ok(Index {
env,
let this = Index {
env: env.clone(),
main,
external_documents_ids,
word_docids,
@ -253,7 +252,22 @@ impl Index {
vector_arroy,
embedder_category_id,
documents,
})
};
if this.get_version(&wtxn)?.is_none() {
this.put_version(
&mut wtxn,
(
constants::VERSION_MAJOR.parse().unwrap(),
constants::VERSION_MINOR.parse().unwrap(),
constants::VERSION_PATCH.parse().unwrap(),
),
)?;
}
wtxn.commit()?;
Index::set_creation_dates(&this.env, this.main, created_at, updated_at)?;
Ok(this)
}
pub fn new<P: AsRef<Path>>(options: heed::EnvOpenOptions, path: P) -> Result<Index> {
@ -331,6 +345,26 @@ impl Index {
self.env.prepare_for_closing()
}
/* version */
/// Stores the `(major, minor, patch)` version of the database in the main
/// database, under `main_key::VERSION_KEY`.
pub(crate) fn put_version(
    &self,
    wtxn: &mut RwTxn<'_>,
    (major, minor, patch): (u32, u32, u32),
) -> heed::Result<()> {
    let version = (major, minor, patch);
    let versions = self.main.remap_types::<Str, VersionCodec>();
    versions.put(wtxn, main_key::VERSION_KEY, &version)
}
/// Reads the `(major, minor, patch)` version stored under
/// `main_key::VERSION_KEY`. Returns `None` when no version was ever written.
pub(crate) fn get_version(&self, rtxn: &RoTxn<'_>) -> heed::Result<Option<(u32, u32, u32)>> {
    let versions = self.main.remap_types::<Str, VersionCodec>();
    versions.get(rtxn, main_key::VERSION_KEY)
}
/* documents ids */
/// Writes the documents ids that corresponds to the user-ids-documents-ids FST.

View file

@ -1,5 +1,6 @@
use std::any::TypeId;
use std::borrow::Cow;
use std::marker::PhantomData;
use std::sync::atomic::{AtomicU32, Ordering};
use std::sync::{Arc, RwLock};
@ -153,3 +154,41 @@ pub struct ProgressStepView {
pub finished: u32,
pub total: u32,
}
/// A progress step whose displayed name is chosen at runtime while the step
/// itself stays the same.
///
/// The type parameter `U` only serves to give each usage its own `TypeId`;
/// declare a dedicated marker type per call site to avoid conflicts:
/// ```text
/// enum UpgradeVersion {}
///
/// progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
/// "v1 to v2",
/// 0,
/// 10,
/// ));
/// ```
pub struct VariableNameStep<U: Send + Sync + 'static> {
    name: String,
    current: u32,
    total: u32,
    phantom: PhantomData<U>,
}

impl<U> VariableNameStep<U>
where
    U: Send + Sync + 'static,
{
    /// Builds a step named `name`, positioned at `current` out of `total`.
    pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
        let name: String = name.into();
        VariableNameStep { name, current, total, phantom: PhantomData }
    }
}
impl<U> Step for VariableNameStep<U>
where
    U: Send + Sync + 'static,
{
    /// Returns an owned copy of the runtime-chosen step name.
    fn name(&self) -> Cow<'static, str> {
        Cow::Owned(self.name.clone())
    }

    /// Returns the position given at construction.
    fn current(&self) -> u32 {
        self.current
    }

    /// Returns the total given at construction.
    fn total(&self) -> u32 {
        self.total
    }
}

View file

@ -21,6 +21,7 @@ mod indexer_config;
pub mod new;
pub(crate) mod settings;
mod update_step;
pub mod upgrade;
mod word_prefix_docids;
mod words_prefix_integer_docids;
mod words_prefixes_fst;

View file

@ -0,0 +1,65 @@
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
use crate::progress::{Progress, VariableNameStep};
use crate::{Index, Result, UserError};
/// Upgrades `index` to the version of this crate.
///
/// The starting version is the one stored in the index. When the index never
/// stored a version, `base_version` (supplied by the caller) is used instead.
/// Every registered upgrade function from that starting point is run in order,
/// each one being announced through `progress`. Once they all succeeded, the
/// crate version is written to the index and committed, so that a later call
/// does not replay the same upgrade path.
///
/// An index that is already at the current `major.minor` has nothing to run;
/// only its stored version is refreshed.
///
/// # Errors
///
/// - `UserError::TooOldForUpgrade` when the starting version is older than v1.12.
/// - `UserError::CannotDowngrade` when the stored version is newer than this crate.
/// - `UserError::CannotUpgradeToUnknownVersion` when no upgrade path is known
///   for the stored version.
/// - Any error returned by the storage layer or by an upgrade function.
///
/// # Panics
///
/// Panics if the `VERSION_*` constants are not valid `u32` values.
pub fn upgrade(index: &Index, base_version: (u32, u32, u32), progress: Progress) -> Result<()> {
    // NOTE(review): this write transaction stays open while the upgrade functions run;
    // presumably they must not open their own write transaction on the same
    // environment — confirm against heed/LMDB single-writer semantics.
    let mut wtxn = index.env.write_txn()?;
    let from = index.get_version(&wtxn)?;
    let upgrade_functions =
        [(v1_12_to_v1_13 as fn(&Index, Progress) -> Result<()>, "Upgrading from v1.12 to v1.13")];

    let current_major: u32 =
        VERSION_MAJOR.parse().expect("CARGO_PKG_VERSION_MAJOR must be a valid u32");
    let current_minor: u32 =
        VERSION_MINOR.parse().expect("CARGO_PKG_VERSION_MINOR must be a valid u32");
    let current_patch: u32 =
        VERSION_PATCH.parse().expect("CARGO_PKG_VERSION_PATCH must be a valid u32");

    let start = match from {
        // If there was no version it means we're coming from the base version specified by the index-scheduler
        None if base_version.0 == 1 && base_version.1 == 12 => 0,
        Some((1, 12, _)) => 0,

        // --- Error handling
        None => {
            return Err(UserError::TooOldForUpgrade(
                base_version.0,
                base_version.1,
                base_version.2,
            )
            .into());
        }
        Some((major, minor, patch)) if major == 0 || (major == 1 && minor < 12) => {
            return Err(UserError::TooOldForUpgrade(major, minor, patch).into());
        }
        Some((major, minor, patch)) if major > current_major => {
            return Err(UserError::CannotDowngrade(major, minor, patch).into());
        }
        Some((major, minor, patch)) if major == current_major && minor > current_minor => {
            return Err(UserError::CannotDowngrade(major, minor, patch).into());
        }
        Some((major, minor, patch))
            if major == current_major && minor == current_minor && patch > current_patch =>
        {
            return Err(UserError::CannotDowngrade(major, minor, patch).into());
        }

        // Already at the current `major.minor` (patch is lower or equal at this point):
        // there is no upgrade function to run, start past the end of the list.
        Some((major, minor, _)) if major == current_major && minor == current_minor => {
            upgrade_functions.len()
        }

        Some((major, minor, patch)) => {
            return Err(UserError::CannotUpgradeToUnknownVersion(major, minor, patch).into())
        }
    };

    // Marker type giving the upgrade steps their own `TypeId` in the progress report.
    enum UpgradeVersion {}
    let upgrade_path = &upgrade_functions[start..];

    for (i, (upgrade_function, upgrade_msg)) in upgrade_path.iter().enumerate() {
        progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
            upgrade_msg.to_string(),
            i as u32,
            upgrade_path.len() as u32,
        ));
        (upgrade_function)(index, progress.clone())?;
    }

    // Record the version we upgraded to; without this the transaction would be
    // dropped and the index would keep reporting its old (or missing) version.
    index.put_version(&mut wtxn, (current_major, current_minor, current_patch))?;
    wtxn.commit()?;

    Ok(())
}
/// Upgrade step registered for the v1.12 to v1.13 transition.
///
/// Currently a no-op: both arguments are ignored and `Ok(())` is returned.
fn v1_12_to_v1_13(_index: &Index, _progress: Progress) -> Result<()> {
Ok(())
}