mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 03:47:02 +02:00
Add the new tasks with most of the job done
This commit is contained in:
parent
b15de68831
commit
d3654906bf
38 changed files with 572 additions and 204 deletions
|
@ -1,2 +1,6 @@
|
|||
pub static VERSION_MAJOR: &str = env!("CARGO_PKG_VERSION_MAJOR");
|
||||
pub static VERSION_MINOR: &str = env!("CARGO_PKG_VERSION_MINOR");
|
||||
pub static VERSION_PATCH: &str = env!("CARGO_PKG_VERSION_PATCH");
|
||||
|
||||
pub const RESERVED_VECTORS_FIELD_NAME: &str = "_vectors";
|
||||
pub const RESERVED_GEO_FIELD_NAME: &str = "_geo";
|
||||
|
|
|
@ -10,7 +10,7 @@ use rhai::EvalAltResult;
|
|||
use serde_json::Value;
|
||||
use thiserror::Error;
|
||||
|
||||
use crate::constants::RESERVED_GEO_FIELD_NAME;
|
||||
use crate::constants::{RESERVED_GEO_FIELD_NAME, VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use crate::documents::{self, DocumentsBatchCursorError};
|
||||
use crate::thread_pool_no_abort::PanicCatched;
|
||||
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};
|
||||
|
@ -288,6 +288,12 @@ and can not be more than 511 bytes.", .document_id.to_string()
|
|||
DocumentEditionCompilationError(rhai::ParseError),
|
||||
#[error("{0}")]
|
||||
DocumentEmbeddingError(String),
|
||||
#[error("Upgrade could not be processed because v{0}.{1}.{2} of the database is too old. Please re-open the v{0}.{1}.{2} and use a dump to upgrade your version. The oldest version meilisearch can upgrade from is v1.12.0.")]
|
||||
TooOldForUpgrade(u32, u32, u32),
|
||||
#[error("Upgrade could not be processed because the database version (v{0}.{1}.{2}) is newer than the targeted version (v{}.{}.{})", VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH)]
|
||||
CannotDowngrade(u32, u32, u32),
|
||||
#[error("Cannot upgrade to unknown version v{0}.{1}.{2}.")]
|
||||
CannotUpgradeToUnknownVersion(u32, u32, u32),
|
||||
}
|
||||
|
||||
impl From<crate::vector::Error> for Error {
|
||||
|
|
|
@ -10,6 +10,7 @@ mod roaring_bitmap_length;
|
|||
mod str_beu32_codec;
|
||||
mod str_ref;
|
||||
mod str_str_u8_codec;
|
||||
pub mod version;
|
||||
|
||||
pub use byte_slice_ref::BytesRefCodec;
|
||||
use heed::BoxedError;
|
||||
|
|
44
crates/milli/src/heed_codec/version.rs
Normal file
44
crates/milli/src/heed_codec/version.rs
Normal file
|
@ -0,0 +1,44 @@
|
|||
use std::mem::size_of;
|
||||
use std::{borrow::Cow, mem::size_of_val};
|
||||
|
||||
use byteorder::{BigEndian, ByteOrder};
|
||||
use heed::{BoxedError, BytesDecode, BytesEncode};
|
||||
|
||||
const VERSION_SIZE: usize = std::mem::size_of::<u32>() * 3;
|
||||
|
||||
#[derive(thiserror::Error, Debug)]
|
||||
#[error(
|
||||
"Could not decode the version: Expected {} bytes but instead received {0} bytes",
|
||||
VERSION_SIZE
|
||||
)]
|
||||
pub struct DecodeVersionError(usize);
|
||||
|
||||
pub struct VersionCodec;
|
||||
impl<'a> BytesEncode<'a> for VersionCodec {
|
||||
type EItem = (u32, u32, u32);
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
|
||||
let mut ret = Vec::with_capacity(size_of::<u32>() * 3);
|
||||
ret.extend(&item.0.to_be_bytes());
|
||||
ret.extend(&item.1.to_be_bytes());
|
||||
ret.extend(&item.2.to_be_bytes());
|
||||
Ok(Cow::Owned(ret))
|
||||
}
|
||||
}
|
||||
impl<'a> BytesDecode<'a> for VersionCodec {
|
||||
type DItem = (u32, u32, u32);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
|
||||
if bytes.len() != VERSION_SIZE {
|
||||
Err(Box::new(DecodeVersionError(bytes.len())))
|
||||
} else {
|
||||
let major = BigEndian::read_u32(bytes);
|
||||
let bytes = &bytes[size_of_val(&major)..];
|
||||
let minor = BigEndian::read_u32(bytes);
|
||||
let bytes = &bytes[size_of_val(&major)..];
|
||||
let patch = BigEndian::read_u32(bytes);
|
||||
|
||||
Ok((major, minor, patch))
|
||||
}
|
||||
}
|
||||
}
|
|
@ -10,7 +10,7 @@ use roaring::RoaringBitmap;
|
|||
use rstar::RTree;
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use crate::constants::RESERVED_VECTORS_FIELD_NAME;
|
||||
use crate::constants::{self, RESERVED_VECTORS_FIELD_NAME};
|
||||
use crate::documents::PrimaryKey;
|
||||
use crate::error::{InternalError, UserError};
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
|
@ -18,6 +18,7 @@ use crate::heed_codec::facet::{
|
|||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||
FieldIdCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::heed_codec::version::VersionCodec;
|
||||
use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
|
||||
use crate::order_by_map::OrderByMap;
|
||||
use crate::proximity::ProximityPrecision;
|
||||
|
@ -33,6 +34,7 @@ pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
|
|||
pub const DEFAULT_MIN_WORD_LEN_TWO_TYPOS: u8 = 9;
|
||||
|
||||
pub mod main_key {
|
||||
pub const VERSION_KEY: &str = "version";
|
||||
pub const CRITERIA_KEY: &str = "criteria";
|
||||
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
|
||||
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
|
||||
|
@ -223,12 +225,9 @@ impl Index {
|
|||
let vector_arroy = env.create_database(&mut wtxn, Some(VECTOR_ARROY))?;
|
||||
|
||||
let documents = env.create_database(&mut wtxn, Some(DOCUMENTS))?;
|
||||
wtxn.commit()?;
|
||||
|
||||
Index::set_creation_dates(&env, main, created_at, updated_at)?;
|
||||
|
||||
Ok(Index {
|
||||
env,
|
||||
let this = Index {
|
||||
env: env.clone(),
|
||||
main,
|
||||
external_documents_ids,
|
||||
word_docids,
|
||||
|
@ -253,7 +252,22 @@ impl Index {
|
|||
vector_arroy,
|
||||
embedder_category_id,
|
||||
documents,
|
||||
})
|
||||
};
|
||||
if this.get_version(&wtxn)?.is_none() {
|
||||
this.put_version(
|
||||
&mut wtxn,
|
||||
(
|
||||
constants::VERSION_MAJOR.parse().unwrap(),
|
||||
constants::VERSION_MINOR.parse().unwrap(),
|
||||
constants::VERSION_PATCH.parse().unwrap(),
|
||||
),
|
||||
)?;
|
||||
}
|
||||
wtxn.commit()?;
|
||||
|
||||
Index::set_creation_dates(&this.env, this.main, created_at, updated_at)?;
|
||||
|
||||
Ok(this)
|
||||
}
|
||||
|
||||
pub fn new<P: AsRef<Path>>(options: heed::EnvOpenOptions, path: P) -> Result<Index> {
|
||||
|
@ -331,6 +345,26 @@ impl Index {
|
|||
self.env.prepare_for_closing()
|
||||
}
|
||||
|
||||
/* version */
|
||||
|
||||
/// Writes the version of the database.
|
||||
pub(crate) fn put_version(
|
||||
&self,
|
||||
wtxn: &mut RwTxn<'_>,
|
||||
(major, minor, patch): (u32, u32, u32),
|
||||
) -> heed::Result<()> {
|
||||
self.main.remap_types::<Str, VersionCodec>().put(
|
||||
wtxn,
|
||||
main_key::VERSION_KEY,
|
||||
&(major, minor, patch),
|
||||
)
|
||||
}
|
||||
|
||||
/// Get the version of the database. `None` if it was never set.
|
||||
pub(crate) fn get_version(&self, rtxn: &RoTxn<'_>) -> heed::Result<Option<(u32, u32, u32)>> {
|
||||
self.main.remap_types::<Str, VersionCodec>().get(rtxn, main_key::VERSION_KEY)
|
||||
}
|
||||
|
||||
/* documents ids */
|
||||
|
||||
/// Writes the documents ids that corresponds to the user-ids-documents-ids FST.
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use std::any::TypeId;
|
||||
use std::borrow::Cow;
|
||||
use std::marker::PhantomData;
|
||||
use std::sync::atomic::{AtomicU32, Ordering};
|
||||
use std::sync::{Arc, RwLock};
|
||||
|
||||
|
@ -153,3 +154,41 @@ pub struct ProgressStepView {
|
|||
pub finished: u32,
|
||||
pub total: u32,
|
||||
}
|
||||
|
||||
/// A progress step whose displayed name may vary while it remains the same step.
///
/// The `U` parameter only serves to tell steps apart by `TypeId`: declare a
/// dedicated marker type at every place this step is used so they never conflict.
/// ```text
/// enum UpgradeVersion {}
///
/// progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
///     "v1 to v2",
///     0,
///     10,
/// ));
/// ```
pub struct VariableNameStep<U: Send + Sync + 'static> {
    name: String,
    current: u32,
    total: u32,
    phantom: PhantomData<U>,
}

impl<U: Send + Sync + 'static> VariableNameStep<U> {
    /// Builds a step named `name`, positioned at `current` out of `total`.
    pub fn new(name: impl Into<String>, current: u32, total: u32) -> Self {
        let name = name.into();
        VariableNameStep { name, current, total, phantom: PhantomData }
    }
}
|
||||
|
||||
impl<U: Send + Sync + 'static> Step for VariableNameStep<U> {
|
||||
fn name(&self) -> Cow<'static, str> {
|
||||
self.name.clone().into()
|
||||
}
|
||||
|
||||
fn current(&self) -> u32 {
|
||||
self.current
|
||||
}
|
||||
|
||||
fn total(&self) -> u32 {
|
||||
self.total
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,6 +21,7 @@ mod indexer_config;
|
|||
pub mod new;
|
||||
pub(crate) mod settings;
|
||||
mod update_step;
|
||||
pub mod upgrade;
|
||||
mod word_prefix_docids;
|
||||
mod words_prefix_integer_docids;
|
||||
mod words_prefixes_fst;
|
||||
|
|
65
crates/milli/src/update/upgrade/mod.rs
Normal file
65
crates/milli/src/update/upgrade/mod.rs
Normal file
|
@ -0,0 +1,65 @@
|
|||
use crate::constants::{VERSION_MAJOR, VERSION_MINOR, VERSION_PATCH};
|
||||
use crate::progress::{Progress, VariableNameStep};
|
||||
use crate::{Index, Result, UserError};
|
||||
|
||||
pub fn upgrade(index: &Index, base_version: (u32, u32, u32), progress: Progress) -> Result<()> {
|
||||
let wtxn = index.env.write_txn()?;
|
||||
let from = index.get_version(&wtxn)?;
|
||||
let upgrade_functions =
|
||||
[(v1_12_to_v1_13 as fn(&Index, Progress) -> Result<()>, "Upgrading from v1.12 to v1.13")];
|
||||
|
||||
let current_major: u32 = VERSION_MAJOR.parse().unwrap();
|
||||
let current_minor: u32 = VERSION_MINOR.parse().unwrap();
|
||||
let current_patch: u32 = VERSION_PATCH.parse().unwrap();
|
||||
|
||||
let start = match from {
|
||||
// If there was no version it means we're coming from the base version specified by the index-scheduler
|
||||
None if base_version.0 == 1 && base_version.1 == 12 => 0,
|
||||
Some((1, 12, _)) => 0,
|
||||
|
||||
// --- Error handling
|
||||
None => {
|
||||
return Err(UserError::TooOldForUpgrade(
|
||||
base_version.0,
|
||||
base_version.1,
|
||||
base_version.2,
|
||||
)
|
||||
.into());
|
||||
}
|
||||
Some((major, minor, patch)) if major == 0 || (major == 1 && minor < 12) => {
|
||||
return Err(UserError::TooOldForUpgrade(major, minor, patch).into());
|
||||
}
|
||||
Some((major, minor, patch)) if major > current_major => {
|
||||
return Err(UserError::CannotDowngrade(major, minor, patch).into());
|
||||
}
|
||||
Some((major, minor, patch)) if major == current_major && minor > current_minor => {
|
||||
return Err(UserError::CannotDowngrade(major, minor, patch).into());
|
||||
}
|
||||
Some((major, minor, patch))
|
||||
if major == current_major && minor == current_minor && patch > current_patch =>
|
||||
{
|
||||
return Err(UserError::CannotDowngrade(major, minor, patch).into());
|
||||
}
|
||||
Some((major, minor, patch)) => {
|
||||
return Err(UserError::CannotUpgradeToUnknownVersion(major, minor, patch).into())
|
||||
}
|
||||
};
|
||||
|
||||
enum UpgradeVersion {}
|
||||
let upgrade_path = &upgrade_functions[start..];
|
||||
|
||||
for (i, (upgrade_function, upgrade_msg)) in upgrade_path.iter().enumerate() {
|
||||
progress.update_progress(VariableNameStep::<UpgradeVersion>::new(
|
||||
upgrade_msg.to_string(),
|
||||
i as u32,
|
||||
upgrade_path.len() as u32,
|
||||
));
|
||||
(upgrade_function)(index, progress.clone())?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Upgrade step from v1.12 to v1.13.
///
/// Currently does nothing: both arguments are unused and it always succeeds.
fn v1_12_to_v1_13(_index: &Index, _progress: Progress) -> Result<()> {
    Ok(())
}
|
Loading…
Add table
Add a link
Reference in a new issue