From ddd03e9b370f145787bca447b8791aeff5485c94 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 29 Oct 2024 02:46:14 +0100 Subject: [PATCH] implement the upgrade from v1.10 to v1.11 in meilitool --- Cargo.lock | 28 +++++++++++-- crates/meilitool/Cargo.toml | 2 + crates/milli/Cargo.toml | 2 +- meilitool/src/upgrade/mod.rs | 60 +++++++++++++++++++-------- meilitool/src/upgrade/v1_10.rs | 7 +++- meilitool/src/upgrade/v1_11.rs | 76 ++++++++++++++++++++++++++++++++++ 6 files changed, 150 insertions(+), 25 deletions(-) create mode 100644 meilitool/src/upgrade/v1_11.rs diff --git a/Cargo.lock b/Cargo.lock index 500f28454..43a93bb05 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -404,6 +404,25 @@ dependencies = [ "thiserror", ] +[[package]] +name = "arroy" +version = "0.5.0" +source = "git+https://github.com/meilisearch/arroy/?rev=3908c9e#3908c9edfba77ba18cc50bda41c88166ba5ebd37" +dependencies = [ + "bytemuck", + "byteorder", + "heed", + "log", + "memmap2", + "nohash", + "ordered-float", + "rand", + "rayon", + "roaring", + "tempfile", + "thiserror", +] + [[package]] name = "assert-json-diff" version = "2.0.2" @@ -707,9 +726,9 @@ checksum = "2c676a478f63e9fa2dd5368a42f28bba0d6c560b775f38583c8bbaa7fcd67c9c" [[package]] name = "bytemuck" -version = "1.16.1" +version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b236fc92302c97ed75b38da1f4917b5cdda4984745740f153a5d3059e48d725e" +checksum = "8334215b81e418a0a7bdb8ef0849474f40bb10c8b71f1c4ed315cff49f32494d" dependencies = [ "bytemuck_derive", ] @@ -2556,7 +2575,7 @@ name = "index-scheduler" version = "1.11.0" dependencies = [ "anyhow", - "arroy", + "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "big_s", "bincode", "crossbeam", @@ -3517,6 +3536,7 @@ name = "meilitool" version = "1.11.0" dependencies = [ "anyhow", + "arroy 0.5.0 (git+https://github.com/meilisearch/arroy/?rev=3908c9e)", "clap", "dump", "file-store", @@ -3547,7 +3567,7 @@ dependencies = [ name = "milli" 
version = "1.11.0" dependencies = [ - "arroy", + "arroy 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "big_s", "bimap", "bincode", diff --git a/crates/meilitool/Cargo.toml b/crates/meilitool/Cargo.toml index ce6c1ad5b..937a484e2 100644 --- a/crates/meilitool/Cargo.toml +++ b/crates/meilitool/Cargo.toml @@ -18,3 +18,5 @@ meilisearch-types = { path = "../meilisearch-types" } serde = { version = "1.0.209", features = ["derive"] } time = { version = "0.3.36", features = ["formatting"] } uuid = { version = "1.10.0", features = ["v4"], default-features = false } +arroy_v04_to_v05 = { package = "arroy", git = "https://github.com/meilisearch/arroy/", rev = "3908c9e" } + diff --git a/crates/milli/Cargo.toml b/crates/milli/Cargo.toml index df0e59496..7b43fbf33 100644 --- a/crates/milli/Cargo.toml +++ b/crates/milli/Cargo.toml @@ -15,7 +15,7 @@ license.workspace = true bimap = { version = "0.6.3", features = ["serde"] } bincode = "1.3.3" bstr = "1.9.1" -bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } +bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] } byteorder = "1.5.0" charabia = { version = "0.9.1", default-features = false } concat-arrays = "0.1.2" diff --git a/meilitool/src/upgrade/mod.rs b/meilitool/src/upgrade/mod.rs index 053c61c14..9a1e4286f 100644 --- a/meilitool/src/upgrade/mod.rs +++ b/meilitool/src/upgrade/mod.rs @@ -1,13 +1,16 @@ mod v1_10; +mod v1_11; mod v1_9; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use anyhow::{bail, Context}; use meilisearch_types::versioning::create_version_file; use v1_10::v1_9_to_v1_10; +use crate::upgrade::v1_11::v1_10_to_v1_11; + pub struct OfflineUpgrade { pub db_path: PathBuf, pub current_version: (String, String, String), @@ -16,29 +19,50 @@ pub struct OfflineUpgrade { impl OfflineUpgrade { pub fn upgrade(self) -> anyhow::Result<()> { + let upgrade_list = [ + (v1_9_to_v1_10 as fn(&Path) -> Result<(), anyhow::Error>, "1", "10", "0"), + (v1_10_to_v1_11, "1", 
"11", "0"), + ]; + let (current_major, current_minor, current_patch) = &self.current_version; + + let start_at = match ( + current_major.as_str(), + current_minor.as_str(), + current_patch.as_str(), + ) { + ("1", "9", _) => 0, + ("1", "10", _) => 1, + _ => { + bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9 and v1.10") + } + }; + let (target_major, target_minor, target_patch) = &self.target_version; - println!("Upgrading from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); + let ends_at = match (target_major.as_str(), target_minor.as_str(), target_patch.as_str()) { + ("1" | "v1", "10", _) => 0, + ("1" | "v1", "11", _) => 1, + _ => { + bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10 and v1.11") + } + }; - match ( - (current_major.as_str(), current_minor.as_str(), current_patch.as_str()), - (target_major.as_str(), target_minor.as_str(), target_patch.as_str()), - ) { - (("1", "9", _), ("1", "10", _)) => v1_9_to_v1_10(&self.db_path)?, - ((major, minor, _), _) if major != "1" && minor != "9" => - bail!("Unsupported current version {current_major}.{current_minor}.{current_patch}. Can only upgrade from v1.9"), - (_, (major, minor, _)) if major != "1" && minor != "10" => - bail!("Unsupported target version {target_major}.{target_minor}.{target_patch}. Can only upgrade to v1.10"), - _ => - bail!("Unsupported upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}. 
Can only upgrade from v1.9 to v1.10"), + println!("Starting the upgrade from {current_major}.{current_minor}.{current_patch} to {target_major}.{target_minor}.{target_patch}"); + + #[allow(clippy::needless_range_loop)] + for index in start_at..=ends_at { + let (func, major, minor, patch) = upgrade_list[index]; + (func)(&self.db_path)?; + println!("Done"); + // We're writing the version file just in case an issue arise _while_ upgrading. + // We don't want the DB to fail in an unknown state. + println!("Writing VERSION file"); + + create_version_file(&self.db_path, major, minor, patch) + .context("while writing VERSION file after the upgrade")?; } - println!("Writing VERSION file"); - - create_version_file(&self.db_path, target_major, target_minor, target_patch) - .context("while writing VERSION file after the upgrade")?; - println!("Success"); Ok(()) diff --git a/meilitool/src/upgrade/v1_10.rs b/meilitool/src/upgrade/v1_10.rs index 96af99c39..99fe104e3 100644 --- a/meilitool/src/upgrade/v1_10.rs +++ b/meilitool/src/upgrade/v1_10.rs @@ -79,7 +79,8 @@ fn update_index_stats( let stats: Option = index_stats .remap_data_type::>() .get(sched_wtxn, &index_uuid) - .with_context(ctx)?; + .with_context(ctx) + .with_context(|| "While reading value")?; if let Some(stats) = stats { let stats: self::IndexStats = stats.into(); @@ -87,7 +88,8 @@ fn update_index_stats( index_stats .remap_data_type::>() .put(sched_wtxn, &index_uuid, &stats) - .with_context(ctx)?; + .with_context(ctx) + .with_context(|| "While writing value")?; } Ok(()) @@ -155,6 +157,7 @@ fn date_round_trip( } pub fn v1_9_to_v1_10(db_path: &Path) -> anyhow::Result<()> { + println!("Upgrading from v1.9.0 to v1.10.0"); // 2 changes here // 1. date format. needs to be done before opening the Index diff --git a/meilitool/src/upgrade/v1_11.rs b/meilitool/src/upgrade/v1_11.rs new file mode 100644 index 000000000..26c4234f6 --- /dev/null +++ b/meilitool/src/upgrade/v1_11.rs @@ -0,0 +1,76 @@ +//! 
The breaking changes that happened between the v1.10 and the v1.11 are:
+//! - Arroy went from the v0.4.0 to the v0.5.0, see this release note to get the whole context: <https://github.com/meilisearch/arroy/releases/tag/v0.5.0>
+//!   - The `angular` distance has been renamed to `cosine` => We only need to update the string in the metadata.
+//!   - Reorganize the `NodeId` to make the appending of vectors work => We'll have to update the keys of almost all items in the DB.
+//!   - Store the list of updated IDs directly in LMDB instead of a roaring bitmap => This shouldn't be an issue since we are never supposed to commit this roaring bitmap, but it's not forbidden by arroy so ensuring it works is probably better than anything.
+
+use std::path::Path;
+
+use anyhow::Context;
+use meilisearch_types::{
+    heed::{types::Str, Database, EnvOpenOptions},
+    milli::index::db_name,
+};
+
+use crate::{try_opening_database, try_opening_poly_database, uuid_codec::UuidCodec};
+
+/// Upgrades every index of the database located at `db_path` from v1.10 to v1.11.
+///
+/// The list of indexes is read from the `index-mapping` database of the `tasks`
+/// environment. Then, for each index stored under `indexes/<uuid>`, the arroy
+/// database is converted by `arroy_v04_to_v05` and the result is committed,
+/// one index at a time.
+///
+/// # Errors
+///
+/// Returns an error if an environment, a transaction or a database cannot be opened,
+/// if the index mapping cannot be read, or if converting or committing an index fails.
+/// Every index is committed on its own, so the indexes processed before a failure
+/// stay upgraded.
+pub fn v1_10_to_v1_11(db_path: &Path) -> anyhow::Result<()> {
+    println!("Upgrading from v1.10.0 to v1.11.0");
+
+    let index_scheduler_path = db_path.join("tasks");
+    // NOTE(review): opening an environment is `unsafe` in heed; presumably the caller
+    // must guarantee nothing else has this environment open -- confirm.
+    let env = unsafe { EnvOpenOptions::new().max_dbs(100).open(&index_scheduler_path) }
+        .with_context(|| format!("While trying to open {:?}", index_scheduler_path.display()))?;
+
+    let sched_rtxn = env
+        .read_txn()
+        .context("while obtaining a read transaction on the index scheduler")?;
+
+    let index_mapping: Database<Str, UuidCodec> =
+        try_opening_database(&env, &sched_rtxn, "index-mapping")?;
+
+    // Read the whole mapping up front so that an unreadable entry is reported
+    // before any index has been modified.
+    let indexes = index_mapping
+        .iter(&sched_rtxn)
+        .context("while iterating over the index mapping")?
+        .map(|res| res.map(|(uid, uuid)| (uid.to_owned(), uuid)))
+        .collect::<Result<Vec<_>, _>>()
+        .context("while reading the index mapping")?;
+    let index_count = indexes.len();
+
+    // Upgrade the arroy database of every index, one index at a time.
+    for (index_index, (uid, uuid)) in indexes.into_iter().enumerate() {
+        let index_path = db_path.join("indexes").join(uuid.to_string());
+
+        println!(
+            "[{}/{index_count}] Updating index `{uid}` at `{}`",
+            index_index + 1,
+            index_path.display()
+        );
+
+        // NOTE(review): same `unsafe` contract as the scheduler environment above -- confirm.
+        let index_env = unsafe { EnvOpenOptions::new().max_dbs(25).open(&index_path) }
+            .with_context(|| {
+                format!("while opening index {uid} at '{}'", index_path.display())
+            })?;
+
+        let index_rtxn = index_env.read_txn().with_context(|| {
+            format!(
+                "while obtaining a read transaction for index {uid} at {}",
+                index_path.display()
+            )
+        })?;
+        let mut index_wtxn = index_env.write_txn().with_context(|| {
+            format!(
+                "while obtaining a write transaction for index {uid} at {}",
+                index_path.display()
+            )
+        })?;
+
+        let database = try_opening_poly_database(&index_env, &index_rtxn, db_name::VECTOR_ARROY)
+            .with_context(|| format!("while opening the arroy database of index `{uid}`"))?;
+
+        // NOTE(review): `ugrade_from_prev_version` is kept exactly as spelled in the
+        // original call; presumably that is the name exported by the pinned arroy
+        // revision -- confirm before "fixing" the typo.
+        arroy_v04_to_v05::ugrade_from_prev_version(&index_rtxn, &mut index_wtxn, database)
+            .with_context(|| format!("while upgrading the arroy database of index `{uid}`"))?;
+
+        index_wtxn
+            .commit()
+            .with_context(|| format!("while committing the upgrade of index `{uid}`"))?;
+    }
+
+    Ok(())
+}