From 23e35fa5262188f9600e16e8009a8bbdd7427e3f Mon Sep 17 00:00:00 2001 From: Marin Postma Date: Tue, 7 Dec 2021 10:36:27 +0100 Subject: [PATCH] feat(dumps): drop dump V1 support --- .../dump_actor/loaders/mod.rs | 1 - .../index_controller/dump_actor/loaders/v1.rs | 223 +----------------- .../src/index_controller/dump_actor/mod.rs | 11 +- 3 files changed, 10 insertions(+), 225 deletions(-) diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs index 08fbc33cf..ecc305652 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/mod.rs @@ -1,4 +1,3 @@ -pub mod v1; pub mod v2; pub mod v3; pub mod v4; diff --git a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs index 647f5b959..a07475b56 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/loaders/v1.rs @@ -1,22 +1,8 @@ -use std::collections::{BTreeMap, BTreeSet}; -use std::fs::{create_dir_all, File}; -use std::io::{BufReader, Seek, SeekFrom}; -use std::marker::PhantomData; use std::path::Path; -use heed::EnvOpenOptions; -use log::{error, warn}; -use milli::documents::DocumentBatchReader; -use milli::update::Setting; -use serde::{Deserialize, Deserializer, Serialize}; -use uuid::Uuid; +use serde::{Deserialize, Serialize}; -use crate::document_formats::read_ndjson; -use crate::index::apply_settings_to_builder; -use crate::index::update_handler::UpdateHandler; -use crate::index_controller::dump_actor::compat; -use crate::index_controller::{self, IndexMetadata}; -use crate::{index::Unchecked, options::IndexerOpts}; +use crate::index_controller::IndexMetadata; #[derive(Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase")] @@ -34,208 +20,5 @@ impl MetadataV1 { size: usize, indexer_options: &IndexerOpts, ) -> anyhow::Result<()> { - unreachable!("dump v1 not implemented"); - // log::info!("Patching dump V2 to dump V3..."); - // let uuid_store = todo!(); // HeedMetaStore::new(&dst)?; - // for index in self.indexes { - // let uuid = Uuid::new_v4(); - // // Since we don't know when the index was created, we assume it's from 0 - // let meta = IndexMeta { - // uuid, - // creation_task_id: 0, - // }; - // // uuid_store.insert(index.uid.clone(), meta)?; - // let src = src.as_ref().join(index.uid); - // load_index( - // &src, - // &dst, - // uuid, - // index.meta.primary_key.as_deref(), - // size, - // indexer_options, - // )?; - // } - - // Ok(()) - } -} - -pub fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result, D::Error> -where - T: Deserialize<'de>, - D: Deserializer<'de>, -{ - Deserialize::deserialize(deserializer).map(Some) -} - -// These are the settings used in legacy meilisearch (>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub distinct_attribute: Option>, - #[serde(default, deserialize_with = "deserialize_some")] - pub searchable_attributes: Option>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub displayed_attributes: Option>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub stop_words: Option>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub synonyms: Option>>>, - #[serde(default, deserialize_with = "deserialize_some")] - pub attributes_for_faceting: Option>>, -} - -#[allow(dead_code)] -fn load_index( - src: impl AsRef, - dst: impl AsRef, - uuid: Uuid, - primary_key: Option<&str>, - size: usize, - indexer_options: &IndexerOpts, -) -> anyhow::Result<()> { - let index_path = dst.as_ref().join(&format!("indexes/{}", uuid)); - - create_dir_all(&index_path)?; - let mut options = EnvOpenOptions::new(); - options.map_size(size); - let index = milli::Index::new(options, index_path)?; - - let update_handler = UpdateHandler::new(indexer_options)?; - - let mut txn = index.write_txn()?; - // extract `settings.json` file and import content - let settings = import_settings(&src)?; - let settings: index_controller::Settings = settings.into(); - - let handler = UpdateHandler::new(indexer_options)?; - - let mut builder = handler.update_builder().settings(&mut txn, &index); - - if let Some(primary_key) = primary_key { - builder.set_primary_key(primary_key.to_string()); - } - - apply_settings_to_builder(&settings.check(), &mut builder); - - builder.execute(|_| ())?; - - let reader = BufReader::new(File::open(&src.as_ref().join("documents.jsonl"))?); - - let mut tmp_doc_file = tempfile::tempfile()?; - - read_ndjson(reader, &mut tmp_doc_file)?; - - tmp_doc_file.seek(SeekFrom::Start(0))?; - - let documents_reader = DocumentBatchReader::from_reader(tmp_doc_file)?; - - //If the document file is empty, we don't perform the document addition, to prevent - //a primary key error to be thrown. - if !documents_reader.is_empty() { - let builder = update_handler - .update_builder() - .index_documents(&mut txn, &index); - builder.execute(documents_reader, |_| ())?; - } - - txn.commit()?; - - // Finaly, we extract the original milli::Index and close it - index.prepare_for_closing().wait(); - - // Updates are ignored in dumps V1. - - Ok(()) -} - -/// we need to **always** be able to convert the old settings to the settings currently being used -impl From for index_controller::Settings { - fn from(settings: Settings) -> Self { - Self { - distinct_attribute: match settings.distinct_attribute { - Some(Some(attr)) => Setting::Set(attr), - Some(None) => Setting::Reset, - None => Setting::NotSet - }, - // we need to convert the old `Vec` into a `BTreeSet` - displayed_attributes: match settings.displayed_attributes { - Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()), - Some(None) => Setting::Reset, - None => Setting::NotSet - }, - searchable_attributes: match settings.searchable_attributes { - Some(Some(attrs)) => Setting::Set(attrs), - Some(None) => Setting::Reset, - None => Setting::NotSet - }, - filterable_attributes: match settings.attributes_for_faceting { - Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()), - Some(None) => Setting::Reset, - None => Setting::NotSet - }, - sortable_attributes: Setting::NotSet, - ranking_rules: match settings.ranking_rules { - Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter_map(|criterion| { - match criterion.as_str() { - "words" | "typo" | "proximity" | "attribute" | "exactness" => Some(criterion), - s if s.starts_with("asc") => compat::asc_ranking_rule(s).map(|f| format!("{}:asc", f)), - s if s.starts_with("desc") => compat::desc_ranking_rule(s).map(|f| format!("{}:desc", f)), - "wordsPosition" => { - warn!("The criteria `attribute` and `wordsPosition` have been merged \ - into a single criterion `attribute` so `wordsPositon` will be \ - ignored"); - None - } - s => { - error!("Unknown criterion found in the dump: `{}`, it will be ignored", s); - None - } - } - }).collect()), - Some(None) => Setting::Reset, - None => Setting::NotSet - }, - // we need to convert the old `Vec` into a `BTreeSet` - stop_words: match settings.stop_words { - Some(Some(stop_words)) => Setting::Set(stop_words.into_iter().collect()), - Some(None) => Setting::Reset, - None => Setting::NotSet - }, - // we need to convert the old `Vec` into a `BTreeMap` - synonyms: match settings.synonyms { - Some(Some(synonyms)) => Setting::Set(synonyms.into_iter().collect()), - Some(None) => Setting::Reset, - None => Setting::NotSet - }, - _kind: PhantomData, - } - } -} - -/// Extract Settings from `settings.json` file present at provided `dir_path` -fn import_settings(dir_path: impl AsRef) -> anyhow::Result { - let path = dir_path.as_ref().join("settings.json"); - let file = File::open(path)?; - let reader = std::io::BufReader::new(file); - let metadata = serde_json::from_reader(reader)?; - - Ok(metadata) -} - -#[cfg(test)] -mod test { - use super::*; - - #[test] - fn settings_format_regression() { - let settings = Settings::default(); - assert_eq!( - r##"{"rankingRules":null,"distinctAttribute":null,"searchableAttributes":null,"displayedAttributes":null,"stopWords":null,"synonyms":null,"attributesForFaceting":null}"##, - serde_json::to_string(&settings).unwrap() - ); - } + anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") } diff --git a/meilisearch-lib/src/index_controller/dump_actor/mod.rs b/meilisearch-lib/src/index_controller/dump_actor/mod.rs index 656bd512d..a3b47abe2 100644 --- a/meilisearch-lib/src/index_controller/dump_actor/mod.rs +++ b/meilisearch-lib/src/index_controller/dump_actor/mod.rs @@ -5,8 +5,6 @@ use chrono::{DateTime, Utc}; use log::{info, trace, warn}; use serde::{Deserialize, Serialize}; -use loaders::v1::MetadataV1; - pub use actor::DumpActor; pub use handle_impl::*; pub use message::DumpMsg; @@ -64,6 +62,12 @@ pub trait DumpActorHandle { async fn dump_info(&self, uid: String) -> Result; } +#[derive(Serialize, Deserialize, Debug)] +#[serde(rename_all = "camelCase")] +pub struct MetadataV1 { + pub db_version: String, +} + #[derive(Debug, Serialize, Deserialize)] #[serde(tag = "dumpVersion")] pub enum MetadataVersion { @@ -194,8 +198,7 @@ pub fn load_dump( match meta { MetadataVersion::V1(_meta) => { - anyhow::bail!("This version (v1) of the dump is too old to be imported.") - // meta.load_dump(&tmp_src_path, tmp_dst.path(), index_db_size, indexer _opts)? + anyhow::bail!("The version 1 of the dumps is not supported anymore. You can re-export your dump from a version between 0.21 and 0.24, or start fresh from a version 0.25 onwards.") } MetadataVersion::V2(meta) => v2::load_dump( meta,