use std::collections::{BTreeMap, BTreeSet}; use std::fs::{create_dir_all, File}; use std::io::BufRead; use std::marker::PhantomData; use std::path::Path; use std::sync::Arc; use heed::EnvOpenOptions; use log::{error, info, warn}; use milli::update::{IndexDocumentsMethod, Setting, UpdateFormat}; use serde::{Deserialize, Deserializer, Serialize}; use uuid::Uuid; use crate::index_controller::{self, uuid_resolver::HeedUuidStore, IndexMetadata}; use crate::{ index::{update_handler::UpdateHandler, Index, Unchecked}, option::IndexerOpts, }; #[derive(Serialize, Deserialize, Debug)] #[serde(rename_all = "camelCase")] pub struct MetadataV1 { db_version: String, indexes: Vec, } impl MetadataV1 { pub fn load_dump( self, src: impl AsRef, dst: impl AsRef, size: usize, indexer_options: &IndexerOpts, ) -> anyhow::Result<()> { info!( "Loading dump, dump database version: {}, dump version: V1", self.db_version ); let uuid_store = HeedUuidStore::new(&dst)?; for index in self.indexes { let uuid = Uuid::new_v4(); uuid_store.insert(index.uid.clone(), uuid)?; let src = src.as_ref().join(index.uid); load_index( &src, &dst, uuid, index.meta.primary_key.as_deref(), size, indexer_options, )?; } Ok(()) } } pub fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result, D::Error> where T: Deserialize<'de>, D: Deserializer<'de>, { Deserialize::deserialize(deserializer).map(Some) } // These are the settings used in legacy meilisearch (>>, #[serde(default, deserialize_with = "deserialize_some")] pub distinct_attribute: Option>, #[serde(default, deserialize_with = "deserialize_some")] pub searchable_attributes: Option>>, #[serde(default, deserialize_with = "deserialize_some")] pub displayed_attributes: Option>>, #[serde(default, deserialize_with = "deserialize_some")] pub stop_words: Option>>, #[serde(default, deserialize_with = "deserialize_some")] pub synonyms: Option>>>, #[serde(default, deserialize_with = "deserialize_some")] pub attributes_for_faceting: Option>>, } fn load_index( src: impl AsRef, dst: impl AsRef, uuid: Uuid, primary_key: Option<&str>, size: usize, indexer_options: &IndexerOpts, ) -> anyhow::Result<()> { let index_path = dst.as_ref().join(&format!("indexes/index-{}", uuid)); create_dir_all(&index_path)?; let mut options = EnvOpenOptions::new(); options.map_size(size); let index = milli::Index::new(options, index_path)?; let index = Index(Arc::new(index)); // extract `settings.json` file and import content let settings = import_settings(&src)?; let settings: index_controller::Settings = settings.into(); let mut txn = index.write_txn()?; let handler = UpdateHandler::new(indexer_options)?; index.update_settings_txn(&mut txn, &settings.check(), handler.update_builder(0))?; let file = File::open(&src.as_ref().join("documents.jsonl"))?; let mut reader = std::io::BufReader::new(file); reader.fill_buf()?; if !reader.buffer().is_empty() { index.update_documents_txn( &mut txn, UpdateFormat::JsonStream, IndexDocumentsMethod::ReplaceDocuments, Some(reader), handler.update_builder(0), primary_key, )?; } txn.commit()?; // Finaly, we extract the original milli::Index and close it Arc::try_unwrap(index.0) .map_err(|_e| "Couldn't close the index properly") .unwrap() .prepare_for_closing() .wait(); // Updates are ignored in dumps V1. Ok(()) } /// we need to **always** be able to convert the old settings to the settings currently being used impl From for index_controller::Settings { fn from(settings: Settings) -> Self { Self { distinct_attribute: match settings.distinct_attribute { Some(Some(attr)) => Setting::Set(attr), Some(None) => Setting::Reset, None => Setting::NotSet }, // we need to convert the old `Vec` into a `BTreeSet` displayed_attributes: match settings.displayed_attributes { Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()), Some(None) => Setting::Reset, None => Setting::NotSet }, searchable_attributes: match settings.searchable_attributes { Some(Some(attrs)) => Setting::Set(attrs), Some(None) => Setting::Reset, None => Setting::NotSet }, // we previously had a `Vec` but now we have a `HashMap` // representing the name of the faceted field + the type of the field. Since the type // was not known in the V1 of the dump we are just going to assume everything is a // String filterable_attributes: match settings.attributes_for_faceting { Some(Some(attrs)) => Setting::Set(attrs.into_iter().collect()), Some(None) => Setting::Reset, None => Setting::NotSet }, // we need to convert the old `Vec` into a `BTreeSet` ranking_rules: match settings.ranking_rules { Some(Some(ranking_rules)) => Setting::Set(ranking_rules.into_iter().filter(|criterion| { match criterion.as_str() { "words" | "typo" | "proximity" | "attribute" | "exactness" => true, s if s.starts_with("asc") || s.starts_with("desc") => true, "wordsPosition" => { warn!("The criteria `attribute` and `wordsPosition` have been merged into a single criterion `attribute` so `wordsPositon` will be ignored"); false } s => { error!("Unknown criterion found in the dump: `{}`, it will be ignored", s); false } } }).collect()), Some(None) => Setting::Reset, None => Setting::NotSet }, // we need to convert the old `Vec` into a `BTreeSet` stop_words: match settings.stop_words { Some(Some(stop_words)) => Setting::Set(stop_words.into_iter().collect()), Some(None) => Setting::Reset, None => Setting::NotSet }, // we need to convert the old `Vec` into a `BTreeMap` synonyms: match settings.synonyms { Some(Some(synonyms)) => Setting::Set(synonyms.into_iter().collect()), Some(None) => Setting::Reset, None => Setting::NotSet }, _kind: PhantomData, } } } /// Extract Settings from `settings.json` file present at provided `dir_path` fn import_settings(dir_path: impl AsRef) -> anyhow::Result { let path = dir_path.as_ref().join("settings.json"); let file = File::open(path)?; let reader = std::io::BufReader::new(file); let metadata = serde_json::from_reader(reader)?; Ok(metadata) } #[cfg(test)] mod test { use super::*; #[test] fn settings_format_regression() { let settings = Settings::default(); assert_eq!( r##"{"rankingRules":null,"distinctAttribute":null,"searchableAttributes":null,"displayedAttributes":null,"stopWords":null,"synonyms":null,"attributesForFaceting":null}"##, serde_json::to_string(&settings).unwrap() ); } }