MeiliSearch/dump/src/lib.rs

#![allow(clippy::type_complexity)]
#![allow(clippy::wrong_self_convention)]

use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
use meilisearch_types::settings::Unchecked;
use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId};
use meilisearch_types::InstanceUid;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use time::OffsetDateTime;

mod error;
mod reader;
mod writer;

pub use error::Error;
pub use reader::{DumpReader, UpdateFile};
pub use writer::DumpWriter;

const CURRENT_DUMP_VERSION: Version = Version::V6;

type Result<T> = std::result::Result<T, Error>;

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Metadata {
    pub dump_version: Version,
    pub db_version: String,
    #[serde(with = "time::serde::rfc3339")]
    pub dump_date: OffsetDateTime,
}

#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct IndexMetadata {
    pub uid: String,
    pub primary_key: Option<String>,
    #[serde(with = "time::serde::rfc3339")]
    pub created_at: OffsetDateTime,
    #[serde(with = "time::serde::rfc3339")]
    pub updated_at: OffsetDateTime,
}

#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
pub enum Version {
    V1,
    V2,
    V3,
    V4,
    V5,
    V6,
}

#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct TaskDump {
    pub uid: TaskId,
    #[serde(default)]
    pub index_uid: Option<String>,
    pub status: Status,
    #[serde(rename = "type")]
    pub kind: KindDump,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub canceled_by: Option<TaskId>,

    #[serde(skip_serializing_if = "Option::is_none")]
    pub details: Option<Details>,
    #[serde(skip_serializing_if = "Option::is_none")]
    pub error: Option<ResponseError>,

    #[serde(with = "time::serde::rfc3339")]
    pub enqueued_at: OffsetDateTime,
    #[serde(
        with = "time::serde::rfc3339::option",
        skip_serializing_if = "Option::is_none",
        default
    )]
    pub started_at: Option<OffsetDateTime>,
    #[serde(
        with = "time::serde::rfc3339::option",
        skip_serializing_if = "Option::is_none",
        default
    )]
    pub finished_at: Option<OffsetDateTime>,
}

// A `Kind` specific version made for the dump. If modified you may break the dump.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub enum KindDump {
    DocumentImport {
        primary_key: Option<String>,
        method: IndexDocumentsMethod,
        documents_count: u64,
        allow_index_creation: bool,
    },
    DocumentDeletion {
        documents_ids: Vec<String>,
    },
    DocumentClear,
    DocumentDeletionByFilter {
        filter: serde_json::Value,
    },
    Settings {
        settings: Box<meilisearch_types::settings::Settings<Unchecked>>,
        is_deletion: bool,
        allow_index_creation: bool,
    },
    IndexDeletion,
    IndexCreation {
        primary_key: Option<String>,
    },
    IndexUpdate {
        primary_key: Option<String>,
    },
    IndexSwap {
        swaps: Vec<IndexSwap>,
    },
    TaskCancelation {
        query: String,
        tasks: RoaringBitmap,
    },
    TasksDeletion {
        query: String,
        tasks: RoaringBitmap,
    },
    DumpCreation {
        keys: Vec<Key>,
        instance_uid: Option<InstanceUid>,
    },
    SnapshotCreation,
}

impl From<Task> for TaskDump {
    fn from(task: Task) -> Self {
        TaskDump {
            uid: task.uid,
            index_uid: task.index_uid().map(|uid| uid.to_string()),
            status: task.status,
            kind: task.kind.into(),
            canceled_by: task.canceled_by,
            details: task.details,
            error: task.error,
            enqueued_at: task.enqueued_at,
            started_at: task.started_at,
            finished_at: task.finished_at,
        }
    }
}

impl From<KindWithContent> for KindDump {
    fn from(kind: KindWithContent) -> Self {
        match kind {
            KindWithContent::DocumentAdditionOrUpdate {
                primary_key,
                method,
                documents_count,
                allow_index_creation,
                ..
            } => KindDump::DocumentImport {
                primary_key,
                method,
                documents_count,
                allow_index_creation,
            },
            KindWithContent::DocumentDeletion { documents_ids, .. } => {
                KindDump::DocumentDeletion { documents_ids }
            }
            KindWithContent::DocumentDeletionByFilter { filter_expr, .. } => {
                KindDump::DocumentDeletionByFilter { filter: filter_expr }
            }
            KindWithContent::DocumentClear { .. } => KindDump::DocumentClear,
            KindWithContent::SettingsUpdate {
                new_settings,
                is_deletion,
                allow_index_creation,
                ..
            } => KindDump::Settings { settings: new_settings, is_deletion, allow_index_creation },
            KindWithContent::IndexDeletion { .. } => KindDump::IndexDeletion,
            KindWithContent::IndexCreation { primary_key, .. } => {
                KindDump::IndexCreation { primary_key }
            }
            KindWithContent::IndexUpdate { primary_key, .. } => {
                KindDump::IndexUpdate { primary_key }
            }
            KindWithContent::IndexSwap { swaps } => KindDump::IndexSwap { swaps },
            KindWithContent::TaskCancelation { query, tasks } => {
                KindDump::TaskCancelation { query, tasks }
            }
            KindWithContent::TaskDeletion { query, tasks } => {
                KindDump::TasksDeletion { query, tasks }
            }
            KindWithContent::DumpCreation { keys, instance_uid } => {
                KindDump::DumpCreation { keys, instance_uid }
            }
            KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
        }
    }
}

#[cfg(test)]
pub(crate) mod test {
    use std::fs::File;
    use std::io::Seek;
    use std::str::FromStr;

    use big_s::S;
    use maplit::btreeset;
    use meilisearch_types::index_uid_pattern::IndexUidPattern;
    use meilisearch_types::keys::{Action, Key};
    use meilisearch_types::milli::update::Setting;
    use meilisearch_types::milli::{self};
    use meilisearch_types::settings::{Checked, Settings};
    use meilisearch_types::tasks::{Details, Status};
    use serde_json::{json, Map, Value};
    use time::macros::datetime;
    use uuid::Uuid;

    use crate::reader::Document;
    use crate::{DumpReader, DumpWriter, IndexMetadata, KindDump, TaskDump, Version};

    pub fn create_test_instance_uid() -> Uuid {
        Uuid::parse_str("9e15e977-f2ae-4761-943f-1eaf75fd736d").unwrap()
    }

    pub fn create_test_index_metadata() -> IndexMetadata {
        IndexMetadata {
            uid: S("doggo"),
            primary_key: None,
            created_at: datetime!(2022-11-20 12:00 UTC),
            updated_at: datetime!(2022-11-21 00:00 UTC),
        }
    }

    pub fn create_test_documents() -> Vec<Map<String, Value>> {
        vec![
            json!({ "id": 1, "race": "golden retriever", "name": "paul", "age": 4 })
                .as_object()
                .unwrap()
                .clone(),
            json!({ "id": 2, "race": "bernese mountain", "name": "tamo", "age": 6 })
                .as_object()
                .unwrap()
                .clone(),
            json!({ "id": 3, "race": "great pyrenees", "name": "patou", "age": 5 })
                .as_object()
                .unwrap()
                .clone(),
        ]
    }

    pub fn create_test_settings() -> Settings<Checked> {
        let settings = Settings {
            displayed_attributes: Setting::Set(vec![S("race"), S("name")]),
            searchable_attributes: Setting::Set(vec![S("name"), S("race")]),
            filterable_attributes: Setting::Set(btreeset! { S("race"), S("age") }),
            sortable_attributes: Setting::Set(btreeset! { S("age") }),
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            _kind: std::marker::PhantomData,
        };
        settings.check()
    }

    pub fn create_test_tasks() -> Vec<(TaskDump, Option<Vec<Document>>)> {
        vec![
            (
                TaskDump {
                    uid: 0,
                    index_uid: Some(S("doggo")),
                    status: Status::Succeeded,
                    kind: KindDump::DocumentImport {
                        method: milli::update::IndexDocumentsMethod::UpdateDocuments,
                        allow_index_creation: true,
                        primary_key: Some(S("bone")),
                        documents_count: 12,
                    },
                    canceled_by: None,
                    details: Some(Details::DocumentAdditionOrUpdate {
                        received_documents: 12,
                        indexed_documents: Some(10),
                    }),
                    error: None,
                    enqueued_at: datetime!(2022-11-11 0:00 UTC),
                    started_at: Some(datetime!(2022-11-20 0:00 UTC)),
                    finished_at: Some(datetime!(2022-11-21 0:00 UTC)),
                },
                None,
            ),
            (
                TaskDump {
                    uid: 1,
                    index_uid: Some(S("doggo")),
                    status: Status::Enqueued,
                    kind: KindDump::DocumentImport {
                        method: milli::update::IndexDocumentsMethod::UpdateDocuments,
                        allow_index_creation: true,
                        primary_key: None,
                        documents_count: 2,
                    },
                    canceled_by: None,
                    details: Some(Details::DocumentAdditionOrUpdate {
                        received_documents: 2,
                        indexed_documents: None,
                    }),
                    error: None,
                    enqueued_at: datetime!(2022-11-11 0:00 UTC),
                    started_at: None,
                    finished_at: None,
                },
                Some(vec![
                    json!({ "id": 4, "race": "leonberg" }).as_object().unwrap().clone(),
                    json!({ "id": 5, "race": "patou" }).as_object().unwrap().clone(),
                ]),
            ),
            (
                TaskDump {
                    uid: 5,
                    index_uid: Some(S("catto")),
                    status: Status::Enqueued,
                    kind: KindDump::IndexDeletion,
                    canceled_by: None,
                    details: None,
                    error: None,
                    enqueued_at: datetime!(2022-11-15 0:00 UTC),
                    started_at: None,
                    finished_at: None,
                },
                None,
            ),
        ]
    }

    pub fn create_test_api_keys() -> Vec<Key> {
        vec![
            Key {
                description: Some(S("The main key to manage all the doggos")),
                name: Some(S("doggos_key")),
                uid: Uuid::from_str("9f8a34da-b6b2-42f0-939b-dbd4c3448655").unwrap(),
                actions: vec![Action::DocumentsAll],
                indexes: vec![IndexUidPattern::from_str("doggos").unwrap()],
                expires_at: Some(datetime!(4130-03-14 12:21 UTC)),
                created_at: datetime!(1960-11-15 0:00 UTC),
                updated_at: datetime!(2022-11-10 0:00 UTC),
            },
            Key {
                description: Some(S("The master key for everything and even the doggos")),
                name: Some(S("master_key")),
                uid: Uuid::from_str("4622f717-1c00-47bb-a494-39d76a49b591").unwrap(),
                actions: vec![Action::All],
                indexes: vec![IndexUidPattern::all()],
                expires_at: None,
                created_at: datetime!(0000-01-01 00:01 UTC),
                updated_at: datetime!(1964-05-04 17:25 UTC),
            },
            Key {
                description: Some(S("The useless key to for nothing nor the doggos")),
                name: Some(S("useless_key")),
                uid: Uuid::from_str("fb80b58b-0a34-412f-8ba7-1ce868f8ac5c").unwrap(),
                actions: vec![],
                indexes: vec![],
                expires_at: None,
                created_at: datetime!(400-02-29 0:00 UTC),
                updated_at: datetime!(1024-02-29 0:00 UTC),
            },
        ]
    }

    pub fn create_test_dump() -> File {
        let instance_uid = create_test_instance_uid();
        let dump = DumpWriter::new(Some(instance_uid)).unwrap();

        // ========== Adding an index
        let documents = create_test_documents();
        let settings = create_test_settings();

        let mut index = dump.create_index("doggos", &create_test_index_metadata()).unwrap();
        for document in &documents {
            index.push_document(document).unwrap();
        }
        index.flush().unwrap();
        index.settings(&settings).unwrap();

        // ========== pushing the task queue
        let tasks = create_test_tasks();

        let mut task_queue = dump.create_tasks_queue().unwrap();
        for (task, update_file) in &tasks {
            let mut update = task_queue.push_task(task).unwrap();
            if let Some(update_file) = update_file {
                for u in update_file {
                    update.push_document(u).unwrap();
                }
            }
        }
        task_queue.flush().unwrap();

        // ========== pushing the api keys
        let api_keys = create_test_api_keys();

        let mut keys = dump.create_keys().unwrap();
        for key in &api_keys {
            keys.push_key(key).unwrap();
        }
        keys.flush().unwrap();

        // create the dump
        let mut file = tempfile::tempfile().unwrap();
        dump.persist_to(&mut file).unwrap();
        file.rewind().unwrap();

        file
    }

    #[test]
    fn test_creating_and_read_dump() {
        let mut file = create_test_dump();
        let mut dump = DumpReader::open(&mut file).unwrap();

        // ==== checking the top level infos
        assert_eq!(dump.version(), Version::V6);
        assert!(dump.date().is_some());
        assert_eq!(dump.instance_uid().unwrap().unwrap(), create_test_instance_uid());

        // ==== checking the index
        let mut indexes = dump.indexes().unwrap();
        let mut index = indexes.next().unwrap().unwrap();
        assert!(indexes.next().is_none()); // there was only one index in the dump

        for (document, expected) in index.documents().unwrap().zip(create_test_documents()) {
            assert_eq!(document.unwrap(), expected);
        }
        assert_eq!(index.settings().unwrap(), create_test_settings());
        assert_eq!(index.metadata(), &create_test_index_metadata());

        drop(index);
        drop(indexes);

        // ==== checking the task queue
        for (task, expected) in dump.tasks().unwrap().zip(create_test_tasks()) {
            let (task, content_file) = task.unwrap();
            assert_eq!(task, expected.0);

            if let Some(expected_update) = expected.1 {
                assert!(
                    content_file.is_some(),
                    "A content file was expected for the task {}.",
                    expected.0.uid
                );
                let updates = content_file.unwrap().collect::<Result<Vec<_>, _>>().unwrap();
                assert_eq!(updates, expected_update);
            }
        }

        // ==== checking the keys
        for (key, expected) in dump.keys().unwrap().zip(create_test_api_keys()) {
            assert_eq!(key.unwrap(), expected);
        }
    }
}