Fix all the error code and settings issues when importing a v2 dump

2025-07-04 20:37:15 +02:00 · 2022-10-17 12:47:48 +02:00 · 2022-10-17 12:47:48 +02:00 · a9eeb070b8
commit a9eeb070b8
parent 3872a1b8d1
9 changed files with 85 additions and 15 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -1109,11 +1109,13 @@ dependencies = [
 "http",
 "index-scheduler",
 "insta",
+ "lazy_static",
 "log",
 "maplit",
 "meili-snap",
 "meilisearch-auth",
 "meilisearch-types",
+ "regex",
 "serde",
 "serde_json",
 "tar",
--- a/dump/Cargo.toml
+++ b/dump/Cargo.toml
@ -19,6 +19,8 @@ log = "0.4.17"
 meilisearch-auth = { path = "../meilisearch-auth" }
 meilisearch-types = { path = "../meilisearch-types" }
 http = "0.2.8"
+regex = "1.6.0"
+lazy_static = "1.4.0"

 [dev-dependencies]
 big_s = "1.0.2"
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@ -1,10 +1,8 @@
 use meilisearch_types::{
    error::ResponseError,
-    keys::Key,
    milli::update::IndexDocumentsMethod,
    settings::Unchecked,
    tasks::{Details, KindWithContent, Status, Task, TaskId},
-    InstanceUid,
 };
 use serde::{Deserialize, Serialize};
 use time::OffsetDateTime;
--- a/dump/src/reader/compat/v2_to_v3.rs
+++ b/dump/src/reader/compat/v2_to_v3.rs
@ -353,7 +353,12 @@ impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
            filterable_attributes: option_to_setting(settings.filterable_attributes)
                .map(|f| f.into_iter().collect()),
            sortable_attributes: v3::Setting::NotSet,
-            ranking_rules: option_to_setting(settings.ranking_rules),
+            ranking_rules: option_to_setting(settings.ranking_rules).map(|criteria| {
+                criteria
+                    .into_iter()
+                    .map(|criterion| patch_ranking_rules(&criterion))
+                    .collect()
+            }),
            stop_words: option_to_setting(settings.stop_words),
            synonyms: option_to_setting(settings.synonyms),
            distinct_attribute: option_to_setting(settings.distinct_attribute),
@ -362,6 +367,20 @@ impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
    }
 }

+fn patch_ranking_rules(ranking_rule: &str) -> String {
+    match v2::settings::Criterion::from_str(ranking_rule) { // parse the rule with the v2 `FromStr` impl
+        Ok(v2::settings::Criterion::Words) => String::from("words"),
+        Ok(v2::settings::Criterion::Typo) => String::from("typo"),
+        Ok(v2::settings::Criterion::Proximity) => String::from("proximity"),
+        Ok(v2::settings::Criterion::Attribute) => String::from("attribute"),
+        Ok(v2::settings::Criterion::Exactness) => String::from("exactness"),
+        Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"), // a parsed `Asc(name)` is re-emitted as `name:asc`
+        Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"), // a parsed `Desc(name)` is re-emitted as `name:desc`
+        // unparsable rule: return it unchanged so that the error is forwarded to the current version of meilisearch
+        Err(_) => ranking_rule.to_string(),
+    }
+}
+
 #[cfg(test)]
 pub(crate) mod test {
    use std::{fs::File, io::BufReader};
@ -388,7 +407,7 @@ pub(crate) mod test {
        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
-        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6adb1469ab4cc7625fd8ad32d07e51cd");
+        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"9507711db47c7171c79bc6d57d0bed79");
        assert_eq!(update_files.len(), 9);
        assert!(update_files[0].is_some()); // the enqueued document addition
        assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -440,7 +459,7 @@ pub(crate) mod test {
        }
        "###);

-        meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"8ee40d46442eb1a7cdc463d8a787515e");
+        meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"ae7c5ade2243a553152dab2f354e9095");
        let documents = movies
            .documents()
            .unwrap()
--- a/dump/src/reader/compat/v4_to_v5.rs
+++ b/dump/src/reader/compat/v4_to_v5.rs
@ -410,7 +410,7 @@ pub(crate) mod test {
        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
-        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"e0b53f2cbd76c66dc55b12263a60d2c5");
+        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ed9a30cded4c046ef46f7cff7450347e");
        assert_eq!(update_files.len(), 10);
        assert!(update_files[0].is_some()); // the enqueued document addition
        assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
--- a/dump/src/reader/compat/v5_to_v6.rs
+++ b/dump/src/reader/compat/v5_to_v6.rs
@ -414,7 +414,7 @@ pub(crate) mod test {
        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
-        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf");
+        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8c6cd41457c0b7e4c6727c9c85b7abac");
        assert_eq!(update_files.len(), 22);
        assert!(update_files[0].is_none()); // the dump creation
        assert!(update_files[1].is_some()); // the enqueued document addition
--- a/dump/src/reader/mod.rs
+++ b/dump/src/reader/mod.rs
@ -203,7 +203,7 @@ pub(crate) mod test {
        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
-        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf");
+        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8c6cd41457c0b7e4c6727c9c85b7abac");
        assert_eq!(update_files.len(), 22);
        assert!(update_files[0].is_none()); // the dump creation
        assert!(update_files[1].is_some()); // the enqueued document addition
@ -293,7 +293,7 @@ pub(crate) mod test {
        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
-        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"0903b293c6ff8dc0819cbd3406848ef2");
+        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"acd74244b4e6578c353899e6db30b0b5");
        assert_eq!(update_files.len(), 10);
        assert!(update_files[0].is_some()); // the enqueued document addition
        assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -382,7 +382,7 @@ pub(crate) mod test {
        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
-        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"891538c6fe0ba5187853a4f04890f9b5");
+        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"fa74f7c6ab3014e09bb813fdc551db8f");
        assert_eq!(update_files.len(), 10);
        assert!(update_files[0].is_some()); // the enqueued document addition
        assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -491,7 +491,7 @@ pub(crate) mod test {
        // tasks
        let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
        let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
-        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"c52c07e1b356cce6982e2aeea7d0bf5e");
+        meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"19882e94dc085f1d60eb7df5005a3224");
        assert_eq!(update_files.len(), 9);
        assert!(update_files[0].is_some()); // the enqueued document addition
        assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
@ -540,7 +540,7 @@ pub(crate) mod test {
        }
        "###);

-        meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"1e51f7fdc322176408f471a6d90d7698");
+        meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"5389153ddf5527fa79c54b6a6e9c21f6");
        let documents = movies
            .documents()
            .unwrap()
--- a/dump/src/reader/v2/settings.rs
+++ b/dump/src/reader/v2/settings.rs
@ -1,8 +1,10 @@
 use std::{
    collections::{BTreeMap, BTreeSet, HashSet},
    marker::PhantomData,
+    str::FromStr,
 };

+use regex::Regex;
 use serde::{Deserialize, Deserializer};

 #[cfg(test)]
@ -129,3 +131,51 @@ impl Settings<Unchecked> {
        }
    }
 }
+
+lazy_static::lazy_static! { // pattern for `asc(field)` / `desc(field)`: group 1 is the order, group 2 the field name
+    static ref ASC_DESC_REGEX: Regex = Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap();
+} // NOTE(review): the pattern is not anchored, so it also matches when embedded in a longer string
+
+#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
+pub enum Criterion { // a single ranking rule, as produced by the `FromStr` impl of this type
+    /// Sorted by decreasing number of matched query terms.
+    /// Query words at the front of an attribute are considered better than those at the back.
+    Words,
+    /// Sorted by increasing number of typos.
+    Typo,
+    /// Sorted by increasing distance between matched query terms.
+    Proximity,
+    /// Documents with query words contained in more important
+    /// attributes are considered better.
+    Attribute,
+    /// Sorted by the similarity of the matched words with the query words.
+    Exactness,
+    /// Sorted by the increasing value of the field specified.
+    Asc(String),
+    /// Sorted by the decreasing value of the field specified.
+    Desc(String),
+}
+
+impl FromStr for Criterion {
+    type Err = (); // carries no details: every unrecognized input yields `Err(())`
+
+    fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
+        match txt {
+            "words" => Ok(Criterion::Words),
+            "typo" => Ok(Criterion::Typo),
+            "proximity" => Ok(Criterion::Proximity),
+            "attribute" => Ok(Criterion::Attribute),
+            "exactness" => Ok(Criterion::Exactness),
+            text => { // not one of the bare keywords: try the `asc(field)` / `desc(field)` form
+                let caps = ASC_DESC_REGEX.captures(text).ok_or(())?; // no match means the rule is rejected
+                let order = caps.get(1).unwrap().as_str();
+                let field_name = caps.get(2).unwrap().as_str();
+                match order {
+                    "asc" => Ok(Criterion::Asc(field_name.to_string())),
+                    "desc" => Ok(Criterion::Desc(field_name.to_string())),
+                    _text => Err(()), // defensive arm: group 1 of `ASC_DESC_REGEX` only matches `asc` or `desc`
+                }
+            }
+        }
+    }
+}
--- a/dump/src/writer.rs
+++ b/dump/src/writer.rs
@ -330,9 +330,8 @@ pub(crate) mod test {

        // ==== checking the task queue
        let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap();
-        for (task, mut expected) in tasks_queue.lines().zip(create_test_tasks()) {
-            // TODO: uncomment this one once the we write the dump integration in the index-scheduler
-            // assert_eq!(serde_json::from_str::<TaskView>(task).unwrap(), expected.0);
+        for (task, expected) in tasks_queue.lines().zip(create_test_tasks()) {
+            assert_eq!(serde_json::from_str::<TaskDump>(task).unwrap(), expected.0);

            if let Some(expected_update) = expected.1 {
                let path = dump_path.join(format!("tasks/update_files/{}.jsonl", expected.0.uid));