mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-22 03:07:27 +01:00
fix all the error codes and settings issues when importing a dump v2
This commit is contained in:
parent
3872a1b8d1
commit
a9eeb070b8
2
Cargo.lock
generated
2
Cargo.lock
generated
@ -1109,11 +1109,13 @@ dependencies = [
|
||||
"http",
|
||||
"index-scheduler",
|
||||
"insta",
|
||||
"lazy_static",
|
||||
"log",
|
||||
"maplit",
|
||||
"meili-snap",
|
||||
"meilisearch-auth",
|
||||
"meilisearch-types",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tar",
|
||||
|
@ -19,6 +19,8 @@ log = "0.4.17"
|
||||
meilisearch-auth = { path = "../meilisearch-auth" }
|
||||
meilisearch-types = { path = "../meilisearch-types" }
|
||||
http = "0.2.8"
|
||||
regex = "1.6.0"
|
||||
lazy_static = "1.4.0"
|
||||
|
||||
[dev-dependencies]
|
||||
big_s = "1.0.2"
|
||||
|
@ -1,10 +1,8 @@
|
||||
use meilisearch_types::{
|
||||
error::ResponseError,
|
||||
keys::Key,
|
||||
milli::update::IndexDocumentsMethod,
|
||||
settings::Unchecked,
|
||||
tasks::{Details, KindWithContent, Status, Task, TaskId},
|
||||
InstanceUid,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use time::OffsetDateTime;
|
||||
|
@ -353,7 +353,12 @@ impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
|
||||
filterable_attributes: option_to_setting(settings.filterable_attributes)
|
||||
.map(|f| f.into_iter().collect()),
|
||||
sortable_attributes: v3::Setting::NotSet,
|
||||
ranking_rules: option_to_setting(settings.ranking_rules),
|
||||
ranking_rules: option_to_setting(settings.ranking_rules).map(|criteria| {
|
||||
criteria
|
||||
.into_iter()
|
||||
.map(|criterion| patch_ranking_rules(&criterion))
|
||||
.collect()
|
||||
}),
|
||||
stop_words: option_to_setting(settings.stop_words),
|
||||
synonyms: option_to_setting(settings.synonyms),
|
||||
distinct_attribute: option_to_setting(settings.distinct_attribute),
|
||||
@ -362,6 +367,20 @@ impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
|
||||
}
|
||||
}
|
||||
|
||||
fn patch_ranking_rules(ranking_rule: &str) -> String {
|
||||
match v2::settings::Criterion::from_str(ranking_rule) {
|
||||
Ok(v2::settings::Criterion::Words) => String::from("words"),
|
||||
Ok(v2::settings::Criterion::Typo) => String::from("typo"),
|
||||
Ok(v2::settings::Criterion::Proximity) => String::from("proximity"),
|
||||
Ok(v2::settings::Criterion::Attribute) => String::from("attribute"),
|
||||
Ok(v2::settings::Criterion::Exactness) => String::from("exactness"),
|
||||
Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"),
|
||||
Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"),
|
||||
// we want to forward the error to the current version of meilisearch
|
||||
Err(_) => ranking_rule.to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod test {
|
||||
use std::{fs::File, io::BufReader};
|
||||
@ -388,7 +407,7 @@ pub(crate) mod test {
|
||||
// tasks
|
||||
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, mut update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"6adb1469ab4cc7625fd8ad32d07e51cd");
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"9507711db47c7171c79bc6d57d0bed79");
|
||||
assert_eq!(update_files.len(), 9);
|
||||
assert!(update_files[0].is_some()); // the enqueued document addition
|
||||
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
|
||||
@ -440,7 +459,7 @@ pub(crate) mod test {
|
||||
}
|
||||
"###);
|
||||
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"8ee40d46442eb1a7cdc463d8a787515e");
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"ae7c5ade2243a553152dab2f354e9095");
|
||||
let documents = movies
|
||||
.documents()
|
||||
.unwrap()
|
||||
|
@ -410,7 +410,7 @@ pub(crate) mod test {
|
||||
// tasks
|
||||
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"e0b53f2cbd76c66dc55b12263a60d2c5");
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ed9a30cded4c046ef46f7cff7450347e");
|
||||
assert_eq!(update_files.len(), 10);
|
||||
assert!(update_files[0].is_some()); // the enqueued document addition
|
||||
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
|
||||
|
@ -414,7 +414,7 @@ pub(crate) mod test {
|
||||
// tasks
|
||||
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf");
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8c6cd41457c0b7e4c6727c9c85b7abac");
|
||||
assert_eq!(update_files.len(), 22);
|
||||
assert!(update_files[0].is_none()); // the dump creation
|
||||
assert!(update_files[1].is_some()); // the enqueued document addition
|
||||
|
@ -203,7 +203,7 @@ pub(crate) mod test {
|
||||
// tasks
|
||||
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"84d5b8eb31735d643483fcee28080edf");
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8c6cd41457c0b7e4c6727c9c85b7abac");
|
||||
assert_eq!(update_files.len(), 22);
|
||||
assert!(update_files[0].is_none()); // the dump creation
|
||||
assert!(update_files[1].is_some()); // the enqueued document addition
|
||||
@ -293,7 +293,7 @@ pub(crate) mod test {
|
||||
// tasks
|
||||
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"0903b293c6ff8dc0819cbd3406848ef2");
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"acd74244b4e6578c353899e6db30b0b5");
|
||||
assert_eq!(update_files.len(), 10);
|
||||
assert!(update_files[0].is_some()); // the enqueued document addition
|
||||
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
|
||||
@ -382,7 +382,7 @@ pub(crate) mod test {
|
||||
// tasks
|
||||
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"891538c6fe0ba5187853a4f04890f9b5");
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"fa74f7c6ab3014e09bb813fdc551db8f");
|
||||
assert_eq!(update_files.len(), 10);
|
||||
assert!(update_files[0].is_some()); // the enqueued document addition
|
||||
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
|
||||
@ -491,7 +491,7 @@ pub(crate) mod test {
|
||||
// tasks
|
||||
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
|
||||
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"c52c07e1b356cce6982e2aeea7d0bf5e");
|
||||
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"19882e94dc085f1d60eb7df5005a3224");
|
||||
assert_eq!(update_files.len(), 9);
|
||||
assert!(update_files[0].is_some()); // the enqueued document addition
|
||||
assert!(update_files[1..].iter().all(|u| u.is_none())); // everything already processed
|
||||
@ -540,7 +540,7 @@ pub(crate) mod test {
|
||||
}
|
||||
"###);
|
||||
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"1e51f7fdc322176408f471a6d90d7698");
|
||||
meili_snap::snapshot_hash!(format!("{:#?}", movies.settings()), @"5389153ddf5527fa79c54b6a6e9c21f6");
|
||||
let documents = movies
|
||||
.documents()
|
||||
.unwrap()
|
||||
|
@ -1,8 +1,10 @@
|
||||
use std::{
|
||||
collections::{BTreeMap, BTreeSet, HashSet},
|
||||
marker::PhantomData,
|
||||
str::FromStr,
|
||||
};
|
||||
|
||||
use regex::Regex;
|
||||
use serde::{Deserialize, Deserializer};
|
||||
|
||||
#[cfg(test)]
|
||||
@ -129,3 +131,51 @@ impl Settings<Unchecked> {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lazy_static::lazy_static! {
    /// Pattern for the `asc(field)` / `desc(field)` ranking-rule syntax.
    ///
    /// Capture group 1 is the order (`asc` or `desc`); capture group 2 is the
    /// field name, made of word characters, `_` and `-`. The pattern carries
    /// no `^`/`$` anchors, so it matches anywhere inside the searched text.
    static ref ASC_DESC_REGEX: Regex = Regex::new(r"(asc|desc)\(([\w_-]+)\)").unwrap();
}
|
||||
|
||||
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
|
||||
pub enum Criterion {
|
||||
/// Sorted by decreasing number of matched query terms.
|
||||
/// Query words at the front of an attribute is considered better than if it was at the back.
|
||||
Words,
|
||||
/// Sorted by increasing number of typos.
|
||||
Typo,
|
||||
/// Sorted by increasing distance between matched query terms.
|
||||
Proximity,
|
||||
/// Documents with quey words contained in more important
|
||||
/// attributes are considred better.
|
||||
Attribute,
|
||||
/// Sorted by the similarity of the matched words with the query words.
|
||||
Exactness,
|
||||
/// Sorted by the increasing value of the field specified.
|
||||
Asc(String),
|
||||
/// Sorted by the decreasing value of the field specified.
|
||||
Desc(String),
|
||||
}
|
||||
|
||||
impl FromStr for Criterion {
|
||||
type Err = ();
|
||||
|
||||
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
|
||||
match txt {
|
||||
"words" => Ok(Criterion::Words),
|
||||
"typo" => Ok(Criterion::Typo),
|
||||
"proximity" => Ok(Criterion::Proximity),
|
||||
"attribute" => Ok(Criterion::Attribute),
|
||||
"exactness" => Ok(Criterion::Exactness),
|
||||
text => {
|
||||
let caps = ASC_DESC_REGEX.captures(text).ok_or(())?;
|
||||
let order = caps.get(1).unwrap().as_str();
|
||||
let field_name = caps.get(2).unwrap().as_str();
|
||||
match order {
|
||||
"asc" => Ok(Criterion::Asc(field_name.to_string())),
|
||||
"desc" => Ok(Criterion::Desc(field_name.to_string())),
|
||||
_text => Err(()),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -330,9 +330,8 @@ pub(crate) mod test {
|
||||
|
||||
// ==== checking the task queue
|
||||
let tasks_queue = fs::read_to_string(dump_path.join("tasks/queue.jsonl")).unwrap();
|
||||
for (task, mut expected) in tasks_queue.lines().zip(create_test_tasks()) {
|
||||
// TODO: uncomment this one once we write the dump integration in the index-scheduler
|
||||
// assert_eq!(serde_json::from_str::<TaskView>(task).unwrap(), expected.0);
|
||||
for (task, expected) in tasks_queue.lines().zip(create_test_tasks()) {
|
||||
assert_eq!(serde_json::from_str::<TaskDump>(task).unwrap(), expected.0);
|
||||
|
||||
if let Some(expected_update) = expected.1 {
|
||||
let path = dump_path.join(format!("tasks/update_files/{}.jsonl", expected.0.uid));
|
||||
|
Loading…
x
Reference in New Issue
Block a user