3441: Fix import of dump v2 r=dureuill a=irevoire

# Pull Request
This bug was introduced because of a mistake we did earlier: We said the last version to export dump v2 was the v0.21.0 while it was the v0.22.0.
To fix the bug I updated our whole v2 reader to use the code from meilisearch v0.22.0.
Also:
- Import the bugged dump in the tests
- Test the import of this dump in the v2 reader and current reader

## Related issue
Fixes #3435


Co-authored-by: Tamo <tamo@meilisearch.com>
This commit is contained in:
bors[bot] 2023-01-31 13:23:57 +00:00 committed by GitHub
commit 20f8184c06
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 585 additions and 155 deletions

View File

@ -10,6 +10,7 @@ expression: products.settings().unwrap()
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",

View File

@ -13,13 +13,17 @@ expression: movies.settings().unwrap()
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",
"proximity",
"attribute",
"exactness",
"asc(release_date)"
"release_date:asc"
],
"stopWords": [],
"synonyms": {},

View File

@ -10,6 +10,7 @@ expression: spells.settings().unwrap()
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",

View File

@ -1,4 +1,3 @@
use std::collections::BTreeSet;
use std::str::FromStr;
use super::v2_to_v3::CompatV2ToV3;
@ -102,14 +101,15 @@ impl CompatIndexV1ToV2 {
impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
fn from(source: v1::settings::Settings) -> Self {
let displayed_attributes = source
.displayed_attributes
.map(|opt| opt.map(|displayed_attributes| displayed_attributes.into_iter().collect()));
let attributes_for_faceting = source.attributes_for_faceting.map(|opt| {
opt.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect())
});
let ranking_rules = source.ranking_rules.map(|opt| {
opt.map(|ranking_rules| {
Self {
displayed_attributes: option_to_setting(source.displayed_attributes)
.map(|displayed| displayed.into_iter().collect()),
searchable_attributes: option_to_setting(source.searchable_attributes),
filterable_attributes: option_to_setting(source.attributes_for_faceting.clone())
.map(|filterable| filterable.into_iter().collect()),
sortable_attributes: option_to_setting(source.attributes_for_faceting)
.map(|sortable| sortable.into_iter().collect()),
ranking_rules: option_to_setting(source.ranking_rules).map(|ranking_rules| {
ranking_rules
.into_iter()
.filter_map(|ranking_rule| {
@ -119,26 +119,33 @@ impl From<v1::settings::Settings> for v2::Settings<v2::Unchecked> {
ranking_rule.into();
criterion.as_ref().map(ToString::to_string)
}
Err(()) => Some(ranking_rule),
Err(()) => {
log::warn!(
"Could not import the following ranking rule: `{}`.",
ranking_rule
);
None
}
}
})
.collect()
})
});
Self {
displayed_attributes,
searchable_attributes: source.searchable_attributes,
filterable_attributes: attributes_for_faceting,
ranking_rules,
stop_words: source.stop_words,
synonyms: source.synonyms,
distinct_attribute: source.distinct_attribute,
}),
stop_words: option_to_setting(source.stop_words),
synonyms: option_to_setting(source.synonyms),
distinct_attribute: option_to_setting(source.distinct_attribute),
_kind: std::marker::PhantomData,
}
}
}
fn option_to_setting<T>(opt: Option<Option<T>>) -> v2::Setting<T> {
match opt {
Some(Some(t)) => v2::Setting::Set(t),
None => v2::Setting::NotSet,
Some(None) => v2::Setting::Reset,
}
}
impl From<v1::update::UpdateStatus> for Option<v2::updates::UpdateStatus> {
fn from(source: v1::update::UpdateStatus) -> Self {
use v1::update::UpdateStatus as UpdateStatusV1;
@ -251,38 +258,27 @@ impl From<v1::update::UpdateType> for Option<v2::updates::UpdateMeta> {
impl From<v1::settings::SettingsUpdate> for v2::Settings<v2::Unchecked> {
fn from(source: v1::settings::SettingsUpdate) -> Self {
let displayed_attributes: Option<Option<BTreeSet<String>>> =
source.displayed_attributes.into();
let attributes_for_faceting: Option<Option<Vec<String>>> =
source.attributes_for_faceting.into();
let ranking_rules: Option<Option<Vec<v1::settings::RankingRule>>> =
source.ranking_rules.into();
let ranking_rules = v2::Setting::from(source.ranking_rules);
// go from the concrete types of v1 (RankingRule) to the concrete type of v2 (Criterion),
// and then back to string as this is what the settings manipulate
let ranking_rules = ranking_rules.map(|opt| {
opt.map(|ranking_rules| {
ranking_rules
.into_iter()
// filter out the WordsPosition ranking rule that exists in v1 but not v2
.filter_map(|ranking_rule| {
Option::<v2::settings::Criterion>::from(ranking_rule)
})
.map(|criterion| criterion.to_string())
.collect()
})
let ranking_rules = ranking_rules.map(|ranking_rules| {
ranking_rules
.into_iter()
// filter out the WordsPosition ranking rule that exists in v1 but not v2
.filter_map(Option::<v2::settings::Criterion>::from)
.map(|criterion| criterion.to_string())
.collect()
});
Self {
displayed_attributes: displayed_attributes.map(|opt| {
opt.map(|displayed_attributes| displayed_attributes.into_iter().collect())
}),
displayed_attributes: v2::Setting::from(source.displayed_attributes)
.map(|displayed_attributes| displayed_attributes.into_iter().collect()),
searchable_attributes: source.searchable_attributes.into(),
filterable_attributes: attributes_for_faceting.map(|opt| {
opt.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect())
}),
filterable_attributes: v2::Setting::from(source.attributes_for_faceting.clone())
.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()),
sortable_attributes: v2::Setting::from(source.attributes_for_faceting)
.map(|attributes_for_faceting| attributes_for_faceting.into_iter().collect()),
ranking_rules,
stop_words: source.stop_words.into(),
synonyms: source.synonyms.into(),
@ -314,12 +310,12 @@ impl From<v1::settings::RankingRule> for Option<v2::settings::Criterion> {
}
}
impl<T> From<v1::settings::UpdateState<T>> for Option<Option<T>> {
impl<T> From<v1::settings::UpdateState<T>> for v2::Setting<T> {
fn from(source: v1::settings::UpdateState<T>) -> Self {
match source {
v1::settings::UpdateState::Update(new_value) => Some(Some(new_value)),
v1::settings::UpdateState::Clear => Some(None),
v1::settings::UpdateState::Nothing => None,
v1::settings::UpdateState::Update(new_value) => v2::Setting::Set(new_value),
v1::settings::UpdateState::Clear => v2::Setting::Reset,
v1::settings::UpdateState::Nothing => v2::Setting::NotSet,
}
}
}
@ -352,7 +348,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"ad6245d98d1a8e30535f3339a9a8d223");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"2298010973ee98cf4670787314176a3a");
assert_eq!(update_files.len(), 9);
assert!(update_files[..].iter().all(|u| u.is_none())); // no update file in dumps v1

View File

@ -361,28 +361,29 @@ impl From<String> for v3::Code {
}
}
fn option_to_setting<T>(opt: Option<Option<T>>) -> v3::Setting<T> {
match opt {
Some(Some(t)) => v3::Setting::Set(t),
None => v3::Setting::NotSet,
Some(None) => v3::Setting::Reset,
impl<A> From<v2::Setting<A>> for v3::Setting<A> {
fn from(setting: v2::Setting<A>) -> Self {
match setting {
v2::settings::Setting::Set(a) => v3::settings::Setting::Set(a),
v2::settings::Setting::Reset => v3::settings::Setting::Reset,
v2::settings::Setting::NotSet => v3::settings::Setting::NotSet,
}
}
}
impl<T> From<v2::Settings<T>> for v3::Settings<v3::Unchecked> {
fn from(settings: v2::Settings<T>) -> Self {
v3::Settings {
displayed_attributes: option_to_setting(settings.displayed_attributes),
searchable_attributes: option_to_setting(settings.searchable_attributes),
filterable_attributes: option_to_setting(settings.filterable_attributes)
.map(|f| f.into_iter().collect()),
sortable_attributes: v3::Setting::NotSet,
ranking_rules: option_to_setting(settings.ranking_rules).map(|criteria| {
displayed_attributes: settings.displayed_attributes.into(),
searchable_attributes: settings.searchable_attributes.into(),
filterable_attributes: settings.filterable_attributes.into(),
sortable_attributes: settings.sortable_attributes.into(),
ranking_rules: v3::Setting::from(settings.ranking_rules).map(|criteria| {
criteria.into_iter().map(|criterion| patch_ranking_rules(&criterion)).collect()
}),
stop_words: option_to_setting(settings.stop_words),
synonyms: option_to_setting(settings.synonyms),
distinct_attribute: option_to_setting(settings.distinct_attribute),
stop_words: settings.stop_words.into(),
synonyms: settings.synonyms.into(),
distinct_attribute: settings.distinct_attribute.into(),
_kind: std::marker::PhantomData,
}
}
@ -394,6 +395,7 @@ fn patch_ranking_rules(ranking_rule: &str) -> String {
Ok(v2::settings::Criterion::Typo) => String::from("typo"),
Ok(v2::settings::Criterion::Proximity) => String::from("proximity"),
Ok(v2::settings::Criterion::Attribute) => String::from("attribute"),
Ok(v2::settings::Criterion::Sort) => String::from("sort"),
Ok(v2::settings::Criterion::Exactness) => String::from("exactness"),
Ok(v2::settings::Criterion::Asc(name)) => format!("{name}:asc"),
Ok(v2::settings::Criterion::Desc(name)) => format!("{name}:desc"),

View File

@ -530,6 +530,82 @@ pub(crate) mod test {
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
fn import_dump_v2_from_meilisearch_v0_22_0_issue_3435() {
let dump = File::open("tests/assets/v2-v0.22.0.dump").unwrap();
let mut dump = DumpReader::open(dump).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
assert_eq!(dump.instance_uid().unwrap(), None);
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"2db37756d8af1fb7623436b76e8956a6");
assert_eq!(update_files.len(), 8);
assert!(update_files[0..].iter().all(|u| u.is_none())); // everything already processed
// keys
let keys = dump.keys().unwrap().collect::<Result<Vec<_>>>().unwrap();
meili_snap::snapshot_hash!(meili_snap::json_string!(keys), @"d751713988987e9331980363e24189ce");
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the index are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
fn import_dump_v1() {
let dump = File::open("tests/assets/v1.dump").unwrap();
@ -542,7 +618,7 @@ pub(crate) mod test {
// tasks
let tasks = dump.tasks().unwrap().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"b3e3652bfc10a76670be157d2507d761");
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"8df6eab075a44b3c1af6b726f9fd9a43");
assert_eq!(update_files.len(), 9);
assert!(update_files[..].iter().all(|u| u.is_none())); // no update file in dump v1

View File

@ -10,6 +10,7 @@ expression: spells.settings().unwrap()
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",

View File

@ -10,6 +10,7 @@ expression: products.settings().unwrap()
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"typo",
"words",

View File

@ -13,6 +13,10 @@ expression: movies.settings().unwrap()
"genres",
"id"
],
"sortableAttributes": [
"genres",
"id"
],
"rankingRules": [
"typo",
"words",

View File

@ -0,0 +1,25 @@
---
source: dump/src/reader/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@ -0,0 +1,39 @@
---
source: dump/src/reader/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}

View File

@ -0,0 +1,30 @@
---
source: dump/src/reader/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"release_date"
],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@ -41,6 +41,7 @@ use super::Document;
use crate::{IndexMetadata, Result, Version};
pub type Settings<T> = settings::Settings<T>;
pub type Setting<T> = settings::Setting<T>;
pub type Checked = settings::Checked;
pub type Unchecked = settings::Unchecked;
@ -306,4 +307,81 @@ pub(crate) mod test {
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
#[test]
fn read_dump_v2_from_meilisearch_v0_22_0_issue_3435() {
let dump = File::open("tests/assets/v2-v0.22.0.dump").unwrap();
let dir = TempDir::new().unwrap();
let mut dump = BufReader::new(dump);
let gz = GzDecoder::new(&mut dump);
let mut archive = tar::Archive::new(gz);
archive.unpack(dir.path()).unwrap();
let mut dump = V2Reader::open(dir).unwrap();
// top level infos
insta::assert_display_snapshot!(dump.date().unwrap(), @"2023-01-30 16:26:09.247261 +00:00:00");
// tasks
let tasks = dump.tasks().collect::<Result<Vec<_>>>().unwrap();
let (tasks, update_files): (Vec<_>, Vec<_>) = tasks.into_iter().unzip();
meili_snap::snapshot_hash!(meili_snap::json_string!(tasks), @"aca8ba13046272664eb3ea2da3031633");
assert_eq!(update_files.len(), 8);
assert!(update_files[0..].iter().all(|u| u.is_none())); // everything has already been processed
// indexes
let mut indexes = dump.indexes().unwrap().collect::<Result<Vec<_>>>().unwrap();
// the index are not ordered in any way by default
indexes.sort_by_key(|index| index.metadata().uid.to_string());
let mut products = indexes.pop().unwrap();
let mut movies = indexes.pop().unwrap();
let mut spells = indexes.pop().unwrap();
assert!(indexes.is_empty());
// products
insta::assert_json_snapshot!(products.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "products",
"primaryKey": "sku",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(products.settings().unwrap());
let documents = products.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"548284a84de510f71e88e6cdea495cf5");
// movies
insta::assert_json_snapshot!(movies.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "movies",
"primaryKey": "id",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(movies.settings().unwrap());
let documents = movies.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"0227598af846e574139ee0b80e03a720");
// spells
insta::assert_json_snapshot!(spells.metadata(), { ".createdAt" => "[now]", ".updatedAt" => "[now]" }, @r###"
{
"uid": "dnd_spells",
"primaryKey": "index",
"createdAt": "[now]",
"updatedAt": "[now]"
}
"###);
insta::assert_json_snapshot!(spells.settings().unwrap());
let documents = spells.documents().unwrap().collect::<Result<Vec<_>>>().unwrap();
assert_eq!(documents.len(), 10);
meili_snap::snapshot_hash!(format!("{:#?}", documents), @"235016433dd04262c7f2da01d1e808ce");
}
}

View File

@ -1,35 +1,33 @@
use std::collections::{BTreeMap, BTreeSet};
use std::fmt::Display;
use std::fmt;
use std::marker::PhantomData;
use std::str::FromStr;
use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Deserializer};
#[cfg(test)]
fn serialize_with_wildcard<S>(
field: &Option<Option<Vec<String>>>,
field: &Setting<Vec<String>>,
s: S,
) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
let wildcard = vec!["*".to_string()];
s.serialize_some(&field.as_ref().map(|o| o.as_ref().unwrap_or(&wildcard)))
}
use serde::Serialize;
fn deserialize_some<'de, T, D>(deserializer: D) -> std::result::Result<Option<T>, D::Error>
where
T: Deserialize<'de>,
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(Some)
let wildcard = vec!["*".to_string()];
match field {
Setting::Set(value) => Some(value),
Setting::Reset => Some(&wildcard),
Setting::NotSet => None,
}
.serialize(s)
}
#[derive(Clone, Default, Debug)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Checked;
#[derive(Clone, Default, Debug, Deserialize)]
#[cfg_attr(test, derive(serde::Serialize))]
pub struct Unchecked;
@ -42,75 +40,54 @@ pub struct Unchecked;
pub struct Settings<T> {
#[serde(
default,
deserialize_with = "deserialize_some",
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Option::is_none"
skip_serializing_if = "Setting::is_not_set"
)]
pub displayed_attributes: Option<Option<Vec<String>>>,
pub displayed_attributes: Setting<Vec<String>>,
#[serde(
default,
deserialize_with = "deserialize_some",
serialize_with = "serialize_with_wildcard",
skip_serializing_if = "Option::is_none"
skip_serializing_if = "Setting::is_not_set"
)]
pub searchable_attributes: Option<Option<Vec<String>>>,
pub searchable_attributes: Setting<Vec<String>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub filterable_attributes: Option<Option<BTreeSet<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub ranking_rules: Option<Option<Vec<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub stop_words: Option<Option<BTreeSet<String>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub synonyms: Option<Option<BTreeMap<String, Vec<String>>>>,
#[serde(
default,
deserialize_with = "deserialize_some",
skip_serializing_if = "Option::is_none"
)]
pub distinct_attribute: Option<Option<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub filterable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub sortable_attributes: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub ranking_rules: Setting<Vec<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub stop_words: Setting<BTreeSet<String>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
#[serde(default, skip_serializing_if = "Setting::is_not_set")]
pub distinct_attribute: Setting<String>,
#[serde(skip)]
pub _kind: PhantomData<T>,
}
impl Settings<Unchecked> {
pub fn check(mut self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes.take() {
Some(Some(fields)) => {
pub fn check(self) -> Settings<Checked> {
let displayed_attributes = match self.displayed_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Some(None)
Setting::Reset
} else {
Some(Some(fields))
Setting::Set(fields)
}
}
otherwise => otherwise,
};
let searchable_attributes = match self.searchable_attributes.take() {
Some(Some(fields)) => {
let searchable_attributes = match self.searchable_attributes {
Setting::Set(fields) => {
if fields.iter().any(|f| f == "*") {
Some(None)
Setting::Reset
} else {
Some(Some(fields))
Setting::Set(fields)
}
}
otherwise => otherwise,
@ -120,6 +97,7 @@ impl Settings<Unchecked> {
displayed_attributes,
searchable_attributes,
filterable_attributes: self.filterable_attributes,
sortable_attributes: self.sortable_attributes,
ranking_rules: self.ranking_rules,
stop_words: self.stop_words,
synonyms: self.synonyms,
@ -129,10 +107,61 @@ impl Settings<Unchecked> {
}
}
static ASC_DESC_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(asc|desc)\(([\w_-]+)\)"#).unwrap());
#[derive(Debug, Clone, PartialEq)]
pub enum Setting<T> {
Set(T),
Reset,
NotSet,
}
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
impl<T> Default for Setting<T> {
fn default() -> Self {
Self::NotSet
}
}
impl<T> Setting<T> {
pub const fn is_not_set(&self) -> bool {
matches!(self, Self::NotSet)
}
pub fn map<A>(self, f: fn(T) -> A) -> Setting<A> {
match self {
Setting::Set(a) => Setting::Set(f(a)),
Setting::Reset => Setting::Reset,
Setting::NotSet => Setting::NotSet,
}
}
}
#[cfg(test)]
impl<T: serde::Serialize> serde::Serialize for Setting<T> {
fn serialize<S>(&self, serializer: S) -> std::result::Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
match self {
Self::Set(value) => Some(value),
// Usually not_set isn't serialized by setting skip_serializing_if field attribute
Self::NotSet | Self::Reset => None,
}
.serialize(serializer)
}
}
impl<'de, T: Deserialize<'de>> Deserialize<'de> for Setting<T> {
fn deserialize<D>(deserializer: D) -> std::result::Result<Self, D::Error>
where
D: Deserializer<'de>,
{
Deserialize::deserialize(deserializer).map(|x| match x {
Some(x) => Self::Set(x),
None => Self::Reset, // Reset is forced by sending null value
})
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Criterion {
/// Sorted by decreasing number of matched query terms.
/// Query words at the front of an attribute is considered better than if it was at the back.
@ -142,8 +171,11 @@ pub enum Criterion {
/// Sorted by increasing distance between matched query terms.
Proximity,
/// Documents with quey words contained in more important
/// attributes are considred better.
/// attributes are considered better.
Attribute,
/// Dynamically sort at query time the documents. None, one or multiple Asc/Desc sortable
/// attributes can be used in place of this criterion at query time.
Sort,
/// Sorted by the similarity of the matched words with the query words.
Exactness,
/// Sorted by the increasing value of the field specified.
@ -152,40 +184,86 @@ pub enum Criterion {
Desc(String),
}
impl Criterion {
/// Returns the field name parameter of this criterion.
pub fn field_name(&self) -> Option<&str> {
match self {
Criterion::Asc(name) | Criterion::Desc(name) => Some(name),
_otherwise => None,
}
}
}
impl FromStr for Criterion {
// since we're not going to show the custom error message we can override the
// error type.
type Err = ();
fn from_str(txt: &str) -> Result<Criterion, Self::Err> {
match txt {
fn from_str(text: &str) -> Result<Criterion, Self::Err> {
match text {
"words" => Ok(Criterion::Words),
"typo" => Ok(Criterion::Typo),
"proximity" => Ok(Criterion::Proximity),
"attribute" => Ok(Criterion::Attribute),
"sort" => Ok(Criterion::Sort),
"exactness" => Ok(Criterion::Exactness),
text => {
let caps = ASC_DESC_REGEX.captures(text).ok_or(())?;
let order = caps.get(1).unwrap().as_str();
let field_name = caps.get(2).unwrap().as_str();
match order {
"asc" => Ok(Criterion::Asc(field_name.to_string())),
"desc" => Ok(Criterion::Desc(field_name.to_string())),
_text => Err(()),
}
}
text => match AscDesc::from_str(text) {
Ok(AscDesc::Asc(field)) => Ok(Criterion::Asc(field)),
Ok(AscDesc::Desc(field)) => Ok(Criterion::Desc(field)),
Err(_) => Err(()),
},
}
}
}
impl Display for Criterion {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Criterion::Words => write!(f, "words"),
Criterion::Typo => write!(f, "typo"),
Criterion::Proximity => write!(f, "proximity"),
Criterion::Attribute => write!(f, "attribute"),
Criterion::Exactness => write!(f, "exactness"),
Criterion::Asc(field_name) => write!(f, "asc({})", field_name),
Criterion::Desc(field_name) => write!(f, "desc({})", field_name),
#[derive(Debug, Deserialize, Clone, PartialEq, Eq)]
pub enum AscDesc {
Asc(String),
Desc(String),
}
impl FromStr for AscDesc {
type Err = ();
// since we don't know if this comes from the old or new syntax we need to check
// for both syntax.
// WARN: this code doesn't come from the original meilisearch v0.22.0 but was
// written specifically to be able to import the dump of meilisearch v0.21.0 AND
// meilisearch v0.22.0.
fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
if let Some((field_name, asc_desc)) = text.rsplit_once(':') {
match asc_desc {
"asc" => Ok(AscDesc::Asc(field_name.to_string())),
"desc" => Ok(AscDesc::Desc(field_name.to_string())),
_ => Err(()),
}
} else if text.starts_with("asc(") && text.ends_with(')') {
Ok(AscDesc::Asc(
text.strip_prefix("asc(").unwrap().strip_suffix(')').unwrap().to_string(),
))
} else if text.starts_with("desc(") && text.ends_with(')') {
Ok(AscDesc::Desc(
text.strip_prefix("desc(").unwrap().strip_suffix(')').unwrap().to_string(),
))
} else {
Err(())
}
}
}
impl fmt::Display for Criterion {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
use Criterion::*;
match self {
Words => f.write_str("words"),
Typo => f.write_str("typo"),
Proximity => f.write_str("proximity"),
Attribute => f.write_str("attribute"),
Sort => f.write_str("sort"),
Exactness => f.write_str("exactness"),
Asc(attr) => write!(f, "{}:asc", attr),
Desc(attr) => write!(f, "{}:desc", attr),
}
}
}

View File

@ -0,0 +1,25 @@
---
source: dump/src/reader/v2/mod.rs
expression: spells.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

View File

@ -0,0 +1,39 @@
---
source: dump/src/reader/v2/mod.rs
expression: products.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [],
"sortableAttributes": [],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"sort",
"exactness"
],
"stopWords": [],
"synonyms": {
"android": [
"phone",
"smartphone"
],
"iphone": [
"phone",
"smartphone"
],
"phone": [
"android",
"iphone",
"smartphone"
]
},
"distinctAttribute": null
}

View File

@ -0,0 +1,30 @@
---
source: dump/src/reader/v2/mod.rs
expression: movies.settings().unwrap()
---
{
"displayedAttributes": [
"*"
],
"searchableAttributes": [
"*"
],
"filterableAttributes": [
"genres",
"id"
],
"sortableAttributes": [
"release_date"
],
"rankingRules": [
"words",
"typo",
"proximity",
"attribute",
"exactness",
"release_date:asc"
],
"stopWords": [],
"synonyms": {},
"distinctAttribute": null
}

Binary file not shown.

View File

@ -98,14 +98,14 @@ async fn import_dump_v1_movie_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({ "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": [], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
json!({ "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": ["genres"], "rankingRules": ["typo", "words", "proximity", "attribute", "exactness"], "stopWords": ["of", "the"], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": { "oneTypo": 5, "twoTypos": 9 }, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 } })
);
let (tasks, code) = index.list_tasks().await;
assert_eq!(code, 200);
assert_eq!(
tasks,
json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT7.288826907S", "enqueuedAt": "2021-09-08T09:34:40.882977Z", "startedAt": "2021-09-08T09:34:40.883073093Z", "finishedAt": "2021-09-08T09:34:48.1719Z"}, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31968 }, "error": null, "duration": "PT9.090735774S", "enqueuedAt": "2021-09-08T09:34:16.036101Z", "startedAt": "2021-09-08T09:34:16.261191226Z", "finishedAt": "2021-09-08T09:34:25.351927Z" }], "limit": 20, "from": 1, "next": null })
json!({ "results": [{ "uid": 1, "indexUid": "indexUID", "status": "succeeded", "type": "settingsUpdate", "canceledBy": null, "details": { "displayedAttributes": ["genres", "id", "overview", "poster", "release_date", "title"], "searchableAttributes": ["title", "overview"], "filterableAttributes": ["genres"], "sortableAttributes": ["genres"], "stopWords": ["of", "the"] }, "error": null, "duration": "PT7.288826907S", "enqueuedAt": "2021-09-08T09:34:40.882977Z", "startedAt": "2021-09-08T09:34:40.883073093Z", "finishedAt": "2021-09-08T09:34:48.1719Z"}, { "uid": 0, "indexUid": "indexUID", "status": "succeeded", "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { "receivedDocuments": 0, "indexedDocuments": 31968 }, "error": null, "duration": "PT9.090735774S", "enqueuedAt": "2021-09-08T09:34:16.036101Z", "startedAt": "2021-09-08T09:34:16.261191226Z", "finishedAt": "2021-09-08T09:34:25.351927Z" }], "limit": 20, "from": 1, "next": null })
);
// finally we're just going to check that we can still get a few documents by id
@ -161,7 +161,7 @@ async fn import_dump_v1_rubygems_with_settings() {
assert_eq!(code, 200);
assert_eq!(
settings,
json!({"displayedAttributes": ["description", "id", "name", "summary", "total_downloads", "version"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": [], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 }})
json!({"displayedAttributes": ["description", "id", "name", "summary", "total_downloads", "version"], "searchableAttributes": ["name", "summary"], "filterableAttributes": ["version"], "sortableAttributes": ["version"], "rankingRules": ["typo", "words", "fame:desc", "proximity", "attribute", "exactness", "total_downloads:desc"], "stopWords": [], "synonyms": {}, "distinctAttribute": null, "typoTolerance": {"enabled": true, "minWordSizeForTypos": {"oneTypo": 5, "twoTypos": 9}, "disableOnWords": [], "disableOnAttributes": [] }, "faceting": { "maxValuesPerFacet": 100 }, "pagination": { "maxTotalHits": 1000 }})
);
let (tasks, code) = index.list_tasks().await;