From 390eadb73385c7c680e3b6b0c5b97a7cad5341fb Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 17 Sep 2024 15:01:01 +0200 Subject: [PATCH 1/4] Support iso-639-1 --- Cargo.lock | 1 + meilisearch-types/Cargo.toml | 1 + meilisearch-types/src/locales.rs | 637 +++++++++++++++++++++++----- meilisearch/tests/search/locales.rs | 205 ++++++--- 4 files changed, 669 insertions(+), 175 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a2b09da2..ca6231355 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3502,6 +3502,7 @@ dependencies = [ "serde", "serde-cs", "serde_json", + "strum", "tar", "tempfile", "thiserror", diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 73306c4dc..237f21f47 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -27,6 +27,7 @@ roaring = { version = "0.10.6", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde-cs = "0.2.4" serde_json = "1.0.120" +strum = { version = "0.26", features = ["derive"] } tar = "0.4.41" tempfile = "3.10.1" thiserror = "1.0.61" diff --git a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs index c6902dd71..6b670f191 100644 --- a/meilisearch-types/src/locales.rs +++ b/meilisearch-types/src/locales.rs @@ -1,121 +1,397 @@ use deserr::Deserr; -use milli::LocalizedAttributesRule; +use milli::{tokenizer::Language, LocalizedAttributesRule}; use serde::{Deserialize, Serialize}; use serde_json::json; +use strum::{EnumIter, IntoEnumIterator}; -/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language. -/// -/// this enum implements `Deserr` in order to be used in the API. -macro_rules! make_locale { - - ($($language:tt), +) => { - #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)] - #[deserr(rename_all = camelCase)] - #[serde(rename_all = "camelCase")] - pub enum Locale { - $($language),+, - } - - impl From for Locale { - fn from(other: milli::tokenizer::Language) -> Locale { - match other { - $(milli::tokenizer::Language::$language => Locale::$language), + - } - } - } - - impl From for milli::tokenizer::Language { - fn from(other: Locale) -> milli::tokenizer::Language { - match other { - $(Locale::$language => milli::tokenizer::Language::$language), +, - } - } - } - - #[derive(Debug)] - pub struct LocaleFormatError { - pub invalid_locale: String, - } - - impl std::fmt::Display for LocaleFormatError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let valid_locales = [$(Locale::$language),+].iter().map(|l| format!("`{}`", json!(l).as_str().unwrap())).collect::>().join(", "); - write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) - } - } - }; -} - -make_locale! { - Epo, - Eng, - Rus, - Cmn, - Spa, - Por, - Ita, - Ben, - Fra, - Deu, - Ukr, - Kat, - Ara, - Hin, - Jpn, - Heb, - Yid, - Pol, +#[derive( + Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd, EnumIter, +)] +#[deserr(rename_all = camelCase)] +#[serde(rename_all = "camelCase")] +pub enum Locale { + // ISO 639-3 + Afr, + Aka, Amh, - Jav, - Kor, - Nob, - Dan, - Swe, - Fin, - Tur, - Nld, - Hun, - Ces, - Ell, - Bul, - Bel, - Mar, - Kan, - Ron, - Slv, - Hrv, - Srp, - Mkd, - Lit, - Lav, - Est, - Tam, - Vie, - Urd, - Tha, - Guj, - Uzb, - Pan, + Ara, Aze, + Bel, + Ben, + Bul, + Cat, + Ces, + Cmn, + Dan, + Deu, + Ell, + Eng, + Epo, + Est, + Fas, + Fin, + Fra, + Guj, + Heb, + Hin, + Hrv, + Hun, + Hye, Ind, - Tel, - Pes, + Ita, + Jav, + Jpn, + Kan, + Kat, + Khm, + Kor, + Lat, + Lav, + Lit, Mal, - Ori, + Mar, + Mkd, Mya, Nep, + Nld, + Nob, + Ori, + Pan, + Pes, + Pol, + Por, + Ron, + Rus, Sin, - Khm, - Tuk, - Aka, - Zul, - Sna, - Afr, - Lat, Slk, - Cat, + Slv, + Sna, + Spa, + Srp, + Swe, + Tam, + Tel, Tgl, - Hye + Tha, + Tuk, + Tur, + Ukr, + Urd, + Uzb, + Vie, + Yid, + Zho, + Zul, + // ISO 639-1 + Af, + Ak, + Am, + Ar, + Az, + Be, + Bn, + Bg, + Ca, + Cs, + Zh, + Da, + De, + El, + En, + Eo, + Et, + Fi, + Fr, + Gu, + He, + Hi, + Hr, + Hu, + Hy, + Id, + It, + Jv, + Ja, + Kn, + Ka, + Km, + Ko, + La, + Lv, + Lt, + Ml, + Mr, + Mk, + My, + Ne, + Nl, + Nb, + Or, + Pa, + Fa, + Pl, + Pt, + Ro, + Ru, + Si, + Sk, + Sl, + Sn, + Es, + Sr, + Sv, + Ta, + Te, + Tl, + Th, + Tk, + Tr, + Uk, + Ur, + Uz, + Vi, + Yi, + Zu, +} + +impl From for Language { + fn from(other: Locale) -> Language { + match other { + // ISO 639-3 + Locale::Afr => Language::Afr, + Locale::Aka => Language::Aka, + Locale::Amh => Language::Amh, + Locale::Ara => Language::Ara, + Locale::Aze => Language::Aze, + Locale::Bel => Language::Bel, + Locale::Ben => Language::Ben, + Locale::Bul => Language::Bul, + Locale::Cat => Language::Cat, + Locale::Ces => Language::Ces, + Locale::Cmn => Language::Cmn, + Locale::Dan => Language::Dan, + Locale::Deu => Language::Deu, + Locale::Ell => Language::Ell, + Locale::Eng => Language::Eng, + Locale::Epo => Language::Epo, + Locale::Est => Language::Est, + Locale::Fas => Language::Pes, + Locale::Fin => Language::Fin, + Locale::Fra => Language::Fra, + Locale::Guj => Language::Guj, + Locale::Heb => Language::Heb, + Locale::Hin => Language::Hin, + Locale::Hrv => Language::Hrv, + Locale::Hun => Language::Hun, + Locale::Hye => Language::Hye, + Locale::Ind => Language::Ind, + Locale::Ita => Language::Ita, + Locale::Jav => Language::Jav, + Locale::Jpn => Language::Jpn, + Locale::Kan => Language::Kan, + Locale::Kat => Language::Kat, + Locale::Khm => Language::Khm, + Locale::Kor => Language::Kor, + Locale::Lat => Language::Lat, + Locale::Lav => Language::Lav, + Locale::Lit => Language::Lit, + Locale::Mal => Language::Mal, + Locale::Mar => Language::Mar, + Locale::Mkd => Language::Mkd, + Locale::Mya => Language::Mya, + Locale::Nep => Language::Nep, + Locale::Nld => Language::Nld, + Locale::Nob => Language::Nob, + Locale::Ori => Language::Ori, + Locale::Pan => Language::Pan, + Locale::Pes => Language::Pes, + Locale::Pol => Language::Pol, + Locale::Por => Language::Por, + Locale::Ron => Language::Ron, + Locale::Rus => Language::Rus, + Locale::Sin => Language::Sin, + Locale::Slk => Language::Slk, + Locale::Slv => Language::Slv, + Locale::Sna => Language::Sna, + Locale::Spa => Language::Spa, + Locale::Srp => Language::Srp, + Locale::Swe => Language::Swe, + Locale::Tam => Language::Tam, + Locale::Tel => Language::Tel, + Locale::Tgl => Language::Tgl, + Locale::Tha => Language::Tha, + Locale::Tuk => Language::Tuk, + Locale::Tur => Language::Tur, + Locale::Ukr => Language::Ukr, + Locale::Urd => Language::Urd, + Locale::Uzb => Language::Uzb, + Locale::Vie => Language::Vie, + Locale::Yid => Language::Yid, + Locale::Zho => Language::Cmn, + Locale::Zul => Language::Zul, + // ISO 639-1 + Locale::Af => Language::Afr, + Locale::Ak => Language::Aka, + Locale::Am => Language::Amh, + Locale::Ar => Language::Ara, + Locale::Az => Language::Aze, + Locale::Be => Language::Bel, + Locale::Bn => Language::Ben, + Locale::Bg => Language::Bul, + Locale::Ca => Language::Cat, + Locale::Cs => Language::Ces, + Locale::Zh => Language::Cmn, + Locale::Da => Language::Dan, + Locale::De => Language::Deu, + Locale::El => Language::Ell, + Locale::En => Language::Eng, + Locale::Eo => Language::Epo, + Locale::Et => Language::Est, + Locale::Fi => Language::Fin, + Locale::Fr => Language::Fra, + Locale::Gu => Language::Guj, + Locale::He => Language::Heb, + Locale::Hi => Language::Hin, + Locale::Hr => Language::Hrv, + Locale::Hu => Language::Hun, + Locale::Hy => Language::Hye, + Locale::Id => Language::Ind, + Locale::It => Language::Ita, + Locale::Jv => Language::Jav, + Locale::Ja => Language::Jpn, + Locale::Kn => Language::Kan, + Locale::Ka => Language::Kat, + Locale::Km => Language::Khm, + Locale::Ko => Language::Kor, + Locale::La => Language::Lat, + Locale::Lv => Language::Lav, + Locale::Lt => Language::Lit, + Locale::Ml => Language::Mal, + Locale::Mr => Language::Mar, + Locale::Mk => Language::Mkd, + Locale::My => Language::Mya, + Locale::Ne => Language::Nep, + Locale::Nl => Language::Nld, + Locale::Nb => Language::Nob, + Locale::Or => Language::Ori, + Locale::Pa => Language::Pan, + Locale::Fa => Language::Pes, + Locale::Pl => Language::Pol, + Locale::Pt => Language::Por, + Locale::Ro => Language::Ron, + Locale::Ru => Language::Rus, + Locale::Si => Language::Sin, + Locale::Sk => Language::Slk, + Locale::Sl => Language::Slv, + Locale::Sn => Language::Sna, + Locale::Es => Language::Spa, + Locale::Sr => Language::Srp, + Locale::Sv => Language::Swe, + Locale::Ta => Language::Tam, + Locale::Te => Language::Tel, + Locale::Tl => Language::Tgl, + Locale::Th => Language::Tha, + Locale::Tk => Language::Tuk, + Locale::Tr => Language::Tur, + Locale::Uk => Language::Ukr, + Locale::Ur => Language::Urd, + Locale::Uz => Language::Uzb, + Locale::Vi => Language::Vie, + Locale::Yi => Language::Yid, + Locale::Zu => Language::Zul, + } + } +} + +impl From for Locale { + fn from(other: Language) -> Locale { + match other { + Language::Afr => Locale::Afr, + Language::Aka => Locale::Aka, + Language::Amh => Locale::Amh, + Language::Ara => Locale::Ara, + Language::Aze => Locale::Aze, + Language::Bel => Locale::Bel, + Language::Ben => Locale::Ben, + Language::Bul => Locale::Bul, + Language::Cat => Locale::Cat, + Language::Ces => Locale::Ces, + Language::Cmn => Locale::Zho, + Language::Dan => Locale::Dan, + Language::Deu => Locale::Deu, + Language::Ell => Locale::Ell, + Language::Eng => Locale::Eng, + Language::Epo => Locale::Epo, + Language::Est => Locale::Est, + Language::Fin => Locale::Fin, + Language::Fra => Locale::Fra, + Language::Guj => Locale::Guj, + Language::Heb => Locale::Heb, + Language::Hin => Locale::Hin, + Language::Hrv => Locale::Hrv, + Language::Hun => Locale::Hun, + Language::Hye => Locale::Hye, + Language::Ind => Locale::Ind, + Language::Ita => Locale::Ita, + Language::Jav => Locale::Jav, + Language::Jpn => Locale::Jpn, + Language::Kan => Locale::Kan, + Language::Kat => Locale::Kat, + Language::Khm => Locale::Khm, + Language::Kor => Locale::Kor, + Language::Lat => Locale::Lat, + Language::Lav => Locale::Lav, + Language::Lit => Locale::Lit, + Language::Mal => Locale::Mal, + Language::Mar => Locale::Mar, + Language::Mkd => Locale::Mkd, + Language::Mya => Locale::Mya, + Language::Nep => Locale::Nep, + Language::Nld => Locale::Nld, + Language::Nob => Locale::Nob, + Language::Ori => Locale::Ori, + Language::Pan => Locale::Pan, + Language::Pes => Locale::Fas, + Language::Pol => Locale::Pol, + Language::Por => Locale::Por, + Language::Ron => Locale::Ron, + Language::Rus => Locale::Rus, + Language::Sin => Locale::Sin, + Language::Slk => Locale::Slk, + Language::Slv => Locale::Slv, + Language::Sna => Locale::Sna, + Language::Spa => Locale::Spa, + Language::Srp => Locale::Srp, + Language::Swe => Locale::Swe, + Language::Tam => Locale::Tam, + Language::Tel => Locale::Tel, + Language::Tgl => Locale::Tgl, + Language::Tha => Locale::Tha, + Language::Tuk => Locale::Tuk, + Language::Tur => Locale::Tur, + Language::Ukr => Locale::Ukr, + Language::Urd => Locale::Urd, + Language::Uzb => Locale::Uzb, + Language::Vie => Locale::Vie, + Language::Yid => Locale::Yid, + Language::Zul => Locale::Zul, + } + } +} + +#[derive(Debug)] +pub struct LocaleFormatError { + pub invalid_locale: String, +} + +impl std::fmt::Display for LocaleFormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let valid_locales = Locale::iter() + .map(|l| format!("`{}`", json!(l).as_str().unwrap())) + .collect::>() + .join(", "); + write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) + } } impl std::error::Error for LocaleFormatError {} @@ -124,9 +400,154 @@ impl std::str::FromStr for Locale { type Err = LocaleFormatError; fn from_str(s: &str) -> Result { - milli::tokenizer::Language::from_code(s) - .map(Self::from) - .ok_or(LocaleFormatError { invalid_locale: s.to_string() }) + let locale = match s { + // ISO 639-3 + "afr" => Locale::Afr, + "aka" => Locale::Aka, + "amh" => Locale::Amh, + "ara" => Locale::Ara, + "aze" => Locale::Aze, + "bel" => Locale::Bel, + "ben" => Locale::Ben, + "bul" => Locale::Bul, + "cat" => Locale::Cat, + "ces" => Locale::Ces, + "cmn" => Locale::Cmn, + "dan" => Locale::Dan, + "deu" => Locale::Deu, + "ell" => Locale::Ell, + "eng" => Locale::Eng, + "epo" => Locale::Epo, + "est" => Locale::Est, + "fas" => Locale::Fas, + "fin" => Locale::Fin, + "fra" => Locale::Fra, + "guj" => Locale::Guj, + "heb" => Locale::Heb, + "hin" => Locale::Hin, + "hrv" => Locale::Hrv, + "hun" => Locale::Hun, + "hye" => Locale::Hye, + "ind" => Locale::Ind, + "ita" => Locale::Ita, + "jav" => Locale::Jav, + "jpn" => Locale::Jpn, + "kan" => Locale::Kan, + "kat" => Locale::Kat, + "khm" => Locale::Khm, + "kor" => Locale::Kor, + "lat" => Locale::Lat, + "lav" => Locale::Lav, + "lit" => Locale::Lit, + "mal" => Locale::Mal, + "mar" => Locale::Mar, + "mkd" => Locale::Mkd, + "mya" => Locale::Mya, + "nep" => Locale::Nep, + "nld" => Locale::Nld, + "nob" => Locale::Nob, + "ori" => Locale::Ori, + "pan" => Locale::Pan, + "pes" => Locale::Pes, + "pol" => Locale::Pol, + "por" => Locale::Por, + "ron" => Locale::Ron, + "rus" => Locale::Rus, + "sin" => Locale::Sin, + "slk" => Locale::Slk, + "slv" => Locale::Slv, + "sna" => Locale::Sna, + "spa" => Locale::Spa, + "srp" => Locale::Srp, + "swe" => Locale::Swe, + "tam" => Locale::Tam, + "tel" => Locale::Tel, + "tgl" => Locale::Tgl, + "tha" => Locale::Tha, + "tuk" => Locale::Tuk, + "tur" => Locale::Tur, + "ukr" => Locale::Ukr, + "urd" => Locale::Urd, + "uzb" => Locale::Uzb, + "vie" => Locale::Vie, + "yid" => Locale::Yid, + "zho" => Locale::Zho, + "zul" => Locale::Zul, + // ISO 639-1 + "af" => Locale::Af, + "ak" => Locale::Ak, + "am" => Locale::Am, + "ar" => Locale::Ar, + "az" => Locale::Az, + "be" => Locale::Be, + "bn" => Locale::Bn, + "bg" => Locale::Bg, + "ca" => Locale::Ca, + "cs" => Locale::Cs, + "zh" => Locale::Zh, + "da" => Locale::Da, + "de" => Locale::De, + "el" => Locale::El, + "en" => Locale::En, + "eo" => Locale::Eo, + "et" => Locale::Et, + "fi" => Locale::Fi, + "fr" => Locale::Fr, + "gu" => Locale::Gu, + "he" => Locale::He, + "hi" => Locale::Hi, + "hr" => Locale::Hr, + "hu" => Locale::Hu, + "hy" => Locale::Hy, + "id" => Locale::Id, + "it" => Locale::It, + "jv" => Locale::Jv, + "ja" => Locale::Ja, + "kn" => Locale::Kn, + "ka" => Locale::Ka, + "km" => Locale::Km, + "ko" => Locale::Ko, + "la" => Locale::La, + "lv" => Locale::Lv, + "lt" => Locale::Lt, + "ml" => Locale::Ml, + "mr" => Locale::Mr, + "mk" => Locale::Mk, + "my" => Locale::My, + "ne" => Locale::Ne, + "nl" => Locale::Nl, + "nb" => Locale::Nb, + "or" => Locale::Or, + "pa" => Locale::Pa, + "fa" => Locale::Fa, + "pl" => Locale::Pl, + "pt" => Locale::Pt, + "ro" => Locale::Ro, + "ru" => Locale::Ru, + "si" => Locale::Si, + "sk" => Locale::Sk, + "sl" => Locale::Sl, + "sn" => Locale::Sn, + "es" => Locale::Es, + "sr" => Locale::Sr, + "sv" => Locale::Sv, + "ta" => Locale::Ta, + "te" => Locale::Te, + "tl" => Locale::Tl, + "th" => Locale::Th, + "tk" => Locale::Tk, + "tr" => Locale::Tr, + "uk" => Locale::Uk, + "ur" => Locale::Ur, + "uz" => Locale::Uz, + "vi" => Locale::Vi, + "yi" => Locale::Yi, + "zu" => Locale::Zu, + // otherwise + _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), + }; + + Ok(locale) } } diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index dbc4fcc30..ff6bb5f4e 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -1133,76 +1133,147 @@ async fn force_different_locales_with_pattern_nested() { ) .await; + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "进击的巨人", + "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": 852, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "巨人", + "description": "巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": "852", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["ja"], "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" - { - "hits": [ - { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] + { + "hits": [ + { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "进击的巨人", + "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": 852, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "巨人", + "description": "巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": "852", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 } - } - } - ], - "query": "\"进击的巨人\"", - "processingTimeMs": "[duration]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 1 - } - "###); - snapshot!(code, @"200 OK"); - }, - ) - .await; + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; } #[actix_rt::test] @@ -1355,7 +1426,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `invalid` at `.locales[0]`: expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "message": "Unknown value `invalid` at `.locales[0]`: expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -1368,7 +1439,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -1390,7 +1461,7 @@ async fn invalid_localized_attributes_rules() { .await; snapshot!(response, @r###" { - "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", "code": "invalid_settings_localized_attributes", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" From a197d63ab64432fe123bc9743e8a9f3c21969f14 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 17 Sep 2024 15:30:12 +0200 Subject: [PATCH 2/4] simplify tests --- meilisearch/tests/search/locales.rs | 612 +++------------------------- 1 file changed, 54 insertions(+), 558 deletions(-) diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index ff6bb5f4e..f818898f1 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -103,41 +103,12 @@ async fn simple_search() { // english index - .search(json!({"q": "Atta", "attributesToHighlight": ["*"]}), |response, code| { + .search(json!({"q": "Atta", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "Atta", @@ -153,35 +124,12 @@ async fn simple_search() { // japanese index - .search(json!({"q": "進撃", "attributesToHighlight": ["*"]}), |response, code| { + .search(json!({"q": "進撃", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "進撃", @@ -197,68 +145,16 @@ async fn simple_search() { index .search( - json!({"q": "進撃", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "進撃", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 }, { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "進撃", @@ -275,67 +171,15 @@ async fn simple_search() { // chinese index - .search(json!({"q": "进击", "attributesToHighlight": ["*"]}), |response, code| { + .search(json!({"q": "进击", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 }, { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_en": "Attack on Titan", - "name_ja": "進撃の巨人", - "author_en": "Hajime Isayama", - "author_ja": "諫山 創", - "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "进击", @@ -382,36 +226,13 @@ async fn force_locales() { // chinese detection index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -429,36 +250,13 @@ async fn force_locales() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -506,36 +304,13 @@ async fn force_locales_with_pattern() { // chinese detection index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -553,36 +328,13 @@ async fn force_locales_with_pattern() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -628,7 +380,7 @@ async fn force_locales_with_pattern_nested() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -648,60 +400,13 @@ async fn force_locales_with_pattern_nested() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "\"进击的巨人\"", @@ -750,7 +455,7 @@ async fn force_different_locales_with_pattern() { // force chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -770,36 +475,13 @@ async fn force_different_locales_with_pattern() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -851,7 +533,7 @@ async fn auto_infer_locales_at_search_with_attributes_to_search_on() { // auto infer any language index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -871,36 +553,13 @@ async fn auto_infer_locales_at_search_with_attributes_to_search_on() { // should infer chinese index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"], "attributesToSearchOn": ["name_zh", "description_zh"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "attributesToSearchOn": ["name_zh", "description_zh"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -947,36 +606,13 @@ async fn auto_infer_locales_at_search() { index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -993,36 +629,13 @@ async fn auto_infer_locales_at_search() { index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -1039,36 +652,13 @@ async fn auto_infer_locales_at_search() { index .search( - json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "name_zh": "进击的巨人", - "author_zh": "諫山創", - "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "id": 853, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "name_zh": "巨人", - "author_zh": "諫山創", - "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", - "id": "853", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 853 } ], "query": "\"进击的巨人\"", @@ -1116,7 +706,7 @@ async fn force_different_locales_with_pattern_nested() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1136,60 +726,13 @@ async fn force_different_locales_with_pattern_nested() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } + "id": 852 } ], "query": "\"进击的巨人\"", @@ -1207,69 +750,22 @@ async fn force_different_locales_with_pattern_nested() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["ja"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["ja"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" - { - "hits": [ - { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "进击的巨人", - "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": 852, - "_vectors": { - "manual": [ - 1.0, - 2.0, - 3.0 - ] - }, - "_formatted": { - "document_en": { - "name": "Attack on Titan", - "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", - "author": "Hajime Isayama" - }, - "document_ja": { - "name": "進撃の巨人", - "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", - "author": "諫山 創" - }, - "document_zh": { - "name": "巨人", - "description": "巨人是日本的漫画系列,由諫山 創作画。", - "author": "諫山創" - }, - "id": "852", - "_vectors": { - "manual": [ - "1.0", - "2.0", - "3.0" - ] - } - } - } - ], - "query": "\"进击的巨人\"", - "processingTimeMs": "[duration]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 1 - } - "###); + { + "hits": [ + { + "id": 852 + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); snapshot!(code, @"200 OK"); }, ) @@ -1307,7 +803,7 @@ async fn settings_change() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1327,7 +823,7 @@ async fn settings_change() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1370,7 +866,7 @@ async fn settings_change() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { @@ -1390,7 +886,7 @@ async fn settings_change() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), |response, code| { snapshot!(response, @r###" { From 0fbf9ea5b15b689a39a5ffadbf7c65ea593a8f6b Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 17 Sep 2024 16:59:13 +0200 Subject: [PATCH 3/4] Factorize using macro --- Cargo.lock | 1 - meilisearch-types/Cargo.toml | 1 - meilisearch-types/src/locales.rs | 683 ++++++---------------------- meilisearch/tests/search/locales.rs | 6 +- 4 files changed, 136 insertions(+), 555 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ca6231355..3a2b09da2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3502,7 +3502,6 @@ dependencies = [ "serde", "serde-cs", "serde_json", - "strum", "tar", "tempfile", "thiserror", diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 237f21f47..73306c4dc 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -27,7 +27,6 @@ roaring = { version = "0.10.6", features = ["serde"] } serde = { version = "1.0.204", features = ["derive"] } serde-cs = "0.2.4" serde_json = "1.0.120" -strum = { version = "0.26", features = ["derive"] } tar = "0.4.41" tempfile = "3.10.1" thiserror = "1.0.61" diff --git a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs index 6b670f191..36c45aac3 100644 --- a/meilisearch-types/src/locales.rs +++ b/meilisearch-types/src/locales.rs @@ -1,555 +1,6 @@ use deserr::Deserr; -use milli::{tokenizer::Language, LocalizedAttributesRule}; +use milli::LocalizedAttributesRule; use serde::{Deserialize, Serialize}; -use serde_json::json; -use strum::{EnumIter, IntoEnumIterator}; - -#[derive( - Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd, EnumIter, -)] -#[deserr(rename_all = camelCase)] -#[serde(rename_all = "camelCase")] -pub enum Locale { - // ISO 639-3 - Afr, - Aka, - Amh, - Ara, - Aze, - Bel, - Ben, - Bul, - Cat, - Ces, - Cmn, - Dan, - Deu, - Ell, - Eng, - Epo, - Est, - Fas, - Fin, - Fra, - Guj, - Heb, - Hin, - Hrv, - Hun, - Hye, - Ind, - Ita, - Jav, - Jpn, - Kan, - Kat, - Khm, - Kor, - Lat, - Lav, - Lit, - Mal, - Mar, - Mkd, - Mya, - Nep, - Nld, - Nob, - Ori, - Pan, - Pes, - Pol, - Por, - Ron, - Rus, - Sin, - Slk, - Slv, - Sna, - Spa, - Srp, - Swe, - Tam, - Tel, - Tgl, - Tha, - Tuk, - Tur, - Ukr, - Urd, - Uzb, - Vie, - Yid, - Zho, - Zul, - // ISO 639-1 - Af, - Ak, - Am, - Ar, - Az, - Be, - Bn, - Bg, - Ca, - Cs, - Zh, - Da, - De, - El, - En, - Eo, - Et, - Fi, - Fr, - Gu, - He, - Hi, - Hr, - Hu, - Hy, - Id, - It, - Jv, - Ja, - Kn, - Ka, - Km, - Ko, - La, - Lv, - Lt, - Ml, - Mr, - Mk, - My, - Ne, - Nl, - Nb, - Or, - Pa, - Fa, - Pl, - Pt, - Ro, - Ru, - Si, - Sk, - Sl, - Sn, - Es, - Sr, - Sv, - Ta, - Te, - Tl, - Th, - Tk, - Tr, - Uk, - Ur, - Uz, - Vi, - Yi, - Zu, -} - -impl From for Language { - fn from(other: Locale) -> Language { - match other { - // ISO 639-3 - Locale::Afr => Language::Afr, - Locale::Aka => Language::Aka, - Locale::Amh => Language::Amh, - Locale::Ara => Language::Ara, - Locale::Aze => Language::Aze, - Locale::Bel => Language::Bel, - Locale::Ben => Language::Ben, - Locale::Bul => Language::Bul, - Locale::Cat => Language::Cat, - Locale::Ces => Language::Ces, - Locale::Cmn => Language::Cmn, - Locale::Dan => Language::Dan, - Locale::Deu => Language::Deu, - Locale::Ell => Language::Ell, - Locale::Eng => Language::Eng, - Locale::Epo => Language::Epo, - Locale::Est => Language::Est, - Locale::Fas => Language::Pes, - Locale::Fin => Language::Fin, - Locale::Fra => Language::Fra, - Locale::Guj => Language::Guj, - Locale::Heb => Language::Heb, - Locale::Hin => Language::Hin, - Locale::Hrv => Language::Hrv, - Locale::Hun => Language::Hun, - Locale::Hye => Language::Hye, - Locale::Ind => Language::Ind, - Locale::Ita => Language::Ita, - Locale::Jav => Language::Jav, - Locale::Jpn => Language::Jpn, - Locale::Kan => Language::Kan, - Locale::Kat => Language::Kat, - Locale::Khm => Language::Khm, - Locale::Kor => Language::Kor, - Locale::Lat => Language::Lat, - Locale::Lav => Language::Lav, - Locale::Lit => Language::Lit, - Locale::Mal => Language::Mal, - Locale::Mar => Language::Mar, - Locale::Mkd => Language::Mkd, - Locale::Mya => Language::Mya, - Locale::Nep => Language::Nep, - Locale::Nld => Language::Nld, - Locale::Nob => Language::Nob, - Locale::Ori => Language::Ori, - Locale::Pan => Language::Pan, - Locale::Pes => Language::Pes, - Locale::Pol => Language::Pol, - Locale::Por => Language::Por, - Locale::Ron => Language::Ron, - Locale::Rus => Language::Rus, - Locale::Sin => Language::Sin, - Locale::Slk => Language::Slk, - Locale::Slv => Language::Slv, - Locale::Sna => Language::Sna, - Locale::Spa => Language::Spa, - Locale::Srp => Language::Srp, - Locale::Swe => Language::Swe, - Locale::Tam => Language::Tam, - Locale::Tel => Language::Tel, - Locale::Tgl => Language::Tgl, - Locale::Tha => Language::Tha, - Locale::Tuk => Language::Tuk, - Locale::Tur => Language::Tur, - Locale::Ukr => Language::Ukr, - Locale::Urd => Language::Urd, - Locale::Uzb => Language::Uzb, - Locale::Vie => Language::Vie, - Locale::Yid => Language::Yid, - Locale::Zho => Language::Cmn, - Locale::Zul => Language::Zul, - // ISO 639-1 - Locale::Af => Language::Afr, - Locale::Ak => Language::Aka, - Locale::Am => Language::Amh, - Locale::Ar => Language::Ara, - Locale::Az => Language::Aze, - Locale::Be => Language::Bel, - Locale::Bn => Language::Ben, - Locale::Bg => Language::Bul, - Locale::Ca => Language::Cat, - Locale::Cs => Language::Ces, - Locale::Zh => Language::Cmn, - Locale::Da => Language::Dan, - Locale::De => Language::Deu, - Locale::El => Language::Ell, - Locale::En => Language::Eng, - Locale::Eo => Language::Epo, - Locale::Et => Language::Est, - Locale::Fi => Language::Fin, - Locale::Fr => Language::Fra, - Locale::Gu => Language::Guj, - Locale::He => Language::Heb, - Locale::Hi => Language::Hin, - Locale::Hr => Language::Hrv, - Locale::Hu => Language::Hun, - Locale::Hy => Language::Hye, - Locale::Id => Language::Ind, - Locale::It => Language::Ita, - Locale::Jv => Language::Jav, - Locale::Ja => Language::Jpn, - Locale::Kn => Language::Kan, - Locale::Ka => Language::Kat, - Locale::Km => Language::Khm, - Locale::Ko => Language::Kor, - Locale::La => Language::Lat, - Locale::Lv => Language::Lav, - Locale::Lt => Language::Lit, - Locale::Ml => Language::Mal, - Locale::Mr => Language::Mar, - Locale::Mk => Language::Mkd, - Locale::My => Language::Mya, - Locale::Ne => Language::Nep, - Locale::Nl => Language::Nld, - Locale::Nb => Language::Nob, - Locale::Or => Language::Ori, - Locale::Pa => Language::Pan, - Locale::Fa => Language::Pes, - Locale::Pl => Language::Pol, - Locale::Pt => Language::Por, - Locale::Ro => Language::Ron, - Locale::Ru => Language::Rus, - Locale::Si => Language::Sin, - Locale::Sk => Language::Slk, - Locale::Sl => Language::Slv, - Locale::Sn => Language::Sna, - Locale::Es => Language::Spa, - Locale::Sr => Language::Srp, - Locale::Sv => Language::Swe, - Locale::Ta => Language::Tam, - Locale::Te => Language::Tel, - Locale::Tl => Language::Tgl, - Locale::Th => Language::Tha, - Locale::Tk => Language::Tuk, - Locale::Tr => Language::Tur, - Locale::Uk => Language::Ukr, - Locale::Ur => Language::Urd, - Locale::Uz => Language::Uzb, - Locale::Vi => Language::Vie, - Locale::Yi => Language::Yid, - Locale::Zu => Language::Zul, - } - } -} - -impl From for Locale { - fn from(other: Language) -> Locale { - match other { - Language::Afr => Locale::Afr, - Language::Aka => Locale::Aka, - Language::Amh => Locale::Amh, - Language::Ara => Locale::Ara, - Language::Aze => Locale::Aze, - Language::Bel => Locale::Bel, - Language::Ben => Locale::Ben, - Language::Bul => Locale::Bul, - Language::Cat => Locale::Cat, - Language::Ces => Locale::Ces, - Language::Cmn => Locale::Zho, - Language::Dan => Locale::Dan, - Language::Deu => Locale::Deu, - Language::Ell => Locale::Ell, - Language::Eng => Locale::Eng, - Language::Epo => Locale::Epo, - Language::Est => Locale::Est, - Language::Fin => Locale::Fin, - Language::Fra => Locale::Fra, - Language::Guj => Locale::Guj, - Language::Heb => Locale::Heb, - Language::Hin => Locale::Hin, - Language::Hrv => Locale::Hrv, - Language::Hun => Locale::Hun, - Language::Hye => Locale::Hye, - Language::Ind => Locale::Ind, - Language::Ita => Locale::Ita, - Language::Jav => Locale::Jav, - Language::Jpn => Locale::Jpn, - Language::Kan => Locale::Kan, - Language::Kat => Locale::Kat, - Language::Khm => Locale::Khm, - Language::Kor => Locale::Kor, - Language::Lat => Locale::Lat, - Language::Lav => Locale::Lav, - Language::Lit => Locale::Lit, - Language::Mal => Locale::Mal, - Language::Mar => Locale::Mar, - Language::Mkd => Locale::Mkd, - Language::Mya => Locale::Mya, - Language::Nep => Locale::Nep, - Language::Nld => Locale::Nld, - Language::Nob => Locale::Nob, - Language::Ori => Locale::Ori, - Language::Pan => Locale::Pan, - Language::Pes => Locale::Fas, - Language::Pol => Locale::Pol, - Language::Por => Locale::Por, - Language::Ron => Locale::Ron, - Language::Rus => Locale::Rus, - Language::Sin => Locale::Sin, - Language::Slk => Locale::Slk, - Language::Slv => Locale::Slv, - Language::Sna => Locale::Sna, - Language::Spa => Locale::Spa, - Language::Srp => Locale::Srp, - Language::Swe => Locale::Swe, - Language::Tam => Locale::Tam, - Language::Tel => Locale::Tel, - Language::Tgl => Locale::Tgl, - Language::Tha => Locale::Tha, - Language::Tuk => Locale::Tuk, - Language::Tur => Locale::Tur, - Language::Ukr => Locale::Ukr, - Language::Urd => Locale::Urd, - Language::Uzb => Locale::Uzb, - Language::Vie => Locale::Vie, - Language::Yid => Locale::Yid, - Language::Zul => Locale::Zul, - } - } -} - -#[derive(Debug)] -pub struct LocaleFormatError { - pub invalid_locale: String, -} - -impl std::fmt::Display for LocaleFormatError { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - let valid_locales = Locale::iter() - .map(|l| format!("`{}`", json!(l).as_str().unwrap())) - .collect::>() - .join(", "); - write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) - } -} - -impl std::error::Error for LocaleFormatError {} - -impl std::str::FromStr for Locale { - type Err = LocaleFormatError; - - fn from_str(s: &str) -> Result { - let locale = match s { - // ISO 639-3 - "afr" => Locale::Afr, - "aka" => Locale::Aka, - "amh" => Locale::Amh, - "ara" => Locale::Ara, - "aze" => Locale::Aze, - "bel" => Locale::Bel, - "ben" => Locale::Ben, - "bul" => Locale::Bul, - "cat" => Locale::Cat, - "ces" => Locale::Ces, - "cmn" => Locale::Cmn, - "dan" => Locale::Dan, - "deu" => Locale::Deu, - "ell" => Locale::Ell, - "eng" => Locale::Eng, - "epo" => Locale::Epo, - "est" => Locale::Est, - "fas" => Locale::Fas, - "fin" => Locale::Fin, - "fra" => Locale::Fra, - "guj" => Locale::Guj, - "heb" => Locale::Heb, - "hin" => Locale::Hin, - "hrv" => Locale::Hrv, - "hun" => Locale::Hun, - "hye" => Locale::Hye, - "ind" => Locale::Ind, - "ita" => Locale::Ita, - "jav" => Locale::Jav, - "jpn" => Locale::Jpn, - "kan" => Locale::Kan, - "kat" => Locale::Kat, - "khm" => Locale::Khm, - "kor" => Locale::Kor, - "lat" => Locale::Lat, - "lav" => Locale::Lav, - "lit" => Locale::Lit, - "mal" => Locale::Mal, - "mar" => Locale::Mar, - "mkd" => Locale::Mkd, - "mya" => Locale::Mya, - "nep" => Locale::Nep, - "nld" => Locale::Nld, - "nob" => Locale::Nob, - "ori" => Locale::Ori, - "pan" => Locale::Pan, - "pes" => Locale::Pes, - "pol" => Locale::Pol, - "por" => Locale::Por, - "ron" => Locale::Ron, - "rus" => Locale::Rus, - "sin" => Locale::Sin, - "slk" => Locale::Slk, - "slv" => Locale::Slv, - "sna" => Locale::Sna, - "spa" => Locale::Spa, - "srp" => Locale::Srp, - "swe" => Locale::Swe, - "tam" => Locale::Tam, - "tel" => Locale::Tel, - "tgl" => Locale::Tgl, - "tha" => Locale::Tha, - "tuk" => Locale::Tuk, - "tur" => Locale::Tur, - "ukr" => Locale::Ukr, - "urd" => Locale::Urd, - "uzb" => Locale::Uzb, - "vie" => Locale::Vie, - "yid" => Locale::Yid, - "zho" => Locale::Zho, - "zul" => Locale::Zul, - // ISO 639-1 - "af" => Locale::Af, - "ak" => Locale::Ak, - "am" => Locale::Am, - "ar" => Locale::Ar, - "az" => Locale::Az, - "be" => Locale::Be, - "bn" => Locale::Bn, - "bg" => Locale::Bg, - "ca" => Locale::Ca, - "cs" => Locale::Cs, - "zh" => Locale::Zh, - "da" => Locale::Da, - "de" => Locale::De, - "el" => Locale::El, - "en" => Locale::En, - "eo" => Locale::Eo, - "et" => Locale::Et, - "fi" => Locale::Fi, - "fr" => Locale::Fr, - "gu" => Locale::Gu, - "he" => Locale::He, - "hi" => Locale::Hi, - "hr" => Locale::Hr, - "hu" => Locale::Hu, - "hy" => Locale::Hy, - "id" => Locale::Id, - "it" => Locale::It, - "jv" => Locale::Jv, - "ja" => Locale::Ja, - "kn" => Locale::Kn, - "ka" => Locale::Ka, - "km" => Locale::Km, - "ko" => Locale::Ko, - "la" => Locale::La, - "lv" => Locale::Lv, - "lt" => Locale::Lt, - "ml" => Locale::Ml, - "mr" => Locale::Mr, - "mk" => Locale::Mk, - "my" => Locale::My, - "ne" => Locale::Ne, - "nl" => Locale::Nl, - "nb" => Locale::Nb, - "or" => Locale::Or, - "pa" => Locale::Pa, - "fa" => Locale::Fa, - "pl" => Locale::Pl, - "pt" => Locale::Pt, - "ro" => Locale::Ro, - "ru" => Locale::Ru, - "si" => Locale::Si, - "sk" => Locale::Sk, - "sl" => Locale::Sl, - "sn" => Locale::Sn, - "es" => Locale::Es, - "sr" => Locale::Sr, - "sv" => Locale::Sv, - "ta" => Locale::Ta, - "te" => Locale::Te, - "tl" => Locale::Tl, - "th" => Locale::Th, - "tk" => Locale::Tk, - "tr" => Locale::Tr, - "uk" => Locale::Uk, - "ur" => Locale::Ur, - "uz" => Locale::Uz, - "vi" => Locale::Vi, - "yi" => Locale::Yi, - "zu" => Locale::Zu, - // otherwise - _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), - }; - - Ok(locale) - } -} #[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)] #[deserr(rename_all = camelCase)] @@ -576,3 +27,135 @@ impl From for LocalizedAttributesRule { } } } + +/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language. +/// +/// this enum implements `Deserr` in order to be used in the API. +macro_rules! make_locale { + ($(($iso_639_1:ident, $iso_639_1_str:expr) => ($iso_639_3:ident, $iso_639_3_str:expr),)+) => { + #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)] + #[deserr(rename_all = camelCase)] + #[serde(rename_all = "camelCase")] + pub enum Locale { + $($iso_639_1,)+ + $($iso_639_3,)+ + } + + impl From for Locale { + fn from(other: milli::tokenizer::Language) -> Locale { + match other { + $(milli::tokenizer::Language::$iso_639_3 => Locale::$iso_639_3,)+ + } + } + } + + impl From for milli::tokenizer::Language { + fn from(other: Locale) -> milli::tokenizer::Language { + match other { + $(Locale::$iso_639_1 => milli::tokenizer::Language::$iso_639_3,)+ + $(Locale::$iso_639_3 => milli::tokenizer::Language::$iso_639_3,)+ + } + } + } + + impl std::str::FromStr for Locale { + type Err = LocaleFormatError; + + fn from_str(s: &str) -> Result { + let locale = match s { + $($iso_639_1_str => Locale::$iso_639_1,)+ + $($iso_639_3_str => Locale::$iso_639_3,)+ + _ => return Err(LocaleFormatError { invalid_locale: s.to_string() }), + }; + + Ok(locale) + } + } + + #[derive(Debug)] + pub struct LocaleFormatError { + pub invalid_locale: String, + } + + impl std::fmt::Display for LocaleFormatError { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let valid_locales = [$($iso_639_1_str),+,$($iso_639_3_str),+].join(", "); + write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) + } + } + + impl std::error::Error for LocaleFormatError {} + }; +} + +make_locale!( + (Af, "af") => (Afr, "afr"), + (Ak, "ak") => (Aka, "aka"), + (Am, "am") => (Amh, "amh"), + (Ar, "ar") => (Ara, "ara"), + (Az, "az") => (Aze, "aze"), + (Be, "be") => (Bel, "bel"), + (Bn, "bn") => (Ben, "ben"), + (Bg, "bg") => (Bul, "bul"), + (Ca, "ca") => (Cat, "cat"), + (Cs, "cs") => (Ces, "ces"), + (Zh, "zh") => (Cmn, "cmn"), + (Da, "da") => (Dan, "dan"), + (De, "de") => (Deu, "deu"), + (El, "el") => (Ell, "ell"), + (En, "en") => (Eng, "eng"), + (Eo, "eo") => (Epo, "epo"), + (Et, "et") => (Est, "est"), + (Fi, "fi") => (Fin, "fin"), + (Fr, "fr") => (Fra, "fra"), + (Gu, "gu") => (Guj, "guj"), + (He, "he") => (Heb, "heb"), + (Hi, "hi") => (Hin, "hin"), + (Hr, "hr") => (Hrv, "hrv"), + (Hu, "hu") => (Hun, "hun"), + (Hy, "hy") => (Hye, "hye"), + (Id, "id") => (Ind, "ind"), + (It, "it") => (Ita, "ita"), + (Jv, "jv") => (Jav, "jav"), + (Ja, "ja") => (Jpn, "jpn"), + (Kn, "kn") => (Kan, "kan"), + (Ka, "ka") => (Kat, "kat"), + (Km, "km") => (Khm, "khm"), + (Ko, "ko") => (Kor, "kor"), + (La, "la") => (Lat, "lat"), + (Lv, "lv") => (Lav, "lav"), + (Lt, "lt") => (Lit, "lit"), + (Ml, "ml") => (Mal, "mal"), + (Mr, "mr") => (Mar, "mar"), + (Mk, "mk") => (Mkd, "mkd"), + (My, "my") => (Mya, "mya"), + (Ne, "ne") => (Nep, "nep"), + (Nl, "nl") => (Nld, "nld"), + (Nb, "nb") => (Nob, "nob"), + (Or, "or") => (Ori, "ori"), + (Pa, "pa") => (Pan, "pan"), + (Fa, "fa") => (Pes, "pes"), + (Pl, "pl") => (Pol, "pol"), + (Pt, "pt") => (Por, "por"), + (Ro, "ro") => (Ron, "ron"), + (Ru, "ru") => (Rus, "rus"), + (Si, "si") => (Sin, "sin"), + (Sk, "sk") => (Slk, "slk"), + (Sl, "sl") => (Slv, "slv"), + (Sn, "sn") => (Sna, "sna"), + (Es, "es") => (Spa, "spa"), + (Sr, "sr") => (Srp, "srp"), + (Sv, "sv") => (Swe, "swe"), + (Ta, "ta") => (Tam, "tam"), + (Te, "te") => (Tel, "tel"), + (Tl, "tl") => (Tgl, "tgl"), + (Th, "th") => (Tha, "tha"), + (Tk, "tk") => (Tuk, "tuk"), + (Tr, "tr") => (Tur, "tur"), + (Uk, "uk") => (Ukr, "ukr"), + (Ur, "ur") => (Urd, "urd"), + (Uz, "uz") => (Uzb, "uzb"), + (Vi, "vi") => (Vie, "vie"), + (Yi, "yi") => (Yid, "yid"), + (Zu, "zu") => (Zul, "zul"), +); diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index f818898f1..3ac35ab5d 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -922,7 +922,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Unknown value `invalid` at `.locales[0]`: expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", + "message": "Unknown value `invalid` at `.locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -935,7 +935,7 @@ async fn invalid_locales() { snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", + "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of af, ak, am, ar, az, be, bn, bg, ca, cs, zh, da, de, el, en, eo, et, fi, fr, gu, he, hi, hr, hu, hy, id, it, jv, ja, kn, ka, km, ko, la, lv, lt, ml, mr, mk, my, ne, nl, nb, or, pa, fa, pl, pt, ro, ru, si, sk, sl, sn, es, sr, sv, ta, te, tl, th, tk, tr, uk, ur, uz, vi, yi, zu, afr, aka, amh, ara, aze, bel, ben, bul, cat, ces, cmn, dan, deu, ell, eng, epo, est, fin, fra, guj, heb, hin, hrv, hun, hye, ind, ita, jav, jpn, kan, kat, khm, kor, lat, lav, lit, mal, mar, mkd, mya, nep, nld, nob, ori, pan, pes, pol, por, ron, rus, sin, slk, slv, sna, spa, srp, swe, tam, tel, tgl, tha, tuk, tur, ukr, urd, uzb, vie, yid, zul", "code": "invalid_search_locales", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_search_locales" @@ -957,7 +957,7 @@ async fn invalid_localized_attributes_rules() { .await; snapshot!(response, @r###" { - "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fas`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zho`, `zul`, `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`", + "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `af`, `ak`, `am`, `ar`, `az`, `be`, `bn`, `bg`, `ca`, `cs`, `zh`, `da`, `de`, `el`, `en`, `eo`, `et`, `fi`, `fr`, `gu`, `he`, `hi`, `hr`, `hu`, `hy`, `id`, `it`, `jv`, `ja`, `kn`, `ka`, `km`, `ko`, `la`, `lv`, `lt`, `ml`, `mr`, `mk`, `my`, `ne`, `nl`, `nb`, `or`, `pa`, `fa`, `pl`, `pt`, `ro`, `ru`, `si`, `sk`, `sl`, `sn`, `es`, `sr`, `sv`, `ta`, `te`, `tl`, `th`, `tk`, `tr`, `uk`, `ur`, `uz`, `vi`, `yi`, `zu`, `afr`, `aka`, `amh`, `ara`, `aze`, `bel`, `ben`, `bul`, `cat`, `ces`, `cmn`, `dan`, `deu`, `ell`, `eng`, `epo`, `est`, `fin`, `fra`, `guj`, `heb`, `hin`, `hrv`, `hun`, `hye`, `ind`, `ita`, `jav`, `jpn`, `kan`, `kat`, `khm`, `kor`, `lat`, `lav`, `lit`, `mal`, `mar`, `mkd`, `mya`, `nep`, `nld`, `nob`, `ori`, `pan`, `pes`, `pol`, `por`, `ron`, `rus`, `sin`, `slk`, `slv`, `sna`, `spa`, `srp`, `swe`, `tam`, `tel`, `tgl`, `tha`, `tuk`, `tur`, `ukr`, `urd`, `uzb`, `vie`, `yid`, `zul`", "code": "invalid_settings_localized_attributes", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" From 6e058709f2d76511e4b48723e2af08e5f589fbad Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 17 Sep 2024 17:02:06 +0200 Subject: [PATCH 4/4] Rustfmt --- meilisearch/tests/search/locales.rs | 40 ++++++++++++++--------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 3ac35ab5d..4724f975d 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -628,10 +628,10 @@ async fn auto_infer_locales_at_search() { .await; index - .search( - json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), - |response, code| { - snapshot!(response, @r###" + .search( + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), + |response, code| { + snapshot!(response, @r###" { "hits": [ { @@ -645,10 +645,10 @@ async fn auto_infer_locales_at_search() { "estimatedTotalHits": 1 } "###); - snapshot!(code, @"200 OK"); - }, - ) - .await; + snapshot!(code, @"200 OK"); + }, + ) + .await; index .search( @@ -725,10 +725,10 @@ async fn force_different_locales_with_pattern_nested() { // force japanese index - .search( - json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), - |response, code| { - snapshot!(response, @r###" + .search( + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToRetrieve": ["id"]}), + |response, code| { + snapshot!(response, @r###" { "hits": [ { @@ -742,10 +742,10 @@ async fn force_different_locales_with_pattern_nested() { "estimatedTotalHits": 1 } "###); - snapshot!(code, @"200 OK"); - }, - ) - .await; + snapshot!(code, @"200 OK"); + }, + ) + .await; // force japanese index @@ -766,10 +766,10 @@ async fn force_different_locales_with_pattern_nested() { "estimatedTotalHits": 1 } "###); - snapshot!(code, @"200 OK"); - }, - ) - .await; + snapshot!(code, @"200 OK"); + }, + ) + .await; } #[actix_rt::test]