Add missing Korean support

Some configurations are missing the `korean` feature. Add it where it is
missing, and add a test case in `milli/src/search/mod.rs`.
This commit is contained in:
Junho Choi 2024-05-30 17:28:11 +09:00
parent cade18bd47
commit 2e0ff56f3f
3 changed files with 27 additions and 0 deletions

View File

@ -49,6 +49,8 @@ chinese-pinyin = ["milli/chinese-pinyin"]
hebrew = ["milli/hebrew"]
# japanese specialized tokenization
japanese = ["milli/japanese"]
# korean specialized tokenization
korean = ["milli/korean"]
# thai specialized tokenization
thai = ["milli/thai"]
# allow greek specialized tokenization

View File

@ -151,6 +151,7 @@ chinese = ["meilisearch-types/chinese"]
chinese-pinyin = ["meilisearch-types/chinese-pinyin"]
hebrew = ["meilisearch-types/hebrew"]
japanese = ["meilisearch-types/japanese"]
korean = ["meilisearch-types/korean"]
thai = ["meilisearch-types/thai"]
greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"]

View File

@ -336,4 +336,28 @@ mod test {
assert_eq!(documents_ids, vec![1]);
}
#[cfg(feature = "korean")]
#[test]
/// Indexes one English, one Korean and one Hebrew document, then checks that
/// a Hangul query returns only the Korean document (id 1).
fn test_hangul_language_detection() {
    use crate::index::tests::TempIndex;

    // Build the batch up front so the indexing call reads on a single line.
    let mixed_script_docs = documents!([
        { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
        { "id": 1, "title": "김밥먹을래。" },
        { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
    ]);

    let index = TempIndex::new();
    index.add_documents(mixed_script_docs).unwrap();

    // The search runs inside a transaction opened on the freshly filled index.
    let wtxn = index.write_txn().unwrap();
    let mut hangul_search = Search::new(&wtxn, &index);
    hangul_search.query("김밥");

    // Only the document whose title contains the Hangul query should match.
    let outcome = hangul_search.execute().unwrap();
    assert_eq!(outcome.documents_ids, vec![1]);
}
}