Merge pull request #456 from djKooks/update/cjk-filter-ko-ja

Update CJK filter
This commit is contained in:
Clément Renault 2020-01-30 09:46:08 +01:00 committed by GitHub
commit 43ce45f62b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -4,15 +4,22 @@ use slice_group_by::StrGroupBy;
use std::iter::Peekable;
/// Returns `true` when `c` falls in one of the Unicode ranges this module
/// treats as CJK (Chinese, Japanese, Korean) text.
///
/// The check is a pure code-point range test: it covers Han ideographs and
/// radicals, Japanese kana, Bopomofo, Korean Hangul (jamo and precomposed
/// syllables), plus the CJK punctuation and half-/full-width form blocks.
/// Characters outside the listed ranges (including Latin letters and
/// supplementary-plane ideographs) yield `false`.
pub fn is_cjk(c: char) -> bool {
    match c {
        '\u{1100}'..='\u{11ff}'    // Hangul Jamo
        | '\u{2e80}'..='\u{2eff}'  // CJK Radicals Supplement
        | '\u{2f00}'..='\u{2fdf}'  // Kangxi Radicals
        | '\u{3000}'..='\u{303f}'  // CJK Symbols and Punctuation (Japanese-style punctuation)
        | '\u{3040}'..='\u{309f}'  // Japanese Hiragana
        | '\u{30a0}'..='\u{30ff}'  // Japanese Katakana
        | '\u{3100}'..='\u{312f}'  // Bopomofo
        | '\u{3130}'..='\u{318f}'  // Hangul Compatibility Jamo
        | '\u{3200}'..='\u{32ff}'  // Enclosed CJK Letters and Months
        | '\u{3400}'..='\u{4dbf}'  // CJK Unified Ideographs Extension A
        | '\u{4e00}'..='\u{9fff}'  // CJK Unified Ideographs
        | '\u{a960}'..='\u{a97f}'  // Hangul Jamo Extended-A
        // Stops at U+D7A3, the last syllable in this range; U+D7A4..=U+D7AF stay excluded.
        | '\u{ac00}'..='\u{d7a3}'  // Hangul Syllables
        | '\u{d7b0}'..='\u{d7ff}'  // Hangul Jamo Extended-B
        | '\u{f900}'..='\u{faff}'  // CJK Compatibility Ideographs
        | '\u{ff00}'..='\u{ffef}'  // Full-width roman characters and half-width katakana
        => true,
        _ => false,
    }
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]