Update cjk filter

This commit is contained in:
kwangin.jung 2020-01-30 09:55:16 +09:00
parent cde8845143
commit 2b5d153361
1 changed file with 15 additions and 8 deletions

View File

@@ -4,15 +4,22 @@ use slice_group_by::StrGroupBy;
use std::iter::Peekable;
/// Returns `true` when `c` lies in one of the Unicode ranges this filter
/// treats as CJK (Chinese, Japanese, Korean) text.
///
/// The check is a fixed list of code-point ranges, all inside the Basic
/// Multilingual Plane; any character outside the listed ranges (including
/// supplementary-plane ideographs) yields `false`.
pub fn is_cjk(c: char) -> bool {
    match c {
        '\u{1100}'..='\u{11ff}' // Hangul Jamo
        | '\u{2e80}'..='\u{2eff}' // CJK Radicals Supplement
        | '\u{2f00}'..='\u{2fdf}' // Kangxi Radicals
        | '\u{3000}'..='\u{303f}' // CJK Symbols and Punctuation (Japanese-style punctuation)
        | '\u{3040}'..='\u{309f}' // Japanese Hiragana
        | '\u{30a0}'..='\u{30ff}' // Japanese Katakana
        | '\u{3100}'..='\u{312f}' // Bopomofo
        | '\u{3130}'..='\u{318f}' // Hangul Compatibility Jamo
        | '\u{3200}'..='\u{32ff}' // Enclosed CJK Letters and Months
        | '\u{3400}'..='\u{4dbf}' // CJK Unified Ideographs Extension A
        | '\u{4e00}'..='\u{9fff}' // CJK Unified Ideographs
        | '\u{a960}'..='\u{a97f}' // Hangul Jamo Extended-A
        | '\u{ac00}'..='\u{d7a3}' // Hangul Syllables (range stops at U+D7A3)
        | '\u{d7b0}'..='\u{d7ff}' // Hangul Jamo Extended-B
        | '\u{f900}'..='\u{faff}' // CJK Compatibility Ideographs
        | '\u{ff00}'..='\u{ffef}' // Full-width roman characters and half-width katakana
        => true,
        _ => false,
    }
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]