/// Returns `true` if `c` lies inside one of the Unicode CJK ideograph blocks.
///
/// This is a pure block-range test: every code point inside a listed block is
/// accepted, whether or not a character is currently assigned there.
///
/// Covered blocks:
/// - CJK Unified Ideographs and Extensions A through I
/// - CJK Compatibility Ideographs
/// - CJK Compatibility Ideographs Supplement
pub fn is_cjk_ideograph(c: char) -> bool {
    matches!(
        c,
        // Basic Multilingual Plane.
        '\u{3400}'..='\u{4DBF}'          // CJK Unified Ideographs Extension A
            | '\u{4E00}'..='\u{9FFF}'    // CJK Unified Ideographs
            | '\u{F900}'..='\u{FAFF}'    // CJK Compatibility Ideographs
            // Supplementary Ideographic Plane (plane 2).
            | '\u{20000}'..='\u{2A6DF}'  // Extension B
            | '\u{2A700}'..='\u{2B73F}'  // Extension C
            | '\u{2B740}'..='\u{2B81F}'  // Extension D
            | '\u{2B820}'..='\u{2CEAF}'  // Extension E
            | '\u{2CEB0}'..='\u{2EBEF}'  // Extension F
            | '\u{2EBF0}'..='\u{2EE5F}'  // Extension I
            | '\u{2F800}'..='\u{2FA1F}'  // CJK Compatibility Ideographs Supplement
            // Tertiary Ideographic Plane (plane 3).
            | '\u{30000}'..='\u{3134F}'  // Extension G
            | '\u{31350}'..='\u{323AF}'  // Extension H
    )
}
/// Returns `true` if `c` lies in the Unicode Hiragana block (U+3040–U+309F).
///
/// Both endpoints are inclusive; the check is purely on the code point range.
pub fn is_hiragana(c: char) -> bool {
    matches!(c, '\u{3040}'..='\u{309F}')
}
/// Returns `true` if `c` lies in the Unicode Katakana block (U+30A0–U+30FF).
///
/// Both endpoints are inclusive; the check is purely on the code point range.
pub fn is_katakana(c: char) -> bool {
    matches!(c, '\u{30A0}'..='\u{30FF}')
}
/// Returns `true` if `c` lies in the Unicode Hangul Syllables block
/// (U+AC00–U+D7AF, both endpoints inclusive).
///
/// Individual jamo are not matched here; see [`is_hangul_jamo`] for those.
pub fn is_hangul(c: char) -> bool {
    const FIRST_SYLLABLE: char = '\u{AC00}';
    const LAST_IN_BLOCK: char = '\u{D7AF}';

    (FIRST_SYLLABLE..=LAST_IN_BLOCK).contains(&c)
}
/// Returns `true` if `c` is in either of two Hangul jamo blocks:
///
/// - Hangul Jamo (U+1100–U+11FF)
/// - Hangul Compatibility Jamo (U+3130–U+318F)
///
/// Both ranges are inclusive at each end.
pub fn is_hangul_jamo(c: char) -> bool {
    let in_jamo_block = ('\u{1100}'..='\u{11FF}').contains(&c);
    let in_compatibility_block = ('\u{3130}'..='\u{318F}').contains(&c);

    in_jamo_block || in_compatibility_block
}
pub fn is_cjk(c: char) -> bool {
is_cjk_ideograph(c) || is_hiragana(c) || is_katakana(c) || is_hangul(c)
}
/// Returns `true` if `c` lies in the Unicode Thai block (U+0E00–U+0E7F).
///
/// Both endpoints are inclusive; the check is purely on the code point range.
pub fn is_thai(c: char) -> bool {
    matches!(c, '\u{0E00}'..='\u{0E7F}')
}
/// Returns `true` if `c` is CJK according to [`is_cjk`] or Thai according to
/// [`is_thai`].
///
/// The CJK check runs first; the Thai check is only evaluated when it fails.
pub fn needs_segmentation(c: char) -> bool {
    if is_cjk(c) {
        return true;
    }
    is_thai(c)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `predicate` accepts every character in `accepted` and
    /// rejects every character in `rejected`.
    fn check(predicate: fn(char) -> bool, accepted: &[char], rejected: &[char]) {
        for &ch in accepted {
            assert!(predicate(ch), "expected {ch:?} to be accepted");
        }
        for &ch in rejected {
            assert!(!predicate(ch), "expected {ch:?} to be rejected");
        }
    }

    #[test]
    fn cjk_detection() {
        check(is_cjk_ideograph, &['中', '全'], &['a', 'Ω']);
    }

    #[test]
    fn hiragana_detection() {
        // Katakana 'ア' must not be mistaken for hiragana.
        check(is_hiragana, &['あ', 'ん'], &['ア']);
    }

    #[test]
    fn katakana_detection() {
        // Hiragana 'あ' must not be mistaken for katakana.
        check(is_katakana, &['ア', 'ン'], &['あ']);
    }

    #[test]
    fn hangul_detection() {
        check(is_hangul, &['한', '글'], &['a']);
    }

    #[test]
    fn cjk_combined() {
        // One representative per script that `is_cjk` aggregates.
        check(is_cjk, &['中', 'あ', 'ア', '한'], &['a', 'α']);
    }

    #[test]
    fn thai_detection() {
        check(is_thai, &['ก', 'ๆ'], &['a']);
    }
}