// ratex_types/unicode_scripts.rs

/// Script families distinguished by this module's code-point lookup.
///
/// The type is a small `Copy` tag. It also derives `Hash` so that it can be
/// used as a key in hash-based collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum UnicodeScript {
    /// Named `"latin"` by [`UnicodeScript::as_str`].
    Latin,
    /// Named `"cyrillic"` by [`UnicodeScript::as_str`].
    Cyrillic,
    /// Named `"armenian"` by [`UnicodeScript::as_str`].
    Armenian,
    /// Named `"brahmic"` by [`UnicodeScript::as_str`].
    Brahmic,
    /// Named `"georgian"` by [`UnicodeScript::as_str`].
    Georgian,
    /// Named `"cjk"` by [`UnicodeScript::as_str`].
    Cjk,
    /// Named `"hangul"` by [`UnicodeScript::as_str`].
    Hangul,
}

impl UnicodeScript {
    /// Returns the lowercase ASCII name of the script, e.g. `"latin"`.
    ///
    /// The mapping is fixed, so the method is `const` and the returned string
    /// slice has `'static` lifetime.
    pub const fn as_str(self) -> &'static str {
        // Deliberately exhaustive (no `_` arm): adding a variant must fail to
        // compile here until a name has been chosen for it.
        match self {
            Self::Latin => "latin",
            Self::Cyrillic => "cyrillic",
            Self::Armenian => "armenian",
            Self::Brahmic => "brahmic",
            Self::Georgian => "georgian",
            Self::Cjk => "cjk",
            Self::Hangul => "hangul",
        }
    }
}

33struct ScriptDef {
34 script: UnicodeScript,
35 blocks: &'static [(u32, u32)],
36}
37
38static SCRIPT_DATA: &[ScriptDef] = &[
39 ScriptDef {
40 script: UnicodeScript::Latin,
41 blocks: &[
42 (0x0100, 0x024F), (0x0300, 0x036F), ],
45 },
46 ScriptDef {
47 script: UnicodeScript::Cyrillic,
48 blocks: &[(0x0400, 0x04FF)],
49 },
50 ScriptDef {
51 script: UnicodeScript::Armenian,
52 blocks: &[(0x0530, 0x058F)],
53 },
54 ScriptDef {
55 script: UnicodeScript::Brahmic,
56 blocks: &[(0x0900, 0x109F)],
57 },
58 ScriptDef {
59 script: UnicodeScript::Georgian,
60 blocks: &[(0x10A0, 0x10FF)],
61 },
62 ScriptDef {
63 script: UnicodeScript::Cjk,
64 blocks: &[
65 (0x3000, 0x30FF), (0x4E00, 0x9FAF), (0xFF00, 0xFF60), ],
69 },
70 ScriptDef {
71 script: UnicodeScript::Hangul,
72 blocks: &[(0xAC00, 0xD7AF)],
73 },
74];
75
76pub fn script_from_codepoint(codepoint: u32) -> Option<UnicodeScript> {
78 for def in SCRIPT_DATA {
79 for &(lo, hi) in def.blocks {
80 if codepoint >= lo && codepoint <= hi {
81 return Some(def.script);
82 }
83 }
84 }
85 None
86}
87
88pub fn supported_codepoint(codepoint: u32) -> bool {
93 for def in SCRIPT_DATA {
94 for &(lo, hi) in def.blocks {
95 if codepoint >= lo && codepoint <= hi {
96 return true;
97 }
98 }
99 }
100 false
101}
102
#[cfg(test)]
mod tests {
    use super::*;

    /// ASCII letters lie below U+0100 and must not be claimed by any script.
    #[test]
    fn test_basic_latin_not_matched() {
        assert!(!supported_codepoint('A' as u32));
        assert!(!supported_codepoint('z' as u32));
    }

    #[test]
    fn test_latin_extended() {
        assert!(supported_codepoint(0x0100));
        assert!(supported_codepoint(0x024F));
        assert_eq!(script_from_codepoint(0x0100), Some(UnicodeScript::Latin));
    }

    #[test]
    fn test_combining_diacritical() {
        assert!(supported_codepoint(0x0300));
        assert!(supported_codepoint(0x0301));
        assert!(supported_codepoint(0x036F));
        assert_eq!(script_from_codepoint(0x0301), Some(UnicodeScript::Latin));
    }

    #[test]
    fn test_cyrillic() {
        // Written as escapes because Cyrillic "А" (U+0410) is visually
        // indistinguishable from Latin "A" (U+0041) in most fonts.
        let capital_a = '\u{0410}' as u32;
        let small_ya = '\u{044F}' as u32;
        assert!(supported_codepoint(capital_a));
        assert!(supported_codepoint(small_ya));
        assert_eq!(
            script_from_codepoint(capital_a),
            Some(UnicodeScript::Cyrillic)
        );
    }

    #[test]
    fn test_cjk() {
        assert!(supported_codepoint('中' as u32));
        assert!(supported_codepoint('あ' as u32));
        assert_eq!(script_from_codepoint('中' as u32), Some(UnicodeScript::Cjk));
    }

    #[test]
    fn test_hangul() {
        assert!(supported_codepoint(0xAC00));
        assert_eq!(script_from_codepoint(0xAC00), Some(UnicodeScript::Hangul));
    }

    #[test]
    fn test_brahmic() {
        assert!(supported_codepoint(0x0900));
        assert!(supported_codepoint(0x0E01));
        assert_eq!(script_from_codepoint(0x0900), Some(UnicodeScript::Brahmic));
    }

    #[test]
    fn test_armenian() {
        assert!(supported_codepoint(0x0530));
        assert_eq!(script_from_codepoint(0x0531), Some(UnicodeScript::Armenian));
    }

    #[test]
    fn test_georgian() {
        assert!(supported_codepoint(0x10A0));
        assert_eq!(script_from_codepoint(0x10A0), Some(UnicodeScript::Georgian));
    }

    #[test]
    fn test_unsupported_codepoint() {
        assert!(!supported_codepoint(0xFFFF));
        assert_eq!(script_from_codepoint(0xFFFF), None);
    }

    /// Range ends are inclusive, and the gaps between ranges are unsupported.
    #[test]
    fn test_block_boundaries() {
        // Just below the first range.
        assert_eq!(script_from_codepoint(0x00FF), None);

        // Gap between the two Latin ranges.
        assert!(supported_codepoint(0x024F));
        assert!(!supported_codepoint(0x0250));
        assert!(!supported_codepoint(0x02FF));
        assert!(supported_codepoint(0x0300));
        assert!(!supported_codepoint(0x0370));

        // Brahmic and Georgian are adjacent ranges.
        assert_eq!(script_from_codepoint(0x109F), Some(UnicodeScript::Brahmic));
        assert_eq!(script_from_codepoint(0x10A0), Some(UnicodeScript::Georgian));
        assert_eq!(script_from_codepoint(0x10FF), Some(UnicodeScript::Georgian));
        assert_eq!(script_from_codepoint(0x1100), None);

        // Upper ends of the Hangul and fullwidth CJK ranges.
        assert_eq!(script_from_codepoint(0xD7AF), Some(UnicodeScript::Hangul));
        assert_eq!(script_from_codepoint(0xD7B0), None);
        assert_eq!(script_from_codepoint(0xFF60), Some(UnicodeScript::Cjk));
        assert_eq!(script_from_codepoint(0xFF61), None);
    }
}