1use libc::{c_char, wchar_t};
2
3use errno::errno;
4
/// Raw FFI declarations for the statically linked `rustlocale` C library.
///
/// Nothing in here is safe to call directly; the free functions further down in this file
/// (`utf8towc`, `wctochar`, `isspace`, `isblank`, `toupper`, `tolower`) wrap each entry point.
mod c {
    // C type names are lowercase by convention, hence the lint exemption.
    // NOTE(review): the alias is 64-bit here; presumably this matches the parameter type the
    // `rustlocale` shim actually uses — confirm against its C sources.
    #[allow(non_camel_case_types)]
    type wint_t = i64;

    #[link(name = "rustlocale", kind = "static")]
    extern "C" {
        /// Converts `byte_length` bytes starting at `multibytes` into one wide character
        /// stored through `wc_buf`.
        ///
        /// The wrapper in this file treats a return value of `0` as success and anything
        /// else as failure.
        pub fn utf8towc(
            wc_buf: *mut libc::wchar_t,
            multibytes: *const libc::c_char,
            byte_length: libc::size_t,
        ) -> u8;
        /// Writes the encoding of `wc` through `utf8_bytes`.
        ///
        /// The wrapper in this file treats a positive return value as the number of bytes
        /// written and anything else as failure.
        // NOTE(review): the wrapper passes a 4-byte buffer — confirm the shim never writes more.
        pub fn wctoutf8(utf8_bytes: *mut libc::c_char, wc: libc::wchar_t) -> libc::ssize_t;
        /// White-space classification of `ch`.
        ///
        /// The wrapper in this file treats a negative return value as failure and any other
        /// non-zero value as "is white space".
        pub fn iswspace_native(ch: wint_t) -> i8;
        /// Blank classification of `ch`; the wrapper treats non-zero as "is blank".
        // NOTE(review): returns `c_int` while `iswspace_native` returns `i8` — verify both
        // against the shim's prototypes.
        pub fn iswblank_native(ch: wint_t) -> libc::c_int;
        /// Upper-case mapping of `ch`.
        pub fn towupper_native(ch: wint_t) -> wint_t;
        /// Lower-case mapping of `ch`.
        pub fn towlower_native(ch: wint_t) -> wint_t;
    }
}
23
/// Character classification and case conversion backed by the C library.
///
/// The `char` implementation in this file forwards to C routines (`isspace`/`isblank` from
/// libc for single-byte characters, and the `rustlocale` shim otherwise). The tests in this
/// file expect the answers to change with the `LC_ALL` environment variable.
pub trait CType {
    /// Returns `true` if the value is classified as white space.
    fn is_space(&self) -> bool;

    /// Returns `true` if the value is classified as a blank character.
    fn is_blank(&self) -> bool;

    /// Returns the upper-case counterpart of the value.
    ///
    /// The tests in this file expect characters without an upper-case mapping (e.g. `'1'`)
    /// to come back unchanged.
    fn to_uppercase(&self) -> Self;

    /// Returns the lower-case counterpart of the value.
    ///
    /// The tests in this file expect characters without a lower-case mapping (e.g. `'1'`)
    /// to come back unchanged.
    fn to_lowercase(&self) -> Self;
}
104
105impl CType for char {
106 fn is_space(&self) -> bool {
107 let buf = utf8_bytes(self);
108 if buf.len() == 1 {
109 unsafe { libc::isspace(buf[0].into()) != 0 }
110 } else {
111 let wc = utf8towc(&buf);
112 isspace(wc)
113 }
114 }
115
116 fn is_blank(&self) -> bool {
117 let buf = utf8_bytes(self);
118 if buf.len() == 1 {
119 unsafe { libc::isblank(buf[0].into()) != 0 }
120 } else {
121 let wc = utf8towc(&buf);
122 isblank(wc)
123 }
124 }
125
126 fn to_uppercase(&self) -> char {
127 let bytes = utf8_bytes(self);
128 let wc = utf8towc(&bytes);
129 let upper = toupper(wc);
130 wctochar(upper)
131 }
132
133 fn to_lowercase(&self) -> char {
134 let bytes = utf8_bytes(self);
135 let wc = utf8towc(&bytes);
136 let lower = tolower(wc);
137 wctochar(lower)
138 }
139}
140
/// Returns the UTF-8 encoding of `c` as an owned byte vector (one to four bytes long).
fn utf8_bytes(c: &char) -> Vec<u8> {
    // Encode into a stack buffer sized for the longest UTF-8 sequence, then copy out only
    // the bytes that were actually produced.
    let mut scratch = [0u8; 4];
    c.encode_utf8(&mut scratch).as_bytes().to_vec()
}
147
148fn utf8towc(utf8_bytes: &Vec<u8>) -> wchar_t {
149 let mut wc = 0;
150 match unsafe {
151 c::utf8towc(
152 &mut wc as *mut wchar_t,
153 utf8_bytes.as_ptr() as *const c_char,
154 utf8_bytes.len(),
155 )
156 } {
157 s if s == 0 => wc,
158 s => panic!("utf8towc failed. status={}, error={}", s, errno()),
159 }
160}
161
162fn wctochar(wc: wchar_t) -> char {
163 let mut buf = [0; 4];
164 match unsafe { c::wctoutf8(buf.as_mut_ptr(), wc) } {
165 length if length > 0 => {
166 let length = length as usize;
167 String::from_utf8(buf[..length].iter().map(|c| *c as u8).collect())
168 .unwrap()
169 .chars()
170 .next()
171 .unwrap()
172 }
173 status => panic!("wctochar failed. status={}, error={}", status, errno()),
174 }
175}
176
177fn isspace(wc: wchar_t) -> bool {
178 match unsafe { c::iswspace_native(wc.into()) } {
179 s if s >= 0 => s != 0,
180 _ => panic!("iswspace_native failed. error={}", errno()),
181 }
182}
183
184fn isblank(wc: wchar_t) -> bool {
185 unsafe { c::iswblank_native(wc.into()) != 0 }
186}
187
188fn toupper(wc: wchar_t) -> wchar_t {
189 unsafe { c::towupper_native(wc.into()) as wchar_t }
190}
191
192fn tolower(wc: wchar_t) -> wchar_t {
193 unsafe { c::towlower_native(wc.into()) as wchar_t }
194}
195
#[cfg(test)]
mod tests {
    use super::*;
    use std::sync::{Mutex, MutexGuard};

    /// Serialises every test that changes `LC_ALL`.
    ///
    /// The environment is process-global and the test harness runs tests on several threads
    /// by default, so without this lock one test's locale can leak into another test's
    /// assertions.
    static LOCALE_LOCK: Mutex<()> = Mutex::new(());

    /// Takes [`LOCALE_LOCK`]; hold the returned guard for the whole test body.
    ///
    /// A poisoned lock (an earlier locale test panicked) is still usable because the guarded
    /// value is `()`, so poisoning is deliberately ignored.
    fn lock_locale() -> MutexGuard<'static, ()> {
        LOCALE_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    #[test]
    fn is_always_space() {
        assert!(' '.is_space());
        assert!('\x0c'.is_space());
        assert!('\n'.is_space());
        assert!('\r'.is_space());
        assert!('\t'.is_space());
        assert!('\x0b'.is_space());
    }

    #[test]
    fn is_space_i18n() {
        let _locale = lock_locale();
        std::env::set_var("LC_ALL", "POSIX");
        assert!(!'\u{1680}'.is_space());
        assert!(!'\u{2000}'.is_space());
        assert!(!'\u{2006}'.is_space());
        assert!(!'\u{2008}'.is_space());
        assert!(!'\u{200A}'.is_space());
        assert!(!'\u{2028}'.is_space());
        assert!(!'\u{2029}'.is_space());
        assert!(!'\u{205F}'.is_space());
        assert!(!'\u{3000}'.is_space());
        std::env::set_var("LC_ALL", "en_US");
        assert!('\u{1680}'.is_space());
        assert!('\u{2000}'.is_space());
        assert!('\u{2006}'.is_space());
        assert!('\u{2008}'.is_space());
        assert!('\u{200A}'.is_space());
        assert!('\u{2028}'.is_space());
        assert!('\u{2029}'.is_space());
        assert!('\u{205F}'.is_space());
        assert!('\u{3000}'.is_space());
    }

    #[test]
    #[ignore]
    fn is_space_special() {
        let _locale = lock_locale();
        std::env::set_var("LC_ALL", "en_US");
        assert!(!'\u{1361}'.is_space());
        std::env::set_var("LC_ALL", "am_ET");
        assert!('\u{1361}'.is_space());
    }

    #[test]
    fn is_blank() {
        let _locale = lock_locale();
        std::env::set_var("LC_ALL", "POSIX");
        assert!(' '.is_blank());
        assert!('\t'.is_blank());
        assert!(!'\n'.is_blank());
        assert!(!'\u{3000}'.is_blank());
        std::env::set_var("LC_ALL", "en_US");
        assert!('\u{3000}'.is_blank());
        assert!(!'\u{2028}'.is_blank());
    }

    #[test]
    fn to_uppercase() {
        let _locale = lock_locale();
        assert_eq!(CType::to_uppercase(&'a'), 'A');
        assert_eq!(CType::to_uppercase(&'1'), '1');
        std::env::set_var("LC_ALL", "POSIX");
        assert_eq!(CType::to_uppercase(&'\u{017F}'), '\u{017F}');
        std::env::set_var("LC_ALL", "en_US");
        assert_eq!(CType::to_uppercase(&'\u{017F}'), 'S');
    }

    #[test]
    #[ignore]
    fn to_uppercase_special() {
        let _locale = lock_locale();
        std::env::set_var("LC_ALL", "en_US");
        assert_eq!(CType::to_uppercase(&'i'), 'I');
        std::env::set_var("LC_ALL", "tr_TR");
        assert_eq!(CType::to_uppercase(&'i'), '\u{0130}');
    }

    #[test]
    fn to_lowercase() {
        let _locale = lock_locale();
        assert_eq!(CType::to_lowercase(&'A'), 'a');
        assert_eq!(CType::to_lowercase(&'1'), '1');
        std::env::set_var("LC_ALL", "POSIX");
        assert_eq!(CType::to_lowercase(&'\u{0190}'), '\u{0190}');
        std::env::set_var("LC_ALL", "en_US");
        assert_eq!(CType::to_lowercase(&'\u{0190}'), '\u{025b}');
    }

    #[test]
    #[ignore]
    fn to_lowercase_special() {
        let _locale = lock_locale();
        std::env::set_var("LC_ALL", "en_US");
        assert_eq!(CType::to_lowercase(&'I'), 'i');
        std::env::set_var("LC_ALL", "tr_TR");
        assert_eq!(CType::to_lowercase(&'I'), '\u{0131}');
    }
}