1use std::char;
12use std::str;
13
14use regex::Regex;
15
16lazy_static! {
17 static ref CODEPOINT_SEPARATORS: Regex = Regex::new(r#"[^\w+]"#).unwrap();
19
20 static ref CODEPOINT_PREFIX: Regex = Regex::new(r#"^[Uu][+]"#).unwrap();
22
23 static ref HEX_SEPARATORS: Regex = Regex::new(r#"[^\w]"#).unwrap();
25
26 static ref HEX_PREFIX: Regex = Regex::new(r#"^0[xX]"#).unwrap();
28}
29
30pub fn codepoints(string: &str) -> String {
31 CODEPOINT_SEPARATORS
32 .split(&string)
33 .map(|token| {
34 let mut token = token;
35 if CODEPOINT_PREFIX.is_match(token) {
36 token = &token[2..];
37 }
38 let codepoint = u32::from_str_radix(token, 16)
39 .unwrap_or_else(|_| panic!("Cannot parse token as hex number: {}", token));
40 char::from_u32(codepoint)
41 .unwrap_or_else(|| panic!("Invalid Unicode Scalar Value code-point: {}", codepoint))
42 })
43 .collect::<String>()
44}
45
46pub fn utf8_hex(string: &str) -> String {
47 let utf8 = HEX_SEPARATORS.split(&string).map(|token| {
48 let mut token = token;
49 if HEX_PREFIX.is_match(token) {
50 token = &token[2..];
51 }
52 u8::from_str_radix(token, 16)
53 .unwrap_or_else(|_| panic!("Cannot parse token as hex byte value: {}", token))
54 });
55
56 String::from_utf8(utf8.collect()).expect("Invalid UTF-8 sequence")
57}
58
59pub fn utf16_hex(string: &str) -> String {
60 let utf16 = HEX_SEPARATORS.split(&string).map(|token| {
61 let mut token = token;
62 if HEX_PREFIX.is_match(token) {
63 token = &token[2..];
64 }
65 u16::from_str_radix(token, 16)
66 .unwrap_or_else(|_| panic!("Cannot parse token as hex byte value: {}", token))
67 });
68
69 char::decode_utf16(utf16)
70 .map(|r| r.expect("Invalid UTF-16 sequence"))
71 .collect()
72}