#![allow(dead_code)]
use nom::hex_u32;
// Parses a single `\uXXXX` escape: the two literal bytes `\u` followed by a
// window of exactly four bytes that must all be hexadecimal digits (either
// case). The digits are returned as one UTF-16 code unit.
//
// `take!(4)` carves out the four-byte window; `hex_u32` followed by `eof!()`
// is then run on that window only, so a window that is not hex digits all the
// way to its end is rejected.
named!(code_unit<u16>,
    do_parse!(
        tag!(r"\u") >>
        value: flat_map!(take!(4), terminated!(hex_u32, eof!())) >>
        // At most four hex digits were parsed, so the value fits in 16 bits.
        (value as u16)
    )
);
/// Classification of a single UTF-16 code unit.
///
/// `Debug` makes the value usable in `assert_eq!` and `{:?}` output; `Clone`
/// and `Copy` reflect that this is a plain fieldless tag passed by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Category {
    /// A code unit outside the surrogate range; it stands for a character on its own.
    Bmp,
    /// A code unit in `0xDC00..=0xDFFF`: the second half of a surrogate pair.
    LowSurrogate,
    /// A code unit in `0xD800..=0xDBFF`: the first half of a surrogate pair.
    HighSurrogate,
}
use self::Category::*;
/// Classifies a UTF-16 code unit.
///
/// Returns `HighSurrogate` for `0xD800..=0xDBFF`, `LowSurrogate` for
/// `0xDC00..=0xDFFF`, and `Bmp` for every other value.
fn category(v: u16) -> Category {
    match v {
        0xD800..=0xDBFF => HighSurrogate,
        0xDC00..=0xDFFF => LowSurrogate,
        // Everything below 0xD800 and everything from 0xE000 upwards.
        _ => Bmp,
    }
}
// Parses one `\uXXXX` code unit with `code_unit` and accepts it only when
// `category` classifies it as the requested `cat`; a unit of any other
// category makes the parser fail.
named_args!(code_unit_cat(cat: Category)<u16>,
    verify!(code_unit, |unit| cat == category(unit))
);
/// Combines a UTF-16 surrogate pair into the character it encodes.
///
/// `high` is expected to be a high surrogate and `low` a low surrogate, as
/// classified by `category`; both expectations are checked in debug builds only.
///
/// # Panics
///
/// Panics if the first decoded item is not a valid character, which cannot
/// happen when the arguments really are a high/low surrogate pair.
fn surrogate_pair_to_char(high: u16, low: u16) -> char {
    debug_assert!(category(high) == HighSurrogate);
    debug_assert!(category(low) == LowSurrogate);
    // Decode directly from the two code units instead of building a temporary
    // `String` just to read a single character back out of it.
    ::std::char::decode_utf16([high, low].iter().cloned())
        .next()
        .expect("Two code units always yield at least one decoded item")
        .expect("All combinations of high+ low surrogate form a valid character")
}
// Parses two consecutive `\uXXXX` escapes — first a high surrogate, then a low
// surrogate — and merges them into the single character the pair encodes.
named!(code_unit_supplementary<char>,
    do_parse!(
        lead: call!(code_unit_cat, HighSurrogate) >>
        trail: call!(code_unit_cat, LowSurrogate) >>
        (surrogate_pair_to_char(lead, trail))
    )
);
/// Converts a stand-alone (non-surrogate) UTF-16 code unit into its character.
///
/// Debug builds assert that `v` is classified as `Bmp`.
///
/// # Panics
///
/// Panics if `v` does not map to a valid `char`, i.e. if a surrogate code unit
/// slips through in a build without debug assertions.
fn basic_to_char(v: u16) -> char {
    debug_assert!(category(v) == Bmp);
    // Widening u16 -> u32 is lossless.
    let scalar = u32::from(v);
    ::std::char::from_u32(scalar).expect("All BMP code units are valid characters")
}
// Parses a single `\uXXXX` escape whose code unit is classified as `Bmp`
// (i.e. not a surrogate) and converts it to a `char` via `basic_to_char`.
named!(code_unit_bmp<char>,
map!(
// Only code units in the `Bmp` category get past this step.
call!(code_unit_cat, Bmp),
basic_to_char
)
);
// Public entry point: parses one escaped character written as `\uXXXX`.
//
// The single-unit form (`code_unit_bmp`) is tried first; if it does not match,
// the two-unit surrogate-pair form (`code_unit_supplementary`) is tried.
named!(pub escape_sequence<char>,
alt!(code_unit_bmp | code_unit_supplementary)
);
// Unit tests for the escape-sequence parsers. The `assert_done!`,
// `assert_done_partial!`, `assert_error!` and `assert_incomplete!` macros are
// not defined in this file — presumably project test helpers; confirm where
// they are exported from.
#[cfg(test)]
mod tests {
use super::*;
// `code_unit` on its own: a raw 16-bit value from `\u` plus four hex digits.
#[test]
fn test_code_unit() {
assert_done!(code_unit(br"\u1234"), 0x1234);
// Hex digits may be upper or lower case.
assert_done!(code_unit(br"\uABcd"), 0xabcd);
// Only four digits are consumed; the rest is left as remaining input.
assert_done_partial!(code_unit(br"\u123456789"), 0x1234, b"56789");
// Non-hex bytes inside the four-byte window are an error.
assert_error!(code_unit(br"\u54xt"));
// Fewer than four bytes after `\u` means more input is needed.
assert_incomplete!(code_unit(br"\u15"));
}
// `escape_sequence`: full characters, including surrogate pairs.
#[test]
fn test_escape_sequence() {
// Single code units in the BMP.
assert_done!(escape_sequence(br"\u0024"), '\u{0024}');
assert_done!(escape_sequence(br"\uFA15"), '\u{FA15}');
// High + low surrogate pairs combine into one supplementary character.
assert_done!(escape_sequence(br"\uD801\uDC37"), '\u{10437}');
assert_done!(escape_sequence(br"\uD852\uDF62"), '\u{24B62}');
// A high surrogate at the end of input still awaits its low surrogate.
assert_incomplete!(escape_sequence(br"\uD801"));
assert_incomplete!(escape_sequence(br"\uD852"));
// A low surrogate with no preceding high surrogate is rejected.
assert_error!(escape_sequence(br"\uDC37"));
assert_error!(escape_sequence(br"\uDF62"));
// A high surrogate followed by another high surrogate is rejected.
assert_error!(escape_sequence(br"\uD801\uD852"));
}
}