use nom::types::CompleteByteSlice;
quick_error! {
/// Errors produced while turning the digit bytes of a numeric escape into an integer.
///
/// Both variants declare `from()`, so the wrapped error types convert into this
/// enum automatically when propagated with `?`.
#[derive(Debug)]
pub enum UnicodeRuneError {
/// The digit bytes were not valid UTF-8.
UTF8(err: ::std::string::FromUtf8Error) {
from()
}
/// The digit text could not be parsed as an integer.
Int(err: ::std::num::ParseIntError) {
from()
}
}
}
/// Parses exactly `$len` bytes as an integer of type `$type` written in base `$radix`.
///
/// Every one of the `$len` bytes must be a digit of the requested radix.
/// `from_str_radix` on its own also accepts a leading `+` sign, which would let
/// inputs such as `\x+1` or `\+12` through as escapes, so the bytes are checked
/// with `verify!` before they are converted.
///
/// Conversion failures (invalid UTF-8, value out of range for `$type`) are
/// reported as `UnicodeRuneError` and surface as a `map_res!` parse error.
macro_rules! fixed_length_radix {
    ($i:expr, $type:ident, $len:expr, $radix:expr) => {
        map_res!(
            $i,
            verify!(take!($len), |n: CompleteByteSlice| {
                // Reject signs and any other non-digit byte up front.
                n.0.iter().all(|&b| (b as char).is_digit($radix))
            }),
            |n: CompleteByteSlice| -> Result<_, UnicodeRuneError> {
                Ok($type::from_str_radix(
                    &String::from_utf8(n.0.to_vec())?,
                    $radix
                )?)
            }
        )
    }
}
/// Encodes the code point `n` as UTF-8.
///
/// Returns the encoded bytes, or `None` when `n` is not a valid `char`
/// (for example a surrogate or a value beyond the Unicode range).
fn validate_unicode_scalar(n: u32) -> Option<Vec<u8>> {
    let scalar = ::std::char::from_u32(n)?;
    // Four bytes is the longest UTF-8 encoding of a single `char`.
    let mut utf8 = [0u8; 4];
    let encoded = scalar.encode_utf8(&mut utf8);
    Some(encoded.as_bytes().to_vec())
}
// Parses one backslash escape and yields the byte(s) it stands for.
//
// Alternatives are tried in order after the leading `\`:
//   * single-character escapes (`a b f n r t v \ ' "`), each giving one byte;
//   * three octal digits, giving one byte;
//   * `x` followed by two hex digits, giving one byte;
//   * `u` + four or `U` + eight hex digits, giving the UTF-8 encoding of the
//     code point, or failing when `validate_unicode_scalar` rejects it.
named!(rune <CompleteByteSlice, Vec<u8>>,
    preceded!(
        char!('\\'),
        alt!(
            char!('a') => { |_| vec![b'\x07'] }
          | char!('b') => { |_| vec![b'\x08'] }
          | char!('f') => { |_| vec![b'\x0c'] }
          | char!('n') => { |_| vec![b'\n'] }
          | char!('r') => { |_| vec![b'\r'] }
          | char!('t') => { |_| vec![b'\t'] }
          | char!('v') => { |_| vec![b'\x0b'] }
          | char!('\\') => { |_| vec![b'\\'] }
          | char!('\'') => { |_| vec![b'\''] }
          | char!('"') => { |_| vec![b'"'] }
            // Numeric byte escapes: the parsed value is emitted as a raw byte.
          | fixed_length_radix!(u8, 3, 8) => { |byte: u8| vec![byte] }
          | preceded!(char!('x'), fixed_length_radix!(u8, 2, 16)) => { |byte: u8| vec![byte] }
            // Code point escapes: emitted as UTF-8, rejected if not a valid scalar.
          | map_opt!(
                preceded!(char!('u'), fixed_length_radix!(u32, 4, 16)),
                validate_unicode_scalar
            )
          | map_opt!(
                preceded!(char!('U'), fixed_length_radix!(u32, 8, 16)),
                validate_unicode_scalar
            )
        )
    )
);
/// Runs `is_not!($arg)` and copies the matched bytes into an owned `Vec<u8>`.
macro_rules! is_not_v {
    ($i:expr, $arg:expr) => {
        map!(
            $i,
            is_not!($arg),
            |run: CompleteByteSlice| Vec::from(run.0)
        )
    }
}
/// Parses the body of a quoted string into one byte vector.
///
/// The body is any number of pieces, each either an escape parsed by `rune`
/// or a run of bytes not listed in `$arg`; the pieces are concatenated in order.
macro_rules! chars_except {
    ($i:expr, $arg:expr) => {
        map!(
            $i,
            many0!(alt!(rune | is_not_v!($arg))),
            |pieces: Vec<Vec<u8>>| pieces.concat()
        )
    }
}
// Parses a string literal and returns its decoded contents.
//
// Three forms are accepted:
//   * "..." : escapes are decoded; a raw newline, `"` or `\` ends the body;
//   * '...' : escapes are decoded; a raw newline, `'` or `\` ends the body;
//   * `...` : raw, no escape processing; everything up to the next backtick.
//
// The raw body is wrapped in `opt!` because `is_not!` needs at least one byte
// to match; without it the empty raw string (two adjacent backticks) would be
// rejected even though `""` and `''` are accepted.
//
// Parsing fails if the decoded bytes are not valid UTF-8.
named!(pub string <CompleteByteSlice, String>, map_res!(
    alt!(
        delimited!(char!('"'), chars_except!("\n\"\\"), char!('"'))
      | delimited!(char!('\''), chars_except!("\n'\\"), char!('\''))
      | delimited!(
            char!('`'),
            map!(
                opt!(is_not_v!("`")),
                |body: Option<Vec<u8>>| body.unwrap_or_default()
            ),
            char!('`')
        )
    ),
    |s: Vec<u8>| String::from_utf8(s)
));
#[allow(unused_imports)]
#[cfg(test)]
mod tests {
    use super::*;
    use nom::{Context, Err, ErrorKind};

    /// Wraps a string as complete parser input.
    fn cbs(s: &str) -> CompleteByteSlice {
        CompleteByteSlice(s.as_bytes())
    }

    #[test]
    fn strings() {
        // (literal source, expected decoded contents); all input must be consumed.
        let accepted = [
            (
                "\"lorem ipsum \\\"dolor\\nsit amet\\\"\"",
                "lorem ipsum \"dolor\nsit amet\"",
            ),
            (
                "'lorem ipsum \\'dolor\\nsit\\tamet\\''",
                "lorem ipsum 'dolor\nsit\tamet'",
            ),
            // Backtick strings are raw: escapes are kept verbatim.
            (
                "`lorem ipsum \\\"dolor\\nsit\\tamet\\\"`",
                "lorem ipsum \\\"dolor\\nsit\\tamet\\\"",
            ),
            // Backtick strings may span lines.
            ("`but this\nis`", "but this\nis"),
        ];
        for &(input, expected) in accepted.iter() {
            assert_eq!(
                string(cbs(input)),
                Ok((cbs(""), expected.to_string()))
            );
        }

        // A raw newline inside a quoted string is rejected.
        let multiline = "'this\nis not valid'";
        assert_eq!(
            string(cbs(multiline)),
            Err(Err::Error(Context::Code(cbs(multiline), ErrorKind::Alt)))
        );
    }

    #[test]
    fn runes() {
        // (escape source, expected bytes); all input must be consumed.
        let decoded: Vec<(&str, Vec<u8>)> = vec![
            ("\\123", vec![0o123]),
            ("\\x23", vec![0x23]),
            ("\\uabcd", "\u{abcd}".as_bytes().to_vec()),
            ("\\U00010330", "\u{10330}".as_bytes().to_vec()),
        ];
        for (input, expected) in decoded {
            assert_eq!(rune(cbs(input)), Ok((cbs(""), expected)));
        }

        // (escape source, input position reported by the failing `alt!`).
        let rejected = [
            ("\\uD801", "uD801"),
            ("\\UdeadDEAD", "UdeadDEAD"),
            ("\\xxx", "xxx"),
            ("\\x1", "x1"),
        ];
        for &(input, rest) in rejected.iter() {
            assert_eq!(
                rune(cbs(input)),
                Err(Err::Error(Context::Code(cbs(rest), ErrorKind::Alt)))
            );
        }
    }
}