use crate::utils::IResult;
use nom::{
AsBytes,
AsChar,
FindToken,
InputIter,
InputLength,
InputTake,
InputTakeAtPosition,
Slice,
};
use std::ops::RangeFrom;
use nom::bytes::complete::{
is_not,
take,
};
use nom::branch::alt;
use nom::character::complete::char;
use nom::combinator::{map, map_opt, map_res};
use nom::multi::many0;
use nom::sequence::{
delimited,
preceded,
};
quick_error! {
/// Failures that can occur while converting the text of a numeric escape
/// sequence into an integer.
#[derive(Debug)]
pub enum UnicodeRuneError {
/// The bytes taken for the escape were not valid UTF-8.
UTF8(err: ::std::string::FromUtf8Error) {
from()
}
/// The text taken for the escape could not be parsed as an integer.
Int(err: ::std::num::ParseIntError) {
from()
}
}
}
/// Builds a parser that takes exactly `$len` input items and interprets them
/// as a `$type` integer written in base `$radix`.
///
/// * `$input` – the parser's input type; it must provide `as_bytes()`.
/// * `$type`  – the integer type to produce, e.g. `u8` or `u32`.
/// * `$len`   – the count handed to `take`.
/// * `$radix` – the base handed to `from_str_radix`.
///
/// The resulting parser fails (through `map_res`) when the taken text is not
/// valid UTF-8, contains anything other than digits of the given radix
/// (including a leading `+` or `-` sign), or does not fit into `$type`.
macro_rules! fixed_length_radix {
    ($input:ty, $type:ident, $len:expr, $radix:expr) => {
        map_res(
            take($len),
            |n: $input| -> Result<_, UnicodeRuneError> {
                let text = String::from_utf8(n.as_bytes().to_vec())?;
                // `from_str_radix` tolerates a leading sign, but an escape
                // sequence consists of digits only. Parsing a lone "+" is
                // always an error, so substituting it yields the regular
                // `ParseIntError` without needing a new error variant.
                let digits = if text.starts_with(|c: char| c == '+' || c == '-') {
                    "+"
                } else {
                    text.as_str()
                };
                Ok($type::from_str_radix(digits, $radix)?)
            }
        )
    };
}
/// Encodes the code point `n` as UTF-8 bytes.
///
/// Returns `None` when `char::from_u32` rejects `n`, i.e. when it is not a
/// Unicode scalar value (a surrogate or a value above `0x10FFFF`).
fn validate_unicode_scalar(n: u32) -> Option<Vec<u8>> {
    let scalar = ::std::char::from_u32(n)?;
    // Four bytes is the longest UTF-8 encoding of any scalar value.
    let mut utf8 = [0u8; 4];
    let encoded = scalar.encode_utf8(&mut utf8);
    Some(encoded.as_bytes().to_vec())
}
/// Parses a single backslash escape sequence and yields the bytes it denotes.
///
/// After the leading `\` the following forms are tried, in this order:
/// * one of the characters `a b f n r t v \ ' "`, each standing for one byte;
/// * three octal digits (e.g. `\123`), giving one byte;
/// * `x` followed by two hex digits, giving one byte;
/// * `u` followed by four, or `U` followed by eight hex digits, giving the
///   UTF-8 encoding of that code point (rejected by
///   `validate_unicode_scalar` when it is not a Unicode scalar value).
fn rune<I, C>(input: I) -> IResult<I, Vec<u8>>
where
    I: Clone + AsBytes + InputIter<Item = C> + InputTake + Slice<RangeFrom<usize>>,
    C: AsChar,
{
    /// Mapper for an escape character that stands for one fixed byte.
    fn literal(byte: u8) -> impl Fn(char) -> Vec<u8> {
        move |_| vec![byte]
    }

    // Numeric escapes, named here and appended after the single-character
    // escapes below so the order of alternatives stays the same.
    let octal_byte = map(fixed_length_radix!(I, u8, 3u8, 8), |byte| vec![byte]);
    let hex_byte = map(
        preceded(char('x'), fixed_length_radix!(I, u8, 2u8, 16)),
        |byte| vec![byte],
    );
    let short_code_point = map_opt(
        preceded(char('u'), fixed_length_radix!(I, u32, 4u8, 16)),
        validate_unicode_scalar,
    );
    let long_code_point = map_opt(
        preceded(char('U'), fixed_length_radix!(I, u32, 8u8, 16)),
        validate_unicode_scalar,
    );

    let escape = alt((
        map(char('a'), literal(0x07)),
        map(char('b'), literal(0x08)),
        map(char('f'), literal(0x0c)),
        map(char('n'), literal(0x0a)),
        map(char('r'), literal(0x0d)),
        map(char('t'), literal(0x09)),
        map(char('v'), literal(0x0b)),
        map(char('\\'), literal(0x5c)),
        map(char('\''), literal(0x27)),
        map(char('"'), literal(0x22)),
        octal_byte,
        hex_byte,
        short_code_point,
        long_code_point,
    ));

    preceded(char('\\'), escape)(input)
}
/// Builds a parser that matches a run of input containing none of the
/// characters in `arg` (via nom's `is_not`) and returns the matched run as
/// an owned byte vector.
fn is_not_v<I, C>(arg: &'static str) -> impl FnMut(I) -> IResult<I, Vec<u8>>
where
    I: Clone
        + AsBytes
        + InputIter<Item = C>
        + InputLength
        + InputTake
        + InputTakeAtPosition<Item = C>,
    &'static str: FindToken<C>,
{
    let run = is_not(arg);
    map(run, |matched: I| {
        // Copy the matched slice so the result no longer borrows the input.
        let raw: &[u8] = matched.as_bytes();
        raw.to_vec()
    })
}
/// Builds a parser for the body of a quoted string.
///
/// The body is any number (possibly zero) of fragments, each either an
/// escape sequence parsed by `rune` or a run of characters not listed in
/// `arg`. The bytes of all fragments are concatenated into a single vector.
fn chars_except<I, C>(arg: &'static str) -> impl FnMut(I) -> IResult<I, Vec<u8>>
where
    I: Clone
        + AsBytes
        + InputIter<Item = C>
        + InputLength
        + InputTake
        + InputTakeAtPosition<Item = C>
        + Slice<RangeFrom<usize>>,
    C: AsChar,
    &'static str: FindToken<C>,
{
    // Escapes are tried first; plain runs are the fallback.
    let fragment = alt((rune, is_not_v(arg)));
    let fragments = many0(fragment);
    map(fragments, |pieces: Vec<Vec<u8>>| pieces.concat())
}
pub fn string<I, C>(input: I) -> IResult<I, Vec<u8>>
where
I: Clone
+ AsBytes
+ InputIter<Item = C>
+ InputLength
+ InputTake
+ InputTakeAtPosition<Item = C>
+ Slice<RangeFrom<usize>>
,
C: AsChar,
&'static str: FindToken<C>,
{
alt((
delimited(char('"'), chars_except("\n\"\\"), char('"')),
delimited(char('\''), chars_except("\n'\\"), char('\'')),
delimited(char('`'), is_not_v("`"), char('`')),
))(input)
}
// Unit tests for the string-literal parser (`string`) and the escape-sequence
// parser (`rune`).
// NOTE(review): the `allow` is presumably here because the glob imports pull
// in helpers that not every test uses — confirm before removing it.
#[allow(unused_imports)]
#[cfg(test)]
mod tests {
use super::*;
use crate::utils::tests::*;
use nom::error::{
ErrorKind,
VerboseErrorKind,
};
// Exercises `string` with each delimiter and with several escape spellings.
#[test]
fn strings() {
// Double-quoted: `\"` and `\n` are decoded into the bytes they denote.
assert_eq!(
string(r#""lorem ipsum \"dolor\nsit amet\"""#),
Ok(("", b"lorem ipsum \"dolor\nsit amet\"".to_vec()))
);
// Single-quoted: `\'`, `\n` and `\t` are decoded.
assert_eq!(
string(r#"'lorem ipsum \'dolor\nsit\tamet\''"#),
Ok(("", b"lorem ipsum 'dolor\nsit\tamet'".to_vec()))
);
// Backtick-quoted: backslash sequences are kept verbatim.
assert_eq!(
string(r#"`lorem ipsum \"dolor\nsit\tamet\"`"#),
Ok((
"",
br#"lorem ipsum \"dolor\nsit\tamet\""#.to_vec() ))
);
// A literal newline inside a single-quoted string is rejected. The
// expected trace names the backtick alternative (the last one tried)
// followed by the `Alt` marker.
// NOTE(review): `err` comes from `crate::utils::tests`; presumably it
// builds the expected error from (input, kind) pairs — confirm there.
assert_eq!(
string("'this\nis not valid'"),
err(vec![
(
"'this\nis not valid'",
VerboseErrorKind::Char('`'),
),
(
"'this\nis not valid'",
VerboseErrorKind::Nom(ErrorKind::Alt),
),
])
);
// A literal newline inside a backtick-quoted string is accepted.
assert_eq!(
string("`but this\nis`"),
Ok(("", b"but this\nis".to_vec()))
);
// Every spelling of U+221E (literal character, `\u`, `\U`, and the
// individual UTF-8 bytes via `\x`, in either hex case) must produce the
// same three bytes.
for s in [
r#"'inf: ∞'"#,
r#"'inf: \u221e'"#,
r#"'inf: \u221E'"#,
r#"'inf: \U0000221e'"#,
r#"'inf: \U0000221E'"#,
r#"'inf: \xe2\x88\x9e'"#,
r#"'inf: \xE2\x88\x9E'"#,
] {
assert_eq!(
string(s),
Ok(("", b"inf: \xe2\x88\x9e".to_vec()))
);
}
// Same for U+1F914, which needs the eight-digit `\U` form and encodes to
// four bytes.
for s in [
r#"'thinking: 🤔'"#,
r#"'thinking: \U0001f914'"#,
r#"'thinking: \U0001F914'"#,
r#"'thinking: \xf0\x9f\xa4\x94'"#,
r#"'thinking: \xF0\x9F\xA4\x94'"#,
] {
assert_eq!(
string(s),
Ok(("", b"thinking: \xf0\x9f\xa4\x94".to_vec()))
);
}
}
// Exercises `rune` directly on individual escape sequences.
#[test]
fn runes() {
// Three-digit octal escape yields one byte.
assert_eq!(rune("\\123"), Ok(("", vec![0o123])));
// Two-digit hex escape yields one byte.
assert_eq!(rune("\\x23"), Ok(("", vec![0x23])));
// Four-digit `\u` escape yields the UTF-8 encoding of the code point.
assert_eq!(
rune("\\uabcd"),
Ok(("", "\u{abcd}".as_bytes().to_vec()))
);
// U+D801 is a surrogate, so the `\u` alternative is rejected; the
// expected trace is that of the final `U` alternative plus `Alt`.
assert_eq!(
rune("\\uD801"),
err(vec![
("uD801", VerboseErrorKind::Char('U')),
("uD801", VerboseErrorKind::Nom(ErrorKind::Alt)),
])
);
// Eight-digit `\U` escape for a code point outside the BMP.
assert_eq!(
rune("\\U00010330"),
Ok(("", "\u{10330}".as_bytes().to_vec()))
);
// 0xdeadDEAD parses as a number but is not a Unicode scalar value, so
// the `\U` alternative fails in `map_opt`.
assert_eq!(
rune("\\UdeadDEAD"),
err(vec![
("UdeadDEAD", VerboseErrorKind::Nom(ErrorKind::MapOpt)),
("UdeadDEAD", VerboseErrorKind::Nom(ErrorKind::Alt)),
]),
);
// `\x` followed by non-hex characters is rejected.
assert_eq!(
rune("\\xxx"),
err(vec![
("xxx", VerboseErrorKind::Char('U')),
("xxx", VerboseErrorKind::Nom(ErrorKind::Alt)),
]),
);
// `\x` followed by only one hex digit is rejected.
assert_eq!(
rune("\\x1"),
err(vec![
("x1", VerboseErrorKind::Char('U')),
("x1", VerboseErrorKind::Nom(ErrorKind::Alt)),
]),
);
}
}