use nom::branch::alt;
use nom::bytes::streaming::{is_not, take_while_m_n};
use nom::character::streaming::{char, multispace1};
use nom::combinator::{map, map_opt, map_res, value, verify};
use nom::error::{FromExternalError, ParseError};
use nom::multi::fold_many0;
use nom::sequence::{delimited, preceded, tuple};
use nom::IResult;
/// Parses the body of a Unicode escape (the text after the backslash):
/// `u{X}` where `X` is one to six ASCII hex digits.
///
/// The digits are read as a base-16 `u32` and converted with
/// `std::char::from_u32`; a value that is not a valid `char` makes the
/// parser fail instead of yielding a replacement character.
fn parse_unicode<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    map_opt(
        map_res(
            preceded(
                char('u'),
                delimited(
                    char('{'),
                    take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()),
                    char('}'),
                ),
            ),
            // Between the braces we only ever see hex digits, so this reads them as a number.
            |digits: &str| u32::from_str_radix(digits, 16),
        ),
        std::char::from_u32,
    )(input)
}
/// Parses the body of a hexadecimal byte escape (the text after the
/// backslash): `x` followed by exactly two ASCII hex digits.
///
/// The two digits are read as a `u8` and that byte value is converted
/// directly to the `char` with the same code point (so `x1b` yields U+001B).
fn parse_hex<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    let two_hex_digits = take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit());
    let marker_and_digits = tuple((char('x'), two_hex_digits));
    let mut escape = map_res(marker_and_digits, |(_marker, digits): (char, &str)| {
        u8::from_str_radix(digits, 16).map(char::from)
    });
    escape(input)
}
/// Parses the body of an octal escape (the text after the backslash):
/// a literal `0` followed by exactly two octal digits, e.g. `033` -> U+001B.
///
/// The two digits are read as a base-8 `u8` and that byte value is converted
/// directly to the `char` with the same code point.
///
/// Only `0`-`7` are accepted as digits. Input such as `09a` is rejected by
/// the digit matcher itself rather than being consumed as "hex digits" and
/// failing later inside the radix-8 conversion.
fn parse_oct<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    map_res(
        tuple((
            char('0'),
            // The predicate must match the radix used below (8), not hexadecimal.
            take_while_m_n(2, 2, |c: char| c.is_digit(8)),
        )),
        |(_, digits): (char, &str)| u8::from_str_radix(digits, 8).map(char::from),
    )(input)
}
/// Parses one backslash escape sequence and returns the character it denotes.
///
/// After the leading `\`, the alternatives are tried in the order listed:
/// the numeric forms (`u{...}`, `xNN`, `0NN`) first, then the
/// single-character escapes.
fn parse_escaped_char<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    let escape_body = alt((
        parse_unicode,
        parse_hex,
        parse_oct,
        value('\n', char('n')),
        value('\r', char('r')),
        value('\t', char('t')),
        // `\b` is backspace (U+0008).
        value('\u{08}', char('b')),
        // `\f` is form feed (U+000C).
        value('\u{0C}', char('f')),
        value('\\', char('\\')),
        value('/', char('/')),
        value('"', char('"')),
    ));
    let mut escape = preceded(char('\\'), escape_body);
    escape(input)
}
/// Parses a backslash followed by one or more whitespace characters and
/// returns the whitespace that was matched (the backslash is discarded).
fn parse_escaped_whitespace<'a, E>(input: &'a str) -> IResult<&'a str, &'a str, E>
where
    E: ParseError<&'a str>,
{
    let mut backslash_then_whitespace = preceded(char('\\'), multispace1);
    backslash_then_whitespace(input)
}
/// Parses a run of characters containing neither a double quote nor a
/// backslash, returning the matched slice of the input.
///
/// An empty match is rejected by the `verify` guard.
fn parse_literal<'a, E>(input: &'a str) -> IResult<&'a str, &'a str, E>
where
    E: ParseError<&'a str>,
{
    verify(is_not("\"\\"), |run: &str| !run.is_empty())(input)
}
/// One piece of a string literal's contents, as produced by `parse_fragment`
/// and folded into the final `String` by `parse_string`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
/// A run of unescaped text, borrowed from the input.
Literal(&'a str),
/// A single character produced by a backslash escape sequence.
EscapedChar(char),
/// A backslash followed by whitespace; `parse_string` adds nothing to the
/// output for this fragment.
EscapedWS,
}
/// Parses the next piece of a string body: a literal run, an escaped
/// character, or escaped whitespace, tried in that order.
fn parse_fragment<'a, E>(input: &'a str) -> IResult<&'a str, StringFragment<'a>, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    let literal = map(parse_literal, StringFragment::Literal);
    let escaped_char = map(parse_escaped_char, StringFragment::EscapedChar);
    let escaped_whitespace = value(StringFragment::EscapedWS, parse_escaped_whitespace);
    alt((literal, escaped_char, escaped_whitespace))(input)
}
pub(crate) fn parse_string<'a, E>(input: &'a str) -> IResult<&'a str, String, E>
where
E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
let build_string = fold_many0(
parse_fragment,
String::new,
|mut string, fragment| {
match fragment {
StringFragment::Literal(s) => string.push_str(s),
StringFragment::EscapedChar(c) => string.push(c),
StringFragment::EscapedWS => {}
}
string
},
);
delimited(char('"'), build_string, char('"'))(input)
}
#[cfg(test)]
mod tests {
    use super::parse_string;
    use nom::error::VerboseError;

    /// Asserts that `source` parses with nothing left over and yields `expected`.
    fn assert_parses_to(source: &str, expected: &str) {
        let outcome = parse_string::<VerboseError<&str>>(source);
        assert_eq!(outcome, Ok(("", expected.to_string())));
    }

    #[test]
    fn test_parse_string() {
        assert_parses_to(r#""Hello, world!""#, "Hello, world!");
    }

    #[test]
    fn test_parse_escaped() {
        assert_parses_to(r#""Hello, \"world\"!""#, "Hello, \"world\"!");
    }

    #[test]
    fn test_parse_escaped_x1b() {
        assert_parses_to(r#""echo \"\x1b""#, "echo \"\x1b");
    }

    #[test]
    fn test_parse_escaped_033() {
        assert_parses_to(r#""echo \"\033""#, "echo \"\x1b");
    }
}