use nom::branch::alt;
use nom::bytes::complete::{is_not, take_while_m_n};
use nom::character::complete::{char, multispace1};
use nom::combinator::{map, map_opt, map_res, value, verify};
use nom::error::{FromExternalError, ParseError};
use nom::multi::fold_many0 as fold;
use nom::sequence::{delimited, preceded};
use nom::{IResult, Parser};
/// Parses the `u{XXXX}` portion of a unicode escape (the leading backslash
/// is expected to have been consumed already) and returns the decoded `char`.
///
/// Between one and six hexadecimal digits are accepted inside the braces.
/// The parser fails when the digits do not form a valid `char` according to
/// `std::char::from_u32`.
fn parse_unicode<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    // One to six hex digits, wrapped in `{` ... `}`.
    let hex_digits = take_while_m_n(1, 6, |ch: char| ch.is_ascii_hexdigit());
    let braced_hex = delimited(char('{'), hex_digits, char('}'));

    // The braces must be introduced by a literal `u`.
    let escape_body = preceded(char('u'), braced_hex);

    // Hex text -> numeric code point -> `char`.
    let code_point = map_res(escape_body, |digits| u32::from_str_radix(digits, 16));
    map_opt(code_point, std::char::from_u32).parse(input)
}
/// Parses a backslash escape sequence and returns the character it denotes.
///
/// Recognised forms after the backslash: `u{XXXX}`, `n`, `r`, `t`, `b`, `f`,
/// `\`, `/` and `"`.
fn parse_escaped_char<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    // Everything that may follow the backslash, tried from top to bottom.
    let escape_code = alt((
        parse_unicode,
        value('\n', char('n')),
        value('\r', char('r')),
        value('\t', char('t')),
        value('\x08', char('b')), // backspace
        value('\x0C', char('f')), // form feed
        value('\\', char('\\')),
        value('/', char('/')),
        value('"', char('"')),
    ));

    preceded(char('\\'), escape_code).parse(input)
}
/// Parses a backslash immediately followed by one or more whitespace
/// characters and returns the whitespace that was matched.
fn parse_escaped_whitespace<'a, E>(input: &'a str) -> IResult<&'a str, &'a str, E>
where
    E: ParseError<&'a str>,
{
    let backslash = char('\\');
    preceded(backslash, multispace1).parse(input)
}
/// Parses a non-empty run of characters that contains neither a double quote
/// nor a backslash, returning it as a slice of the input.
fn parse_literal<'a, E>(input: &'a str) -> IResult<&'a str, &'a str, E>
where
    E: ParseError<&'a str>,
{
    // Characters that end a literal run: the closing quote and the escape introducer.
    let terminators = "\"\\";
    verify(is_not(terminators), |text: &str| !text.is_empty()).parse(input)
}
/// One parsed piece of a string body, as produced by `parse_fragment`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
/// A run of unescaped text borrowed directly from the input.
Literal(&'a str),
/// A single character decoded from a backslash escape sequence.
EscapedChar(char),
/// A backslash followed by whitespace; `parse_string` adds nothing to the
/// output for this fragment.
EscapedWS,
}
/// Parses a single fragment of a string body: a literal run, an escaped
/// character, or an escaped block of whitespace (tried in that order).
fn parse_fragment<'a, E>(input: &'a str) -> IResult<&'a str, StringFragment<'a>, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    let literal = map(parse_literal, StringFragment::Literal);
    let escaped_char = map(parse_escaped_char, StringFragment::EscapedChar);
    let escaped_ws = value(StringFragment::EscapedWS, parse_escaped_whitespace);

    alt((literal, escaped_char, escaped_ws)).parse(input)
}
pub fn parse_string<'a, E>(input: &'a str) -> IResult<&'a str, String, E>
where
E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
let build_string = fold(
parse_fragment,
String::new,
|mut string, fragment| {
match fragment {
StringFragment::Literal(s) => string.push_str(s),
StringFragment::EscapedChar(c) => string.push(c),
StringFragment::EscapedWS => {}
}
string
},
);
delimited(char('"'), build_string, char('"')).parse(input)
}
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Well-formed inputs: checks both the decoded string and the unconsumed
    /// remainder of the input.
    #[rstest]
    #[case(r#""string""#, "string", "")]
    #[case(r#""string" is complete"#, "string", " is complete")]
    #[case(r#""str\"ing""#, "str\"ing", "")]
    #[case(r#""str\"ing" is complete"#, "str\"ing", " is complete")]
    #[case(r#""str\ning$()""#, "str\ning$()", "")]
    #[case(r#""str {ing}""#, "str {ing}", "")]
    #[case(r#""hello""#, "hello", "")]
    #[case(r#""he\\llo""#, "he\\llo", "")]
    #[case(r#""he\"llo""#, "he\"llo", "")]
    #[case(r#""{hello}""#, "{hello}", "")]
    #[case(r#""hello world""#, "hello world", "")]
    #[case(r#""hello\tworld\nfoo""#, "hello\tworld\nfoo", "")]
    #[case(r#""hello\\backslash""#, "hello\\backslash", "")]
    // Empty string body.
    #[case(r#""""#, "", "")]
    // Unicode escapes.
    #[case(r#""\u{1F602}""#, "\u{1F602}", "")]
    #[case(r#""\u{41}BC""#, "ABC", "")]
    // Remaining single-character escapes.
    #[case(r#""a\/b\b\f\r""#, "a/b\u{08}\u{0C}\r", "")]
    // A backslash followed by whitespace is dropped from the output.
    #[case("\"abc\\\n    def\"", "abcdef", "")]
    fn parse_string_test(#[case] txt: &str, #[case] expected: &str, #[case] remainder: &str) {
        let (rest, parsed) = parse_string::<nom::error::Error<_>>(txt).unwrap();
        assert_eq!(rest, remainder);
        assert_eq!(parsed, expected);
    }

    /// Malformed inputs must be rejected rather than partially accepted.
    #[rstest]
    // Missing opening quote.
    #[case(r#"string""#)]
    // Missing closing quote.
    #[case(r#""string"#)]
    // Unknown escape sequence.
    #[case(r#""bad \q escape""#)]
    // Surrogate code point is not a valid `char`.
    #[case(r#""\u{D800}""#)]
    // No hex digits inside the braces.
    #[case(r#""\u{}""#)]
    // More than six hex digits.
    #[case(r#""\u{1234567}""#)]
    fn parse_string_rejects_malformed_input(#[case] txt: &str) {
        assert!(parse_string::<nom::error::Error<_>>(txt).is_err());
    }
}