use std::borrow::Cow;
use nom::branch::alt;
use nom::bytes::streaming::{is_not, take_while_m_n};
use nom::character::complete::anychar;
use nom::character::streaming::{char, multispace1};
use nom::combinator::{map, map_opt, map_res, value, verify};
use nom::error::{FromExternalError, ParseError};
use nom::multi::fold_many0;
use nom::sequence::{delimited, preceded};
use nom::{IResult, Parser};
/// Parses the tail of a unicode escape of the form `u{XXXX}`, where `XXXX` is
/// one to six hexadecimal digits, and yields the character it denotes.
///
/// The leading backslash is not handled here; `parse_escaped_char` consumes it
/// before delegating to this parser. Parsing fails when the digits do not name
/// a valid Unicode scalar value, because `std::char::from_u32` returns `None`
/// for those.
fn parse_unicode<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    // `{` + 1..=6 hex digits + `}`; only the digits are kept.
    let braced_digits = delimited(
        char('{'),
        take_while_m_n(1, 6, |c: char| c.is_ascii_hexdigit()),
        char('}'),
    );

    // The `u` marker is required but discarded.
    let hex_digits = preceded(char('u'), braced_digits);

    // Hex text -> numeric code point.
    let code_point = map_res(hex_digits, |digits| u32::from_str_radix(digits, 16));

    // Not every u32 is a valid `char`, hence the fallible conversion.
    map_opt(code_point, std::char::from_u32).parse(input)
}
/// Parses one recognized escape sequence (a backslash plus its selector) and
/// returns the character the sequence stands for.
///
/// Recognized selectors: `u{XXXX}` (via `parse_unicode`), `n`, `r`, `t`, `b`,
/// `f`, `\`, `/` and `"`.
fn parse_escaped_char<'a, E>(input: &'a str) -> IResult<&'a str, char, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    // Everything that may legally follow the backslash, each mapped to the
    // character it represents. The order of alternatives is kept as-is.
    let escape_body = alt((
        parse_unicode,
        value('\n', char('n')),
        value('\r', char('r')),
        value('\t', char('t')),
        value('\u{08}', char('b')),
        value('\u{0C}', char('f')),
        value('\\', char('\\')),
        value('/', char('/')),
        value('"', char('"')),
    ));

    preceded(char('\\'), escape_body).parse(input)
}
/// Parses a backslash immediately followed by one or more whitespace
/// characters and returns the whitespace slice (the backslash is dropped).
fn parse_escaped_whitespace<'a, E: ParseError<&'a str>>(
    input: &'a str,
) -> IResult<&'a str, &'a str, E> {
    let backslash = char('\\');
    let mut escaped_ws = preceded(backslash, multispace1);
    escaped_ws.parse(input)
}
/// Parses a backslash followed by any single character and returns that
/// character (the backslash itself is dropped from the output).
fn parse_escaped_anychar<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, char, E> {
    let backslash = char('\\');
    let mut escaped_any = preceded(backslash, anychar);
    escaped_any.parse(input)
}
/// Parses a non-empty run of characters that contains neither a double quote
/// nor a backslash, returning the run as a slice of the input.
fn parse_literal<'a, E: ParseError<&'a str>>(input: &'a str) -> IResult<&'a str, &'a str, E> {
    // Reject an empty match so a caller that repeats this parser always
    // makes progress.
    verify(is_not("\"\\"), |run: &str| !run.is_empty()).parse(input)
}
/// One piece of a string body, as produced by a single run of `parse_fragment`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum StringFragment<'a> {
/// A run of characters containing neither `"` nor `\`, borrowed from the input.
Literal(&'a str),
/// A recognized escape sequence, already decoded to the character it denotes.
EscapedChar(char),
/// A backslash followed by whitespace; carries no data.
EscapedWS,
/// A backslash followed by a character that is not a recognized escape;
/// holds the character that followed the backslash.
EscapedAnychar(char),
}
/// Parses the next fragment of a string body.
///
/// Alternatives are tried in this order: a literal run, a recognized escape,
/// a backslash followed by whitespace, and finally a backslash followed by
/// any other character.
fn parse_fragment<'a, E>(input: &'a str) -> IResult<&'a str, StringFragment<'a>, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    let literal = map(parse_literal, StringFragment::Literal);
    let known_escape = map(parse_escaped_char, StringFragment::EscapedChar);
    let escaped_ws = value(StringFragment::EscapedWS, parse_escaped_whitespace);
    // Catch-all: must come last so the specific escapes above win.
    let unknown_escape = map(parse_escaped_anychar, StringFragment::EscapedAnychar);

    alt((literal, known_escape, escaped_ws, unknown_escape)).parse(input)
}
/// Parses the body of a string (without its surrounding quotes), decoding
/// escape sequences as it goes.
///
/// Fragments are consumed with `parse_fragment` until it no longer matches
/// (for example at an unescaped `"`), which is left in the remaining input.
///
/// How each fragment contributes to the result:
/// - literal runs are copied through unchanged;
/// - recognized escapes contribute the character they denote;
/// - a backslash followed by whitespace contributes nothing;
/// - any other backslash sequence is kept verbatim (backslash included).
///
/// The result borrows from `input` (`Cow::Borrowed`) when the whole text is a
/// single literal run or is empty; an owned `String` is allocated only once a
/// second piece has to be appended.
pub(crate) fn parse_escaped_string<'a, E>(input: &'a str) -> IResult<&'a str, Cow<'a, str>, E>
where
    E: ParseError<&'a str> + FromExternalError<&'a str, std::num::ParseIntError>,
{
    fold_many0(
        parse_fragment,
        || Cow::Borrowed(""),
        |mut acc: Cow<'a, str>, fragment| {
            match fragment {
                // Nothing accumulated yet: borrow the slice instead of copying it.
                StringFragment::Literal(s) if acc.is_empty() => acc = Cow::Borrowed(s),
                StringFragment::Literal(s) => acc.to_mut().push_str(s),
                StringFragment::EscapedChar(c) => acc.to_mut().push(c),
                StringFragment::EscapedWS => {}
                StringFragment::EscapedAnychar(c) => {
                    // Unknown escape: preserve it exactly as written.
                    let buf = acc.to_mut();
                    buf.push('\\');
                    buf.push(c);
                }
            }
            acc
        },
    )
    .parse(input)
}
#[cfg(test)]
mod test {
    use std::borrow::Cow;
    use nom::{Parser, bytes::complete::tag, sequence::delimited};
    use crate::prelude::ParserResult;
    use super::parse_escaped_string;

    /// A multi-line body containing raw newlines and tabs (no escape
    /// sequences) must come back unchanged once the quotes are stripped.
    #[test]
    fn parse_escaped_string_with_tab() {
        // The text expected between the quotes.
        let body = [
            "\n",
            "This is a long comment\n",
            "that spans multiple lines.\n\n",
            "It contains spaces, special characters like %, &, and @, and \n",
            "even new lines. This format ensures the string is \n",
            "properly encapsulated without breaking syntax rules.\n",
            "\t\t\n",
            "You can add as much text as needed here.",
        ]
        .concat();

        // The parser input is the same text wrapped in double quotes.
        let quoted = format!("\"{}\"", body);

        let outcome: ParserResult<Cow<'_, str>> =
            delimited(tag("\""), parse_escaped_string, tag("\"")).parse(quoted.as_str());
        let (_, parsed) = outcome.unwrap();

        assert_eq!(parsed, body);
    }
}