use nom::{
IResult, Parser,
branch::alt,
bytes::complete::take_while_m_n,
character::complete::{anychar, char, none_of},
combinator::{map, map_opt, value, verify},
multi::many0,
sequence::{delimited, preceded},
};
/// Parses a string literal written in any of the three supported quoting styles.
///
/// The alternatives are attempted in order: double-quoted (`"…"`),
/// single-quoted (`'…'`), then backtick-delimited raw strings. On success the
/// decoded contents are returned together with the unconsumed rest of `input`.
pub fn string_literal(input: &str) -> IResult<&str, String> {
    let mut any_style = alt((double_quoted_string, single_quoted_string, raw_string));
    any_style.parse(input)
}
/// Parses a `"`-delimited string literal and returns its decoded contents.
///
/// The characters between the quotes are produced one at a time by
/// `double_quoted_char` and gathered into a `String`; both the opening and the
/// closing quote are required.
pub fn double_quoted_string(input: &str) -> IResult<&str, String> {
    let contents = map(many0(double_quoted_char), |decoded: Vec<char>| {
        decoded.into_iter().collect::<String>()
    });
    delimited(char('"'), contents, char('"')).parse(input)
}
/// Parses a `'`-delimited string literal and returns its decoded contents.
///
/// The characters between the quotes are produced one at a time by
/// `single_quoted_char` and gathered into a `String`; both the opening and the
/// closing quote are required.
pub fn single_quoted_string(input: &str) -> IResult<&str, String> {
    let contents = map(many0(single_quoted_char), |decoded: Vec<char>| {
        decoded.into_iter().collect::<String>()
    });
    delimited(char('\''), contents, char('\'')).parse(input)
}
/// Parses a backtick-delimited raw string.
///
/// No escape processing takes place: every character up to the next backtick
/// (backslashes and newlines included) is copied into the result verbatim.
pub fn raw_string(input: &str) -> IResult<&str, String> {
    let verbatim = map(many0(none_of("`")), |raw: Vec<char>| {
        raw.into_iter().collect::<String>()
    });
    delimited(char('`'), verbatim, char('`')).parse(input)
}
/// Parses one logical character of a double-quoted string body.
///
/// This is either a backslash escape (decoded by `escape_char`) or any single
/// character other than `"`, `\` and a line feed.
fn double_quoted_char(input: &str) -> IResult<&str, char> {
    let escaped = preceded(char('\\'), escape_char('"'));
    let literal = verify(anychar, |c: &char| !matches!(*c, '"' | '\\' | '\n'));
    alt((escaped, literal)).parse(input)
}
/// Parses one logical character of a single-quoted string body.
///
/// This is either a backslash escape (decoded by `escape_char`) or any single
/// character other than `'`, `\` and a line feed.
fn single_quoted_char(input: &str) -> IResult<&str, char> {
    let escaped = preceded(char('\\'), escape_char('\''));
    let literal = verify(anychar, |c: &char| !matches!(*c, '\'' | '\\' | '\n'));
    alt((escaped, literal)).parse(input)
}
/// Builds a parser for the part of an escape sequence that follows the backslash.
///
/// `quote_char` is the delimiter of the enclosing string. Note that `"` and `'`
/// are both accepted as escapable characters regardless of `quote_char`.
///
/// Alternatives are tried in this order: single-letter control escapes and the
/// escaped backslash, quote characters, then the numeric forms (`xHH`, `uHHHH`,
/// `UHHHHHHHH`, and three octal digits).
fn escape_char(quote_char: char) -> impl FnMut(&str) -> IResult<&str, char> {
    move |input: &str| {
        alt((
            // Single-letter control escapes, plus the escaped backslash itself.
            alt((
                value('\x07', char('a')),
                value('\x08', char('b')),
                value('\x0c', char('f')),
                value('\n', char('n')),
                value('\r', char('r')),
                value('\t', char('t')),
                value('\x0b', char('v')),
                value('\\', char('\\')),
            )),
            // Quote characters: the active delimiter first, then either quote kind.
            alt((
                value(quote_char, char(quote_char)),
                value('"', char('"')),
                value('\'', char('\'')),
            )),
            // Numeric escapes; octal goes last because it has no prefix letter.
            alt((
                hex_escape,
                unicode_escape_short,
                unicode_escape_long,
                octal_escape,
            )),
        ))
        .parse(input)
    }
}
/// Parses an `xHH` escape body: the letter `x` followed by exactly two hex digits.
///
/// The two digits are read as a byte, and that byte is mapped to the character
/// with the same code point (so `xFF` yields U+00FF).
fn hex_escape(input: &str) -> IResult<&str, char> {
    let two_hex_digits = take_while_m_n(2, 2, |c: char| c.is_ascii_hexdigit());
    let byte_to_char = |digits: &str| u8::from_str_radix(digits, 16).ok().map(char::from);
    preceded(char('x'), map_opt(two_hex_digits, byte_to_char)).parse(input)
}
/// Parses `prefix` followed by exactly `digits` hexadecimal digits and decodes
/// the digits as a Unicode scalar value.
///
/// Shared implementation of the `uHHHH` and `UHHHHHHHH` escape bodies. The
/// parse fails if fewer than `digits` hex digits follow the prefix or if the
/// number is not a valid scalar value.
fn fixed_width_unicode_escape(input: &str, prefix: char, digits: usize) -> IResult<&str, char> {
    preceded(
        char(prefix),
        map_opt(
            take_while_m_n(digits, digits, |c: char| c.is_ascii_hexdigit()),
            // `char::from_u32` returns `None` for surrogates (U+D800..=U+DFFF)
            // and for anything above U+10FFFF, so no separate range check is
            // needed here.
            |hex: &str| u32::from_str_radix(hex, 16).ok().and_then(char::from_u32),
        ),
    )
    .parse(input)
}

/// Parses a `uHHHH` escape body: the letter `u` followed by exactly four hex
/// digits naming a Unicode scalar value. Surrogate code points are rejected.
fn unicode_escape_short(input: &str) -> IResult<&str, char> {
    fixed_width_unicode_escape(input, 'u', 4)
}

/// Parses a `UHHHHHHHH` escape body: the letter `U` followed by exactly eight
/// hex digits naming a Unicode scalar value. Surrogates and values above
/// U+10FFFF are rejected.
fn unicode_escape_long(input: &str) -> IResult<&str, char> {
    fixed_width_unicode_escape(input, 'U', 8)
}
/// Parses an octal escape body: exactly three octal digits (`0`–`7`).
///
/// The digits are read as a byte, so values above `377` are rejected; the byte
/// is mapped to the character with the same code point.
fn octal_escape(input: &str) -> IResult<&str, char> {
    let three_octal_digits = take_while_m_n(3, 3, |c: char| c.is_digit(8));
    let byte_to_char = |digits: &str| u8::from_str_radix(digits, 8).ok().map(char::from);
    map_opt(three_octal_digits, byte_to_char).parse(input)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `input` is consumed entirely by `string_literal` and that
    /// the decoded value equals `expected`.
    fn assert_string(input: &str, expected: &str) {
        match string_literal(input) {
            Ok((remaining, value)) => {
                assert!(
                    remaining.is_empty(),
                    "Parser did not consume entire input '{}', remaining: '{}'",
                    input,
                    remaining
                );
                assert_eq!(
                    value, expected,
                    "For input '{}', expected {:?}, got {:?}",
                    input, expected, value
                );
            }
            Err(e) => panic!("Failed to parse '{}': {:?}", input, e),
        }
    }

    /// Asserts that `string_literal` either rejects `input` or leaves part of
    /// it unconsumed.
    fn assert_string_fails(input: &str) {
        let fully_parsed =
            matches!(string_literal(input), Ok((remaining, _)) if remaining.is_empty());
        assert!(
            !fully_parsed,
            "Expected '{}' to fail or not fully parse",
            input
        );
    }

    #[test]
    fn test_double_quoted_basic() {
        assert_string(r#""hello""#, "hello");
        assert_string(r#""world""#, "world");
        assert_string(r#""test string""#, "test string");
        assert_string(r#""""#, "");
    }

    #[test]
    fn test_double_quoted_escaped_quote() {
        assert_string(r#""say \"hello\"""#, "say \"hello\"");
    }

    #[test]
    fn test_double_quoted_simple_escapes() {
        assert_string(r#""\n""#, "\n");
        assert_string(r#""\t""#, "\t");
        assert_string(r#""\r""#, "\r");
        assert_string(r#""\\""#, "\\");
        assert_string(r#""\a""#, "\x07");
        assert_string(r#""\b""#, "\x08");
        assert_string(r#""\f""#, "\x0c");
        assert_string(r#""\v""#, "\x0b");
    }

    #[test]
    fn test_double_quoted_hex_escape() {
        assert_string(r#""\xFF""#, "\u{ff}");
        assert_string(r#""\x00""#, "\0");
        assert_string(r#""\x41""#, "A");
    }

    #[test]
    fn test_double_quoted_unicode_escape() {
        assert_string(r#""\u0041""#, "A");
        assert_string(r#""\u1234""#, "\u{1234}");
        assert_string(r#""\U00010111""#, "\u{10111}");
    }

    #[test]
    fn test_unicode_escape_boundaries() {
        // The last code point below and the first above the surrogate range.
        assert_string(r#""\uD7FF""#, "\u{d7ff}");
        assert_string(r#""\uE000""#, "\u{e000}");
        // The largest Unicode scalar value.
        assert_string(r#""\U0010FFFF""#, "\u{10ffff}");
    }

    #[test]
    fn test_double_quoted_octal_escape() {
        assert_string(r#""\377""#, "\u{ff}");
        assert_string(r#""\000""#, "\0");
        assert_string(r#""\101""#, "A");
    }

    #[test]
    fn test_invalid_escape_sequences() {
        // Unknown escape letter.
        assert_string_fails(r#""\q""#);
        // Too few digits for the fixed-width numeric forms.
        assert_string_fails(r#""\x4""#);
        assert_string_fails(r#""\u12""#);
        assert_string_fails(r#""\U0001011""#);
        assert_string_fails(r#""\12""#);
        // Not an octal digit, and an octal value that does not fit in a byte.
        assert_string_fails(r#""\8""#);
        assert_string_fails(r#""\400""#);
        // Surrogates and values beyond U+10FFFF are not Unicode scalar values.
        assert_string_fails(r#""\uD800""#);
        assert_string_fails(r#""\uDFFF""#);
        assert_string_fails(r#""\U00110000""#);
    }

    #[test]
    fn test_single_quoted_basic() {
        assert_string("'hello'", "hello");
        assert_string("'world'", "world");
        assert_string("''", "");
    }

    #[test]
    fn test_single_quoted_escaped_quote() {
        assert_string(r"'say \'hello\''", "say 'hello'");
    }

    #[test]
    fn test_single_quoted_escapes() {
        assert_string(r"'\n'", "\n");
        assert_string(r"'\t'", "\t");
        assert_string(r"'\\'", "\\");
    }

    #[test]
    fn test_raw_string_basic() {
        assert_string("`hello`", "hello");
        assert_string("`test string`", "test string");
        assert_string("``", "");
    }

    #[test]
    fn test_raw_string_no_escapes() {
        assert_string(r"`\n\t\\`", r"\n\t\\");
        assert_string(r"`test\.expression`", r"test\.expression");
    }

    #[test]
    fn test_raw_string_can_contain_quotes() {
        assert_string(r#"`"hello"`"#, "\"hello\"");
        assert_string(r"`'hello'`", "'hello'");
    }

    #[test]
    fn test_complex_escape_sequence() {
        assert_string(
            r#""\a\b\f\n\r\t\v\\\" - \xFF\377\u1234\U00010111""#,
            "\x07\x08\x0c\n\r\t\x0b\\\" - \u{ff}\u{ff}\u{1234}\u{10111}",
        );
    }

    #[test]
    fn test_unterminated_double_quoted() {
        assert_string_fails(r#"""#);
        assert_string_fails(r#""hello"#);
        // The final quote is consumed by the escape, so no closing quote remains.
        assert_string_fails(r#""abc\""#);
    }

    #[test]
    fn test_unterminated_single_quoted() {
        assert_string_fails("'");
        assert_string_fails("'hello");
    }

    #[test]
    fn test_unterminated_raw_string() {
        assert_string_fails("`");
        assert_string_fails("`hello");
    }

    #[test]
    fn test_newline_in_quoted_string() {
        assert_string_fails("\"hello\nworld\"");
        assert_string_fails("'hello\nworld'");
    }

    #[test]
    fn test_raw_string_can_have_newlines() {
        assert_string("`hello\nworld`", "hello\nworld");
    }

    #[test]
    fn test_string_followed_by_other_content() {
        let (remaining, value) = string_literal(r#""hello" world"#).unwrap();
        assert_eq!(value, "hello");
        assert_eq!(remaining, " world");
    }
}