cstr 0.2.5

Macro for building static CStr reference
Documentation
use crate::Error;
use proc_macro2::{Ident, Literal, Span, TokenStream, TokenTree};
use std::char;

macro_rules! unexpected_content {
    () => {
        "expected one of: byte string literal, string literal, identifier"
    };
}

pub(crate) fn parse_input(input: TokenStream) -> Result<(Vec<u8>, Span), Error> {
    let mut tokens = input.into_iter();
    let token = match tokens.next() {
        Some(token) => token,
        None => {
            return Err(Error(
                Span::call_site(),
                concat!("unexpected end of input, ", unexpected_content!()),
            ))
        }
    };
    let span = token.span();
    let result = match token {
        TokenTree::Literal(literal) => match parse_literal(literal) {
            Ok(result) => result,
            Err(msg) => return Err(Error(span, msg)),
        },
        TokenTree::Ident(ident) => parse_ident(ident),
        _ => return Err(Error(span, unexpected_content!())),
    };
    if let Some(token) = tokens.next() {
        return Err(Error(token.span(), "unexpected token"));
    }
    Ok((result, span))
}

fn parse_literal(literal: Literal) -> Result<Vec<u8>, &'static str> {
    let s = literal.to_string();
    let s = s.as_bytes();
    match s[0] {
        b'"' => Ok(parse_cooked_content(&s)),
        b'r' => Ok(parse_raw_content(&s[1..])),
        b'b' => match s[1] {
            b'"' => Ok(parse_cooked_content(&s[1..])),
            b'r' => Ok(parse_raw_content(&s[2..])),
            _ => Err(unexpected_content!()),
        },
        _ => Err(unexpected_content!()),
    }
}

fn all_pounds(bytes: &[u8]) -> bool {
    bytes.iter().all(|b| *b == b'#')
}

/// Parses raw string / bytes content after `r` prefix.
fn parse_raw_content(s: &[u8]) -> Vec<u8> {
    let q_start = s.iter().position(|b| *b == b'"').unwrap();
    let q_end = s.iter().rposition(|b| *b == b'"').unwrap();
    assert!(all_pounds(&s[0..q_start]));
    assert!(all_pounds(&s[q_end + 1..q_end + q_start + 1]));
    Vec::from(&s[q_start + 1..q_end])
}

/// Parses the cooked string / bytes content within quotes.
fn parse_cooked_content(mut s: &[u8]) -> Vec<u8> {
    s = &s[1..s.iter().rposition(|b| *b == b'"').unwrap()];
    let mut result = Vec::new();
    while !s.is_empty() {
        match s[0] {
            b'\\' => {}
            b'\r' => {
                assert_eq!(s[1], b'\n');
                result.push(b'\n');
                s = &s[2..];
                continue;
            }
            b => {
                result.push(b);
                s = &s[1..];
                continue;
            }
        }
        let b = s[1];
        s = &s[2..];
        match b {
            b'x' => {
                let (b, rest) = backslash_x(&s);
                result.push(b);
                s = rest;
            }
            b'u' => {
                let (c, rest) = backslash_u(&s);
                result.extend_from_slice(c.encode_utf8(&mut [0; 4]).as_bytes());
                s = rest;
            }
            b'n' => result.push(b'\n'),
            b'r' => result.push(b'\r'),
            b't' => result.push(b'\t'),
            b'\\' => result.push(b'\\'),
            b'0' => result.push(b'\0'),
            b'\'' => result.push(b'\''),
            b'"' => result.push(b'"'),
            b'\r' | b'\n' => {
                let next = s.iter().position(|b| {
                    let ch = char::from_u32(u32::from(*b)).unwrap();
                    !ch.is_whitespace()
                });
                match next {
                    Some(pos) => s = &s[pos..],
                    None => s = b"",
                }
            }
            b => panic!("unexpected byte {:?} after \\", b),
        }
    }
    result
}

fn backslash_x(s: &[u8]) -> (u8, &[u8]) {
    let ch = hex_to_u8(s[0]) * 0x10 + hex_to_u8(s[1]);
    (ch, &s[2..])
}

fn hex_to_u8(b: u8) -> u8 {
    match b {
        b'0'..=b'9' => b - b'0',
        b'a'..=b'f' => b - b'a' + 10,
        b'A'..=b'F' => b - b'A' + 10,
        _ => unreachable!("unexpected non-hex character {:?} after \\x", b),
    }
}

fn backslash_u(s: &[u8]) -> (char, &[u8]) {
    assert_eq!(s[0], b'{');
    let end = s[1..].iter().position(|b| *b == b'}').unwrap();
    let mut ch = 0;
    for b in &s[1..=end] {
        ch *= 0x10;
        ch += u32::from(hex_to_u8(*b));
    }
    (char::from_u32(ch).unwrap(), &s[end + 2..])
}

fn parse_ident(ident: Ident) -> Vec<u8> {
    ident.to_string().into_bytes()
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::str::FromStr;

    // Tests below were modified from
    // https://github.com/dtolnay/syn/blob/cd5fdc0f530f822446fccaf831669cd0cf4a0fc9/tests/test_lit.rs

    fn lit(s: &str) -> Vec<u8> {
        match TokenStream::from_str(s)
            .unwrap()
            .into_iter()
            .next()
            .unwrap()
        {
            TokenTree::Literal(lit) => parse_literal(lit).unwrap(),
            _ => panic!(),
        }
    }

    #[test]
    fn strings() {
        #[track_caller]
        fn test_string(s: &str, value: &[u8]) {
            assert_eq!(lit(s), value);
        }

        test_string("\"a\"", b"a");
        test_string("\"\\n\"", b"\n");
        test_string("\"\\r\"", b"\r");
        test_string("\"\\t\"", b"\t");
        test_string("\"🐕\"", b"\xf0\x9f\x90\x95"); // NOTE: This is an emoji
        test_string("\"\\\"\"", b"\"");
        test_string("\"'\"", b"'");
        test_string("\"\"", b"");
        test_string("\"\\u{1F415}\"", b"\xf0\x9f\x90\x95");
        test_string(
            "\"contains\nnewlines\\\nescaped newlines\"",
            b"contains\nnewlinesescaped newlines",
        );
        test_string("r\"raw\nstring\\\nhere\"", b"raw\nstring\\\nhere");
        test_string("\"...\"q", b"...");
        test_string("r\"...\"q", b"...");
        test_string("r##\"...\"##q", b"...");
    }

    #[test]
    fn byte_strings() {
        #[track_caller]
        fn test_byte_string(s: &str, value: &[u8]) {
            assert_eq!(lit(s), value);
        }

        test_byte_string("b\"a\"", b"a");
        test_byte_string("b\"\\n\"", b"\n");
        test_byte_string("b\"\\r\"", b"\r");
        test_byte_string("b\"\\t\"", b"\t");
        test_byte_string("b\"\\\"\"", b"\"");
        test_byte_string("b\"'\"", b"'");
        test_byte_string("b\"\"", b"");
        test_byte_string(
            "b\"contains\nnewlines\\\nescaped newlines\"",
            b"contains\nnewlinesescaped newlines",
        );
        test_byte_string("br\"raw\nstring\\\nhere\"", b"raw\nstring\\\nhere");
        test_byte_string("b\"...\"q", b"...");
        test_byte_string("br\"...\"q", b"...");
        test_byte_string("br##\"...\"##q", b"...");
    }
}