castle_tokenizer/token_parsers/
parse_string.rs

1use std::io::Read;
2use castle_input_cursor::{Cursor, Position, Span};
3use castle_types::{Primitive, CastleError};
4
5use crate::{TokenKind, Token};
6
7pub fn parse_string(cursor: &mut Cursor<impl Read>, start_pos: Position) -> Result<Token, CastleError> {
8    let mut string = String::new();
9    // consume the "
10    cursor.next_char()?;
11
12    loop {
13        let c = cursor
14            .next_char()?
15            .ok_or(CastleError::syntax("unexpected end of file", cursor.pos()))?;
16
17        let ch = char::try_from(c)
18            .ok()
19            .ok_or(CastleError::syntax("invalid character", cursor.pos()))?;
20
21        // handle escape character \ (backslash)
22        if ch == '\\' {
23            // list of escape characters: (based on JSON)
24            // \b	Backspace (ascii 8)
25            // \f	Form feed (ascii 12)
26            // \n	New line
27            // \r	Carriage return
28            // \t	Horizontal tab
29            // \uXXXX	Character with 16 bit hex value XXXX
30            // \\     Backslash
31            // \/     Forward slash
32            // \"     Double quote
33
34            let c = cursor
35                .next_char()?
36                .ok_or(CastleError::syntax("unexpected end of file", cursor.pos()))?;
37
38            let ch = char::try_from(c)
39                .ok()
40                .ok_or(CastleError::syntax("invalid character", cursor.pos()))?;
41
42            match ch {
43                'b' => string.push('\u{0008}'),
44                'f' => string.push('\u{000C}'),
45                'n' => string.push('\n'),
46                'r' => string.push('\r'),
47                't' => string.push('\t'),
48                'u' => {
49                    let mut hex_string = String::new();
50                    for _ in 0..4 {
51                        let c = cursor
52                            .next_char()?
53                            .ok_or(CastleError::syntax("unexpected end of file", cursor.pos()))?;
54
55                        let ch = char::try_from(c)
56                            .ok()
57                            .ok_or(CastleError::syntax("invalid character", cursor.pos()))?;
58
59                        if ch.is_ascii_hexdigit() {
60                            hex_string.push(ch);
61                        } else {
62                            return Err(CastleError::syntax(
63                                "Invalid hexadecimal escape sequence: missing hexadecimal value",
64                                cursor.pos(),
65                            ));
66                        }
67                    }
68                    let hex_value = u32::from_str_radix(&hex_string, 16).unwrap();
69                    string.push(std::char::from_u32(hex_value).unwrap());
70                }
71                '\\' => string.push('\\'),
72                '/' => string.push('/'),
73                '"' => string.push('"'),
74                _ => {
75                    return Err(CastleError::syntax(
76                        format!("Invalid escape sequence: {}", c),
77                        cursor.pos(),
78                    ));
79                }
80            }
81        } else if ch == '"' {
82            break;
83        } else {
84            string.push(ch);
85        }
86    }
87
88    Ok(Token::new(
89        TokenKind::Primitive(Primitive::String(string.into_boxed_str())),
90        Span::new(start_pos, cursor.pos()),
91    ))
92}