emmylua_parser/syntax/node/token/
string_analyzer.rs

1use crate::{kind::LuaTokenKind, parser_error::LuaParseError, LuaKind, LuaSyntaxToken};
2
3pub fn string_token_value(token: &LuaSyntaxToken) -> Result<String, LuaParseError> {
4    match LuaKind::from(token.kind()) {
5        LuaKind::Token(LuaTokenKind::TkString) => normal_string_value(token),
6        LuaKind::Token(LuaTokenKind::TkLongString) => long_string_value(token),
7        _ => unreachable!(),
8    }
9}
10
11fn long_string_value(token: &LuaSyntaxToken) -> Result<String, LuaParseError> {
12    let range = token.text_range();
13    let text = token.text();
14    if text.len() < 4 {
15        return Err(LuaParseError::new(&t!("String too short"), range));
16    }
17
18    let mut equal_num = 0;
19    let mut i = 0;
20    let mut chars = text.char_indices();
21
22    // check first char
23    if let Some((_, first_char)) = chars.next() {
24        if first_char != '[' {
25            return Err(LuaParseError::new(
26                &t!(
27                    "Invalid long string start, expected '[', found '%{char}'",
28                    char = first_char
29                ),
30                range,
31            ));
32        }
33    } else {
34        return Err(LuaParseError::new(
35            &t!("Invalid long string start, expected '[', found end of input"),
36            range,
37        ));
38    }
39
40    while let Some((idx, c)) = chars.next() {
41        // calc eq num
42        if c == '=' {
43            equal_num += 1;
44        } else if c == '[' {
45            i = idx + 1;
46            break;
47        } else {
48            return Err(LuaParseError::new(&t!("Invalid long string start"), range));
49        }
50    }
51
52    // check string len is enough
53    if text.len() < i + equal_num + 2 {
54        return Err(LuaParseError::new(
55            &t!(
56                "Invalid long string end, expected '%{eq}]'",
57                eq = "=".repeat(equal_num)
58            ),
59            range,
60        ));
61    }
62
63    // lua special rule for long string
64    if let Some((_, first_content_char)) = chars.next() {
65        if first_content_char == '\r' {
66            if let Some((_, next_char)) = chars.next() {
67                if next_char == '\n' {
68                    i += 2;
69                } else {
70                    i += 1;
71                }
72            }
73        } else if first_content_char == '\n' {
74            i += 1;
75        }
76    }
77
78    let content = &text[i..(text.len() - equal_num - 2)];
79
80    Ok(content.to_string())
81}
82
83fn normal_string_value(token: &LuaSyntaxToken) -> Result<String, LuaParseError> {
84    let text = token.text();
85    if text.len() < 2 {
86        return Ok(String::new());
87    }
88
89    let mut result = String::with_capacity(text.len() - 2);
90    let mut chars = text.chars().peekable();
91    let delimiter = chars.next().unwrap();
92
93    while let Some(c) = chars.next() {
94        match c {
95            '\\' => {
96                if let Some(next_char) = chars.next() {
97                    match next_char {
98                        'a' => result.push('\u{0007}'), // Bell
99                        'b' => result.push('\u{0008}'), // Backspace
100                        'f' => result.push('\u{000C}'), // Formfeed
101                        'n' => result.push('\n'),       // Newline
102                        'r' => result.push('\r'),       // Carriage return
103                        't' => result.push('\t'),       // Horizontal tab
104                        'v' => result.push('\u{000B}'), // Vertical tab
105                        'x' => {
106                            // Hexadecimal escape sequence
107                            let hex = chars.by_ref().take(2).collect::<String>();
108                            if hex.len() == 2 && hex.chars().all(|c| c.is_ascii_hexdigit()) {
109                                if let Ok(value) = u8::from_str_radix(&hex, 16) {
110                                    result.push(value as char);
111                                }
112                            } else {
113                                return Err(LuaParseError::new(
114                                    &t!("Invalid hex escape sequence '\\x%{hex}'", hex = hex),
115                                    token.text_range(),
116                                ));
117                            }
118                        }
119                        'u' => {
120                            // Unicode escape sequence
121                            if let Some('{') = chars.next() {
122                                let unicode_hex =
123                                    chars.by_ref().take_while(|c| *c != '}').collect::<String>();
124                                if let Ok(code_point) = u32::from_str_radix(&unicode_hex, 16) {
125                                    if let Some(unicode_char) = std::char::from_u32(code_point) {
126                                        result.push(unicode_char);
127                                    } else {
128                                        return Err(LuaParseError::new(
129                                            &t!(
130                                                "Invalid unicode escape sequence '\\u{{%{unicode_hex}}}'",
131                                                unicode_hex = unicode_hex
132                                            ),
133                                            token.text_range(),
134                                        ));
135                                    }
136                                }
137                            }
138                        }
139                        '0'..='9' => {
140                            // Decimal escape sequence
141                            let mut dec = String::new();
142                            dec.push(next_char);
143                            for _ in 0..2 {
144                                if let Some(digit) = chars.peek() {
145                                    if digit.is_digit(10) {
146                                        dec.push(*digit);
147                                    } else {
148                                        break;
149                                    }
150                                    chars.next();
151                                }
152                            }
153                            if let Ok(value) = u8::from_str_radix(&dec, 10) {
154                                result.push(value as char);
155                            }
156                        }
157                        '\\' | '\'' | '\"' => result.push(next_char),
158                        'z' => {
159                            // Skip whitespace
160                            while let Some(c) = chars.peek() {
161                                if !c.is_whitespace() {
162                                    break;
163                                }
164                                chars.next();
165                            }
166                        }
167                        '\r' | '\n' => {
168                            result.push(next_char);
169                        }
170                        _ => {
171                            return Err(LuaParseError::new(
172                                &t!("Invalid escape sequence '\\%{char}'", char = next_char),
173                                token.text_range(),
174                            ));
175                        }
176                    }
177                }
178            }
179            _ => {
180                if c == delimiter {
181                    break;
182                }
183                result.push(c);
184            }
185        }
186    }
187
188    Ok(result)
189}