darklua_core/nodes/expressions/
string_utils.rs

1use std::{fmt, iter::Peekable, str::CharIndices};
2
/// The different ways reading a string can fail.
#[derive(Clone, Debug)]
enum StringErrorKind {
    /// The string is rejected as a whole; `message` describes why.
    Invalid { message: String },
    /// An escape sequence could not be read; `position` is the index
    /// reported for the sequence and `message` describes the problem.
    MalformedEscapeSequence { position: usize, message: String },
}
8
9/// Represents an error that occurred while parsing a string.
10#[derive(Debug, Clone)]
11pub struct StringError {
12    kind: StringErrorKind,
13}
14
15impl fmt::Display for StringError {
16    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
17        match &self.kind {
18            StringErrorKind::Invalid { message } => {
19                write!(f, "invalid string: {}", message)
20            }
21            StringErrorKind::MalformedEscapeSequence { position, message } => {
22                write!(f, "malformed escape sequence at {}: {}", position, message)
23            }
24        }
25    }
26}
27
28impl StringError {
29    pub(crate) fn invalid(message: impl Into<String>) -> Self {
30        Self {
31            kind: StringErrorKind::Invalid {
32                message: message.into(),
33            },
34        }
35    }
36    pub(crate) fn malformed_escape_sequence(position: usize, message: impl Into<String>) -> Self {
37        Self {
38            kind: StringErrorKind::MalformedEscapeSequence {
39                position,
40                message: message.into(),
41            },
42        }
43    }
44}
45
46pub(crate) fn read_escaped_string(
47    chars: CharIndices,
48    reserve_size: Option<usize>,
49) -> Result<Vec<u8>, StringError> {
50    let mut chars = chars.peekable();
51
52    let mut value = Vec::new();
53    if let Some(reserve_size) = reserve_size {
54        value.reserve(reserve_size);
55    }
56
57    while let Some((position, char)) = chars.next() {
58        if char == '\\' {
59            if let Some((_, next_char)) = chars.next() {
60                match next_char {
61                    '\n' | '"' | '\'' | '\\' => value.push(next_char as u8),
62                    'n' => value.push(b'\n'),
63                    't' => value.push(b'\t'),
64                    'a' => value.extend("\u{7}".as_bytes()),
65                    'b' => value.extend("\u{8}".as_bytes()),
66                    'v' => value.extend("\u{B}".as_bytes()),
67                    'f' => value.extend("\u{C}".as_bytes()),
68                    'r' => value.push(b'\r'),
69                    first_digit if first_digit.is_ascii_digit() => {
70                        let number = read_number(&mut chars, Some(first_digit), 10, 3);
71
72                        if number < 256 {
73                            value.push(number as u8);
74                        } else {
75                            return Err(StringError::malformed_escape_sequence(
76                                position,
77                                "cannot escape ascii character greater than 256",
78                            ));
79                        }
80                    }
81                    'x' => {
82                        if let (Some(first_digit), Some(second_digit)) = (
83                            chars.next().map(|(_, c)| c).filter(char::is_ascii_hexdigit),
84                            chars.next().map(|(_, c)| c).filter(char::is_ascii_hexdigit),
85                        ) {
86                            let number = 16 * first_digit.to_digit(16).unwrap()
87                                + second_digit.to_digit(16).unwrap();
88
89                            if number < 256 {
90                                value.push(number as u8);
91                            } else {
92                                return Err(StringError::malformed_escape_sequence(
93                                    position,
94                                    "cannot escape ascii character greater than 256",
95                                ));
96                            }
97                        } else {
98                            return Err(StringError::malformed_escape_sequence(
99                                position,
100                                "exactly two hexadecimal digit expected",
101                            ));
102                        }
103                    }
104                    'u' => {
105                        if !contains(&chars.next().map(|(_, c)| c), &'{') {
106                            return Err(StringError::malformed_escape_sequence(
107                                position,
108                                "expected opening curly brace",
109                            ));
110                        }
111
112                        let number = read_number(&mut chars, None, 16, 8);
113
114                        if !contains(&chars.next().map(|(_, c)| c), &'}') {
115                            return Err(StringError::malformed_escape_sequence(
116                                position,
117                                "expected closing curly brace",
118                            ));
119                        }
120
121                        if number > 0x10FFFF {
122                            return Err(StringError::malformed_escape_sequence(
123                                position,
124                                "invalid unicode value",
125                            ));
126                        }
127
128                        let mut buf = [0u8; 4];
129
130                        value.extend(
131                            char::from_u32(number)
132                                .expect("unable to convert u32 to char")
133                                .encode_utf8(&mut buf)
134                                .as_bytes(),
135                        );
136                    }
137                    'z' => {
138                        while chars
139                            .peek()
140                            .filter(|(_, char)| char.is_ascii_whitespace())
141                            .is_some()
142                        {
143                            chars.next();
144                        }
145                    }
146                    _ => {
147                        // an invalid escape does not error: it simply skips the backslash
148                        let mut buf = [0u8; 4];
149
150                        value.extend(next_char.encode_utf8(&mut buf).as_bytes());
151                    }
152                }
153            } else {
154                return Err(StringError::malformed_escape_sequence(
155                    position,
156                    "string ended after '\\'",
157                ));
158            }
159        } else {
160            value.extend(char.to_string().as_bytes());
161        }
162    }
163
164    value.shrink_to_fit();
165
166    Ok(value)
167}
168
/// Reads consecutive digits from `chars` and returns the number they form.
///
/// - `first_digit`: a digit that was already consumed by the caller; it
///   becomes the most significant digit and counts toward `max`.
/// - `radix`: the numeric base, either 10 or 16.
/// - `max`: the maximum number of digits that make up the number. Reading
///   stops as soon as that many digits are gathered, or at the first
///   character that is not a digit in `radix` (which is left unconsumed).
///
/// Returns 0 when there is no digit at all. The accumulation is not
/// overflow-checked: callers must pick a `max` whose largest value fits in
/// a `u32` (eight hexadecimal digits do).
///
/// # Panics
///
/// Panics if `radix` is neither 10 nor 16, or if `first_digit` is not a
/// valid digit in `radix`.
fn read_number(
    chars: &mut Peekable<CharIndices>,
    first_digit: Option<char>,
    radix: u32,
    max: usize,
) -> u32 {
    if !matches!(radix, 10 | 16) {
        panic!("unsupported radix {}", radix);
    }

    let mut amount = 0;
    let mut digit_count = 0;

    if let Some(digit) = first_digit {
        amount = digit
            .to_digit(radix)
            .expect("first digit must be a valid digit for the radix");
        digit_count = 1;
    }

    // check the limit before consuming, so that no more than `max` digits
    // are ever taken from the iterator
    while digit_count < max {
        // `to_digit` only accepts ASCII digits (and ASCII letters for radix 16)
        let digit = match chars.peek().and_then(|(_, c)| c.to_digit(radix)) {
            Some(digit) => digit,
            None => break,
        };
        chars.next();

        amount = amount * radix + digit;
        digit_count += 1;
    }

    amount
}
198
/// Returns `true` when `option` holds a value that `x` compares equal to,
/// and `false` when it is `None` or holds a different value.
#[inline]
fn contains<T, U>(option: &Option<T>, x: &U) -> bool
where
    U: PartialEq<T>,
{
    matches!(option, Some(inner) if x == inner)
}
208}