darklua_core/nodes/expressions/
string.rs

1use crate::{
2    generator::utils::write_string,
3    nodes::{StringError, Token},
4};
5
6use super::string_utils;
7
8/// Represents a string literal in Lua source code.
9///
10/// String literals in Lua can be written with single quotes, double quotes,
11/// or with long brackets (`[[...]]` or `[=[...]=]` etc.) for multi-line strings.
12#[derive(Clone, PartialEq, Eq)]
13pub struct StringExpression {
14    value: Vec<u8>,
15    token: Option<Token>,
16}
17
18impl std::fmt::Debug for StringExpression {
19    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
20        f.debug_struct("StringExpression")
21            .field("token", &self.token)
22            .field("value", &{
23                if let Ok(s) = str::from_utf8(&self.value) {
24                    format!("{:?}", s)
25                } else {
26                    let escaped = self
27                        .value
28                        .iter()
29                        .flat_map(|&b| {
30                            if b <= 0x7f {
31                                vec![b as char]
32                            } else {
33                                format!("\\x{:02x}", b).chars().collect()
34                            }
35                        })
36                        .collect::<String>();
37                    format!("{:?}", escaped)
38                }
39            })
40            .finish()
41    }
42}
43
44impl StringExpression {
45    /// Creates a new `StringExpression` from a raw Lua string literal.
46    ///
47    /// Handles quoted strings (with either ' or " delimiters), long bracket strings,
48    /// and processes escape sequences in quoted strings.
49    ///
50    /// ```
51    /// # use darklua_core::nodes::StringExpression;
52    /// let single_quoted = StringExpression::new("'hello'").unwrap();
53    /// let double_quoted = StringExpression::new("\"world\"").unwrap();
54    /// let bracket_string = StringExpression::new("[[multi\nline]]").unwrap();
55    /// ```
56    pub fn new(string: &str) -> Result<Self, StringError> {
57        if string.starts_with('[') {
58            return string
59                .chars()
60                .skip(1)
61                .enumerate()
62                .find_map(|(indice, character)| if character == '[' { Some(indice) } else { None })
63                .ok_or_else(|| StringError::invalid("unable to find `[` delimiter"))
64                .and_then(|indice| {
65                    let length = 2 + indice;
66                    let start = if string
67                        .get(length..length + 1)
68                        .filter(|char| char == &"\n")
69                        .is_some()
70                    {
71                        length + 1
72                    } else {
73                        length
74                    };
75                    string
76                        .get(start..string.len() - length)
77                        .map(str::to_owned)
78                        .ok_or_else(|| StringError::invalid(""))
79                })
80                .map(Self::from_value);
81        }
82
83        let mut chars = string.char_indices();
84
85        match (chars.next(), chars.next_back()) {
86            (Some((_, '"')), Some((_, '"'))) | (Some((_, '\'')), Some((_, '\''))) => {
87                string_utils::read_escaped_string(chars, Some(string.len())).map(Self::from_value)
88            }
89            (Some((_, '"')), Some((_, '\''))) | (Some((_, '\'')), Some((_, '"'))) => {
90                Err(StringError::invalid("quotes do not match"))
91            }
92            _ => Err(StringError::invalid("missing quotes")),
93        }
94    }
95
96    /// Creates an empty string expression.
97    pub fn empty() -> Self {
98        Self {
99            value: b"".to_vec(),
100            token: None,
101        }
102    }
103
104    /// Creates a new `StringExpression` from a string value.
105    pub fn from_value(value: impl IntoLuaStringValue) -> Self {
106        Self {
107            value: value.into_lua_string_value(),
108            token: None,
109        }
110    }
111
112    /// Attaches a token to this string expression.
113    pub fn with_token(mut self, token: Token) -> Self {
114        self.token = Some(token);
115        self
116    }
117
118    /// Sets the token for this string expression.
119    #[inline]
120    pub fn set_token(&mut self, token: Token) {
121        self.token = Some(token);
122    }
123
124    /// Returns the token associated with this string expression, if any.
125    #[inline]
126    pub fn get_token(&self) -> Option<&Token> {
127        self.token.as_ref()
128    }
129
130    /// Returns a mutable reference to the token attached to this string expression,
131    /// creating it if missing.
132    pub(crate) fn mutate_or_insert_token(&mut self) -> &mut Token {
133        if self.token.is_none() {
134            let content = write_string(&self.value);
135            self.token = Some(Token::from_content(content));
136        }
137        self.token.as_mut().unwrap()
138    }
139
140    /// Returns the string value.
141    #[inline]
142    pub fn get_value(&self) -> &[u8] {
143        &self.value
144    }
145
146    /// Returns the string value if it is valid UTF-8.
147    #[inline]
148    pub fn get_string_value(&self) -> Option<&str> {
149        str::from_utf8(&self.value).ok()
150    }
151
152    /// Consumes the string expression and returns the inner string value.
153    #[inline]
154    pub fn into_value(self) -> Vec<u8> {
155        self.value
156    }
157
158    /// Consumes the string expression and returns the inner string value if it is valid UTF-8.
159    #[inline]
160    pub fn into_string(self) -> Option<String> {
161        String::from_utf8(self.value).ok()
162    }
163
164    /// Checks if the string contains newline characters.
165    pub fn is_multiline(&self) -> bool {
166        self.value.contains(&b'\n')
167    }
168
169    /// Checks if the string contains single quotes.
170    ///
171    /// Useful when determining the best quote style to use when serializing the string.
172    pub fn has_single_quote(&self) -> bool {
173        self.find_not_escaped(b'\'').is_some()
174    }
175
176    /// Checks if the string contains double quotes.
177    ///
178    /// Useful when determining the best quote style to use when serializing the string.
179    pub fn has_double_quote(&self) -> bool {
180        self.find_not_escaped(b'"').is_some()
181    }
182
183    fn find_not_escaped(&self, pattern: u8) -> Option<usize> {
184        self.find_not_escaped_from(pattern, &mut self.value.iter().copied().enumerate())
185    }
186
187    fn find_not_escaped_from(
188        &self,
189        pattern: u8,
190        mut chars: impl Iterator<Item = (usize, u8)>,
191    ) -> Option<usize> {
192        let mut escaped = false;
193        chars.find_map(|(index, character)| {
194            if escaped {
195                escaped = false;
196                None
197            } else {
198                match character {
199                    b'\\' => {
200                        escaped = true;
201                        None
202                    }
203                    value => {
204                        if value == pattern {
205                            Some(index)
206                        } else {
207                            None
208                        }
209                    }
210                }
211            }
212        })
213    }
214
215    super::impl_token_fns!(iter = [token]);
216}
217
/// Conversion of string related values into the bytes of a Lua string value.
pub trait IntoLuaStringValue {
    /// Consumes the value and returns the bytes of the Lua string.
    fn into_lua_string_value(self) -> Vec<u8>;
}

/// An owned string gives away its UTF-8 bytes.
impl IntoLuaStringValue for String {
    fn into_lua_string_value(self) -> Vec<u8> {
        Vec::from(self)
    }
}

/// A borrowed `String` has its UTF-8 bytes copied.
impl IntoLuaStringValue for &String {
    fn into_lua_string_value(self) -> Vec<u8> {
        self.as_bytes().to_owned()
    }
}

/// A string slice has its UTF-8 bytes copied.
impl IntoLuaStringValue for &str {
    fn into_lua_string_value(self) -> Vec<u8> {
        self.as_bytes().to_owned()
    }
}

/// A byte vector is returned unchanged.
impl IntoLuaStringValue for Vec<u8> {
    fn into_lua_string_value(self) -> Vec<u8> {
        self
    }
}

/// A byte slice is copied.
impl IntoLuaStringValue for &[u8] {
    fn into_lua_string_value(self) -> Vec<u8> {
        self.to_owned()
    }
}

/// A byte array is moved into a vector.
impl<const N: usize> IntoLuaStringValue for [u8; N] {
    fn into_lua_string_value(self) -> Vec<u8> {
        Vec::from(self)
    }
}

/// A borrowed byte array is copied.
impl<const N: usize> IntoLuaStringValue for &[u8; N] {
    fn into_lua_string_value(self) -> Vec<u8> {
        self[..].to_owned()
    }
}

/// A character is converted to its UTF-8 encoding.
impl IntoLuaStringValue for char {
    fn into_lua_string_value(self) -> Vec<u8> {
        // four bytes is the longest UTF-8 encoding of a character
        let mut utf8 = [0u8; 4];
        let encoded = self.encode_utf8(&mut utf8);
        encoded.as_bytes().to_owned()
    }
}
271
#[cfg(test)]
mod test {
    use super::*;

    // Generates, for each `name(input) => value` case, one test in a
    // `single_quoted` module and one in a `double_quoted` module. Each test
    // wraps `input` in the corresponding quotes, parses it with
    // `StringExpression::new` and compares the result with `value`.
    macro_rules! test_quoted {
        ($($name:ident($input:literal) => $value:literal),* $(,)?) => {
            mod single_quoted {
                use super::*;
                $(
                    #[test]
                    fn $name() {
                        let quoted = format!("'{}'", $input);
                        assert_eq!(
                            StringExpression::new(&quoted)
                                .expect("unable to parse string")
                                .get_value(),
                            StringExpression::from_value($value).get_value(),
                        );
                    }
                )*
            }

            mod double_quoted {
                use super::*;
                $(
                    #[test]
                    fn $name() {
                        let quoted = format!("\"{}\"", $input);
                        assert_eq!(
                            StringExpression::new(&quoted)
                                .expect("unable to parse string")
                                .get_value(),
                            StringExpression::from_value($value).get_value(),
                        );
                    }
                )*
            }
        };
    }

    test_quoted!(
        empty("") => "",
        hello("hello") => "hello",
        escaped_new_line("\\n") => "\n",
        escaped_tab("\\t") => "\t",
        escaped_backslash("\\\\") => "\\",
        escaped_carriage_return("\\r") => "\r",
        escaped_bell("\\a") => "\u{7}",
        escaped_backspace("\\b") => "\u{8}",
        escaped_vertical_tab("\\v") => "\u{B}",
        escaped_form_feed("\\f") => "\u{C}",
        escaped_null("\\0") => "\0",
        escaped_two_digits("\\65") => "A",
        escaped_three_digits("\\123") => "{",
        escaped_null_hex("\\x00") => "\0",
        escaped_uppercase_a_hex("\\x41") => "A",
        escaped_tilde_hex_uppercase("\\x7E") => "~",
        escaped_tilde_hex_lowercase("\\x7e") => "~",
        skips_whitespaces_but_no_spaces("\\z") => "",
        skips_whitespaces("a\\z   \n\n   \\nb") => "a\nb",
        escaped_176("\\176") => b"\xB0",
        escaped_unicode_single_digit("\\u{0}") => "\0",
        escaped_unicode_two_hex_digits("\\u{AB}") => "\u{AB}",
        escaped_unicode_three_digit("\\u{123}") => "\u{123}",
        escaped_unicode_last_value("\\u{10FFFF}") => "\u{10FFFF}",
    );

    // Snapshots of the error messages produced for invalid string literals.
    mod invalid_string_errors {
        use super::*;

        #[test]
        fn double_quoted_single_backslash() {
            insta::assert_snapshot!(StringExpression::new("\"\\\"").unwrap_err().to_string(), @r###"malformed escape sequence at 1: string ended after '\'"###);
        }

        #[test]
        fn single_quoted_single_backslash() {
            insta::assert_snapshot!(StringExpression::new("'\\'").unwrap_err().to_string(), @r###"malformed escape sequence at 1: string ended after '\'"###);
        }

        #[test]
        fn double_quoted_escaped_too_large_ascii() {
            insta::assert_snapshot!(StringExpression::new("\"\\256\"").unwrap_err().to_string(), @"malformed escape sequence at 1: cannot escape ascii character greater than 256");
        }

        #[test]
        fn single_quoted_escaped_too_large_ascii() {
            insta::assert_snapshot!(StringExpression::new("'\\256'").unwrap_err().to_string(), @"malformed escape sequence at 1: cannot escape ascii character greater than 256");
        }

        #[test]
        fn double_quoted_escaped_too_large_unicode() {
            insta::assert_snapshot!(StringExpression::new("\"\\u{110000}\"").unwrap_err().to_string(), @"malformed escape sequence at 1: invalid unicode value");
        }

        #[test]
        fn single_quoted_escaped_too_large_unicode() {
            insta::assert_snapshot!(StringExpression::new("'\\u{110000}'").unwrap_err().to_string(), @"malformed escape sequence at 1: invalid unicode value");
        }

        #[test]
        fn double_quoted_escaped_missing_opening_brace_unicode() {
            insta::assert_snapshot!(StringExpression::new("\"\\uAB\"").unwrap_err().to_string(), @"malformed escape sequence at 1: expected opening curly brace");
        }

        #[test]
        fn single_quoted_escaped_missing_opening_brace_unicode() {
            insta::assert_snapshot!(StringExpression::new("'\\uAB'").unwrap_err().to_string(), @"malformed escape sequence at 1: expected opening curly brace");
        }

        #[test]
        fn double_quoted_escaped_missing_closing_brace_unicode() {
            insta::assert_snapshot!(StringExpression::new("\"\\u{0p\"").unwrap_err().to_string(), @"malformed escape sequence at 1: expected closing curly brace");
        }

        #[test]
        fn single_quoted_escaped_missing_closing_brace_unicode() {
            insta::assert_snapshot!(StringExpression::new("'\\u{0p'").unwrap_err().to_string(), @"malformed escape sequence at 1: expected closing curly brace");
        }

        #[test]
        fn empty_string() {
            insta::assert_snapshot!(StringExpression::new("").unwrap_err().to_string(), @"invalid string: missing quotes");
        }

        #[test]
        fn missing_quotes() {
            insta::assert_snapshot!(StringExpression::new("hello").unwrap_err().to_string(), @"invalid string: missing quotes");
        }

        #[test]
        fn delimiters_matching_but_not_quotes() {
            insta::assert_snapshot!(StringExpression::new("aa").unwrap_err().to_string(), @"invalid string: missing quotes");
        }

        #[test]
        fn single_quote() {
            insta::assert_snapshot!(StringExpression::new("'").unwrap_err().to_string(), @"invalid string: missing quotes");
        }

        #[test]
        fn double_quote() {
            insta::assert_snapshot!(StringExpression::new("\"").unwrap_err().to_string(), @"invalid string: missing quotes");
        }

        #[test]
        fn quotes_not_matching() {
            insta::assert_snapshot!(StringExpression::new("'\"").unwrap_err().to_string(), @"invalid string: quotes do not match");
        }
    }

    #[test]
    fn new_removes_double_quotes() {
        let string = StringExpression::new(r#""hello""#).unwrap();

        assert_eq!(string.get_value(), b"hello");
    }

    #[test]
    fn new_removes_single_quotes() {
        let string = StringExpression::new("'hello'").unwrap();

        assert_eq!(string.get_value(), b"hello");
    }

    #[test]
    fn new_removes_double_brackets() {
        let string = StringExpression::new("[[hello]]").unwrap();

        assert_eq!(string.get_value(), b"hello");
    }

    #[test]
    fn new_removes_double_brackets_and_skip_first_new_line() {
        let string = StringExpression::new("[[\nhello]]").unwrap();

        assert_eq!(string.get_value(), b"hello");
    }

    #[test]
    fn new_removes_double_brackets_with_one_equals() {
        let string = StringExpression::new("[=[hello]=]").unwrap();

        assert_eq!(string.get_value(), b"hello");
    }

    #[test]
    fn new_removes_double_brackets_with_multiple_equals() {
        let string = StringExpression::new("[==[hello]==]").unwrap();

        assert_eq!(string.get_value(), b"hello");
    }

    #[test]
    fn new_skip_invalid_escape_in_single_quoted_string() {
        let string = StringExpression::new("'\\oo'").unwrap();

        assert_eq!(string.get_value(), b"oo");
    }

    #[test]
    fn new_skip_invalid_escape_in_double_quoted_string() {
        let string = StringExpression::new("\"\\oo\"").unwrap();

        assert_eq!(string.get_value(), b"oo");
    }

    #[test]
    fn has_single_quote_is_false_if_no_single_quotes() {
        let string = StringExpression::from_value("hello");

        assert!(!string.has_single_quote());
    }

    #[test]
    fn has_single_quote_is_true_if_unescaped_single_quotes() {
        let string = StringExpression::from_value("don't");

        assert!(string.has_single_quote());
    }

    #[test]
    fn has_single_quote_is_true_if_unescaped_single_quotes_with_escaped_backslash() {
        let string = StringExpression::from_value(r"don\\'t");

        assert!(string.has_single_quote());
    }

    #[test]
    fn has_single_quote_is_false_if_escaped_single_quotes() {
        let string = StringExpression::from_value(r"don\'t");

        assert!(!string.has_single_quote());
    }

    #[test]
    fn has_double_quote_is_false_if_no_double_quotes() {
        let string = StringExpression::from_value("hello");

        assert!(!string.has_double_quote());
    }

    #[test]
    fn has_double_quote_is_true_if_unescaped_double_quotes() {
        let string = StringExpression::from_value(r#"Say: "Hi!""#);

        assert!(string.has_double_quote());
    }

    #[test]
    fn has_double_quote_is_true_if_unescaped_double_quotes_with_escaped_backslash() {
        let string = StringExpression::from_value(r#"hel\\"o"#);

        assert!(string.has_double_quote());
    }

    #[test]
    fn has_double_quote_is_false_if_escaped_double_quotes() {
        let string = StringExpression::from_value(r#"hel\"o"#);

        assert!(!string.has_double_quote());
    }
}