Skip to main content

shape_ast/parser/
string_literals.rs

1//! String literal decoding helpers.
2//!
3//! Supports:
4//! - simple strings: `"text"`
5//! - triple strings: `"""multiline"""`
6
7use crate::ast::InterpolationMode;
8use crate::error::{Result, ShapeError};
9
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct ParsedStringLiteral {
12    pub value: String,
13    pub interpolation_mode: Option<InterpolationMode>,
14}
15
16/// Decode a parsed string literal (including surrounding quotes) into its runtime content.
17pub fn parse_string_literal(raw: &str) -> Result<String> {
18    Ok(parse_string_literal_with_kind(raw)?.value)
19}
20
21/// Decode a parsed string literal and report whether it used the `f` prefix.
22pub fn parse_string_literal_with_kind(raw: &str) -> Result<ParsedStringLiteral> {
23    let (interpolation_mode, unprefixed) = strip_interpolation_prefix(raw);
24    let is_interpolated = interpolation_mode.is_some();
25    let value = if is_triple_quoted(unprefixed) {
26        parse_triple_quoted(unprefixed)
27    } else if is_simple_quoted(unprefixed) {
28        parse_simple_quoted(&unprefixed[1..unprefixed.len() - 1], is_interpolated)?
29    } else {
30        unprefixed.to_string()
31    };
32    Ok(ParsedStringLiteral {
33        value,
34        interpolation_mode,
35    })
36}
37
38/// Strip `f`/`f$`/`f#` prefix and return (mode, rest).
39fn strip_interpolation_prefix(raw: &str) -> (Option<InterpolationMode>, &str) {
40    if raw.starts_with("f$") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
41        (Some(InterpolationMode::Dollar), &raw[2..])
42    } else if raw.starts_with("f#") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
43        (Some(InterpolationMode::Hash), &raw[2..])
44    } else if raw.starts_with('f') && raw.get(1..).is_some_and(|rest| rest.starts_with('"')) {
45        (Some(InterpolationMode::Braces), &raw[1..])
46    } else {
47        (None, raw)
48    }
49}
50
/// True when `raw` opens with `"` and closes with a second, distinct `"`.
fn is_simple_quoted(raw: &str) -> bool {
    // Stripping the opening quote first guarantees the closing quote is a
    // different character, so a lone `"` is rejected.
    raw.strip_prefix('"')
        .is_some_and(|after_open| after_open.ends_with('"'))
}
54
/// True when `raw` is wrapped in non-overlapping `"""` delimiters.
fn is_triple_quoted(raw: &str) -> bool {
    const FENCE: &str = "\"\"\"";

    // Checking the suffix on the remainder ensures the opening and closing
    // delimiters do not share characters (so five quotes are rejected).
    raw.strip_prefix(FENCE)
        .is_some_and(|after_open| after_open.ends_with(FENCE))
}
58
59fn parse_triple_quoted(raw: &str) -> String {
60    // Normalize line endings first so trimming rules are deterministic.
61    let normalized = raw[3..raw.len() - 3].replace("\r\n", "\n");
62    let mut lines: Vec<&str> = normalized.split('\n').collect();
63
64    // Ignore delimiter-adjacent blank lines when they only contain whitespace.
65    if lines.first().is_some_and(|line| line.trim().is_empty()) {
66        lines.remove(0);
67    }
68    if lines.last().is_some_and(|line| line.trim().is_empty()) {
69        lines.pop();
70    }
71
72    let common_indent = lines
73        .iter()
74        .filter(|line| !line.trim().is_empty())
75        .map(|line| leading_indent(line))
76        .min()
77        .unwrap_or(0);
78
79    lines
80        .into_iter()
81        .map(|line| {
82            if line.trim().is_empty() {
83                String::new()
84            } else {
85                line.chars().skip(common_indent).collect()
86            }
87        })
88        .collect::<Vec<String>>()
89        .join("\n")
90}
91
92/// Decode escape sequences in a simple quoted string.
93///
94/// When `preserve_brace_escapes` is true (for f-strings / c-strings), `\{` and
95/// `\}` are kept as-is so the downstream interpolation parser can treat them as
96/// literal brace escapes rather than interpolation delimiters.
97fn parse_simple_quoted(inner: &str, preserve_brace_escapes: bool) -> Result<String> {
98    let mut out = String::with_capacity(inner.len());
99    let mut chars = inner.chars();
100
101    while let Some(ch) = chars.next() {
102        if ch != '\\' {
103            out.push(ch);
104            continue;
105        }
106
107        let Some(escaped) = chars.next() else {
108            out.push('\\');
109            break;
110        };
111
112        match escaped {
113            'n' => out.push('\n'),
114            't' => out.push('\t'),
115            'r' => out.push('\r'),
116            '0' => out.push('\0'),
117            '\\' => out.push('\\'),
118            '"' => out.push('"'),
119            '\'' => out.push('\''),
120            '{' | '}' | '$' | '#' if preserve_brace_escapes => {
121                // Keep `\{`, `\}`, `\$`, `\#` verbatim for the interpolation parser
122                out.push('\\');
123                out.push(escaped);
124            }
125            '{' => out.push('{'),
126            '}' => out.push('}'),
127            '$' => out.push('$'),
128            '#' => out.push('#'),
129            other => {
130                return Err(ShapeError::ParseError {
131                    message: format!(
132                        "unknown escape sequence '\\{}', expected one of: \\n, \\t, \\r, \\\\, \\\", \\', \\0, \\{{, \\}}, \\$, \\#",
133                        other
134                    ),
135                    location: None,
136                });
137            }
138        }
139    }
140
141    Ok(out)
142}
143
/// Number of space or tab characters at the start of `line`.
fn leading_indent(line: &str) -> usize {
    // Spaces and tabs are one byte each, so the byte offset of the first
    // other character equals the number of indentation characters.
    line.find(|ch: char| ch != ' ' && ch != '\t')
        .unwrap_or(line.len())
}
149
/// Unit tests for string literal decoding.
///
/// Covers simple and triple-quoted literals, the `f` / `f$` / `f#` prefixes,
/// escape decoding, and the edge cases the decoder handles explicitly
/// (CRLF input, empty literals, unquoted fallback, sigil escapes).
#[cfg(test)]
mod tests {
    use super::{parse_string_literal, parse_string_literal_with_kind};
    use crate::ast::InterpolationMode;

    #[test]
    fn simple_string_is_unwrapped() {
        assert_eq!(parse_string_literal("\"hello\"").unwrap(), "hello");
    }

    #[test]
    fn triple_string_trims_delimiter_blank_lines_and_dedent() {
        let raw = "\"\"\"\n        this\n        is\n        a\n        multiline\n        \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "this\nis\na\nmultiline");
    }

    #[test]
    fn triple_string_preserves_relative_indentation() {
        let raw =
            "\"\"\"\n            root\n              nested\n            end\n            \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "root\n  nested\nend");
    }

    #[test]
    fn triple_string_keeps_inline_form() {
        let raw = "\"\"\"a\n  b\"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "a\n  b");
    }

    #[test]
    fn formatted_simple_string_sets_formatted_flag() {
        let parsed = parse_string_literal_with_kind("f\"value: {x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "value: {x}");
    }

    #[test]
    fn formatted_triple_string_sets_formatted_flag() {
        let parsed = parse_string_literal_with_kind("f\"\"\"\n  x\n\"\"\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "x");
    }

    #[test]
    fn formatted_triple_string_preserves_relative_indentation() {
        let parsed = parse_string_literal_with_kind(
            "f\"\"\"\n            value:\n              {33+1}\n            \"\"\"",
        )
        .unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "value:\n  {33+1}");
    }

    #[test]
    fn formatted_dollar_prefix_sets_mode() {
        let parsed = parse_string_literal_with_kind("f$\"value: ${x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Dollar));
        assert_eq!(parsed.value, "value: ${x}");
    }

    #[test]
    fn formatted_hash_prefix_sets_mode() {
        let parsed = parse_string_literal_with_kind("f#\"value: #{x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Hash));
        assert_eq!(parsed.value, "value: #{x}");
    }

    #[test]
    fn simple_string_decodes_common_escapes() {
        let parsed = parse_string_literal_with_kind("\"a\\n\\t\\\"b\\\\c\"").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert_eq!(parsed.value, "a\n\t\"b\\c");
    }

    // --- User-specified multiline triple-string behavior ---

    #[test]
    fn triple_string_multiline_with_relative_indent() {
        let raw = "\"\"\"\n            this is\n            a multiline\n            string.\n              -it should indent\n              -but remove the block spaces\n            \"\"\"";
        assert_eq!(
            parse_string_literal(raw).unwrap(),
            "this is\na multiline\nstring.\n  -it should indent\n  -but remove the block spaces"
        );
    }

    #[test]
    fn triple_string_inline_with_inner_quotes() {
        // The payload here contains no quote characters; embedded quotes are
        // exercised by `triple_string_inline_with_single_inner_quote` below.
        let raw = "\"\"\"a string with quotes\"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "a string with quotes");
    }

    #[test]
    fn triple_string_inline_with_single_inner_quote() {
        let raw = "\"\"\"she said \"hello\" today\"\"\"";
        assert_eq!(
            parse_string_literal(raw).unwrap(),
            "she said \"hello\" today"
        );
    }

    #[test]
    fn triple_string_no_leading_trailing_newline() {
        let raw = "\"\"\"\n  hello world\n  \"\"\"";
        let result = parse_string_literal(raw).unwrap();
        assert!(
            !result.starts_with('\n'),
            "should not start with newline, got: {:?}",
            result
        );
        assert!(
            !result.ends_with('\n'),
            "should not end with newline, got: {:?}",
            result
        );
        assert_eq!(result, "hello world");
    }

    #[test]
    fn triple_string_empty_lines_preserved_in_middle() {
        let raw = "\"\"\"\n    first\n\n    last\n    \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "first\n\nlast");
    }

    #[test]
    fn triple_string_does_not_process_escape_sequences() {
        let raw = "\"\"\"\n    line with \\n in it\n    \"\"\"";
        let result = parse_string_literal(raw).unwrap();
        assert_eq!(result, "line with \\n in it");
    }

    #[test]
    fn triple_string_normalizes_crlf_line_endings() {
        // Windows line endings must dedent exactly like `\n` input and must
        // not leave stray `\r` characters in the result.
        let raw = "\"\"\"\r\n    first\r\n    second\r\n    \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "first\nsecond");
    }

    #[test]
    fn empty_literals_decode_to_empty_string() {
        assert_eq!(parse_string_literal("\"\"").unwrap(), "");
        assert_eq!(parse_string_literal("\"\"\"\"\"\"").unwrap(), "");
    }

    #[test]
    fn unquoted_input_is_returned_verbatim() {
        // Starts with `f` but is not followed by a quote, so it must not be
        // mistaken for an interpolated literal.
        let parsed = parse_string_literal_with_kind("foo").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert_eq!(parsed.value, "foo");
    }

    #[test]
    fn simple_string_escape_newline() {
        assert_eq!(
            parse_string_literal("\"hello\\nworld\"").unwrap(),
            "hello\nworld"
        );
    }

    #[test]
    fn simple_string_escape_tab() {
        assert_eq!(
            parse_string_literal("\"col1\\tcol2\"").unwrap(),
            "col1\tcol2"
        );
    }

    #[test]
    fn simple_string_escape_backslash() {
        assert_eq!(
            parse_string_literal("\"path\\\\file\"").unwrap(),
            "path\\file"
        );
    }

    #[test]
    fn simple_string_escape_quote() {
        assert_eq!(
            parse_string_literal("\"say \\\"hi\\\"\"").unwrap(),
            "say \"hi\""
        );
    }

    #[test]
    fn simple_string_escape_single_quote_and_carriage_return() {
        assert_eq!(parse_string_literal("\"it\\'s\\r\"").unwrap(), "it's\r");
    }

    #[test]
    fn simple_string_unknown_escape_is_error() {
        // BUG-12: Unknown escape sequences must produce an error
        let result = parse_string_literal("\"hello\\q\"");
        assert!(result.is_err(), "expected error for unknown escape \\q");
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("unknown escape sequence"),
            "error should mention 'unknown escape sequence', got: {}",
            err_msg
        );
        assert!(
            err_msg.contains("\\q"),
            "error should mention the bad escape \\q, got: {}",
            err_msg
        );
    }

    #[test]
    fn simple_string_unknown_escape_x_is_error() {
        // \x is not a supported escape sequence (no hex escape support yet)
        let result = parse_string_literal("\"\\x41\"");
        assert!(result.is_err(), "expected error for unsupported \\x escape");
    }

    #[test]
    fn simple_string_escape_null() {
        // \0 should produce a null byte
        assert_eq!(parse_string_literal("\"a\\0b\"").unwrap(), "a\0b");
    }

    // --- LOW-2: f-string backslash-escaped braces ---

    #[test]
    fn fstring_backslash_brace_preserves_literal_brace() {
        // f"hello \{world\}" should produce value with preserved \{ and \}
        // so the interpolation parser sees them as literal braces, not interpolation.
        let parsed = parse_string_literal_with_kind("f\"hello \\{world\\}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        // The value should contain `\{` and `\}` so the interpolation parser
        // can distinguish them from real interpolation delimiters.
        assert_eq!(parsed.value, "hello \\{world\\}");
    }

    #[test]
    fn plain_string_backslash_brace_decodes_to_literal() {
        // In a plain (non-interpolated) string, \{ should still decode to {
        let parsed = parse_string_literal_with_kind("\"hello \\{world\\}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert_eq!(parsed.value, "hello {world}");
    }

    #[test]
    fn interpolated_strings_preserve_sigil_escapes() {
        // `\$` and `\#` get the same verbatim treatment as `\{` / `\}` in
        // interpolated literals.
        let dollar = parse_string_literal_with_kind("f$\"cost: \\$5\"").unwrap();
        assert_eq!(dollar.interpolation_mode, Some(InterpolationMode::Dollar));
        assert_eq!(dollar.value, "cost: \\$5");

        let hash = parse_string_literal_with_kind("f#\"tag: \\#x\"").unwrap();
        assert_eq!(hash.interpolation_mode, Some(InterpolationMode::Hash));
        assert_eq!(hash.value, "tag: \\#x");
    }

    #[test]
    fn plain_string_sigil_escapes_decode_to_literals() {
        assert_eq!(parse_string_literal("\"\\$ and \\#\"").unwrap(), "$ and #");
    }

    #[test]
    fn fstring_still_decodes_ordinary_escapes() {
        // Only brace/sigil escapes are preserved; everything else is decoded
        // exactly as in a plain string.
        let parsed = parse_string_literal_with_kind("f\"a\\n{x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "a\n{x}");
    }
}
359        let parsed = parse_string_literal_with_kind("\"hello \\{world\\}\"").unwrap();
360        assert_eq!(parsed.interpolation_mode, None);
361        assert_eq!(parsed.value, "hello {world}");
362    }
363}