Skip to main content

shape_ast/parser/
string_literals.rs

1//! String literal decoding helpers.
2//!
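//! Supports:
//! - simple strings: `"text"` (backslash escapes are decoded)
//! - triple strings: `"""multiline"""` (dedented; escape sequences are left as written)
//! - an optional interpolation prefix on either form: `f`, `f$`, `f#`, `c`, `c$`, `c#`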
6
7use crate::ast::InterpolationMode;
8use crate::error::{Result, ShapeError};
9
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct ParsedStringLiteral {
12    pub value: String,
13    pub interpolation_mode: Option<InterpolationMode>,
14    /// `true` when the source used a `c` prefix (content string).
15    pub is_content: bool,
16}
17
18/// Decode a parsed string literal (including surrounding quotes) into its runtime content.
19pub fn parse_string_literal(raw: &str) -> Result<String> {
20    Ok(parse_string_literal_with_kind(raw)?.value)
21}
22
23/// Decode a parsed string literal and report whether it used the `f` or `c` prefix.
24pub fn parse_string_literal_with_kind(raw: &str) -> Result<ParsedStringLiteral> {
25    let (interpolation_mode, is_content, unprefixed) = strip_interpolation_prefix(raw);
26    let is_interpolated = interpolation_mode.is_some();
27    let value = if is_triple_quoted(unprefixed) {
28        parse_triple_quoted(unprefixed)
29    } else if is_simple_quoted(unprefixed) {
30        parse_simple_quoted(&unprefixed[1..unprefixed.len() - 1], is_interpolated)?
31    } else {
32        unprefixed.to_string()
33    };
34    Ok(ParsedStringLiteral {
35        value,
36        interpolation_mode,
37        is_content,
38    })
39}
40
41/// Strip `f`/`f$`/`f#`/`c`/`c$`/`c#` prefix and return (mode, is_content, rest).
42fn strip_interpolation_prefix(raw: &str) -> (Option<InterpolationMode>, bool, &str) {
43    // Try f-string prefixes first (higher priority)
44    if raw.starts_with("f$") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
45        (Some(InterpolationMode::Dollar), false, &raw[2..])
46    } else if raw.starts_with("f#") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
47        (Some(InterpolationMode::Hash), false, &raw[2..])
48    } else if raw.starts_with('f') && raw.get(1..).is_some_and(|rest| rest.starts_with('"')) {
49        (Some(InterpolationMode::Braces), false, &raw[1..])
50    }
51    // Then c-string prefixes
52    else if raw.starts_with("c$") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
53        (Some(InterpolationMode::Dollar), true, &raw[2..])
54    } else if raw.starts_with("c#") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
55        (Some(InterpolationMode::Hash), true, &raw[2..])
56    } else if raw.starts_with('c') && raw.get(1..).is_some_and(|rest| rest.starts_with('"')) {
57        (Some(InterpolationMode::Braces), true, &raw[1..])
58    } else {
59        (None, false, raw)
60    }
61}
62
/// `true` when `raw` opens and closes with distinct `"` characters, i.e. it is
/// at least two bytes long and both its first and last character are `"`.
fn is_simple_quoted(raw: &str) -> bool {
    // Stripping the opening quote first guarantees the closing quote is a
    // different character, so a lone `"` is rejected.
    matches!(raw.strip_prefix('"'), Some(rest) if rest.ends_with('"'))
}
66
/// `true` when `raw` starts with `"""` and ends with a separate `"""`, which
/// requires at least six bytes in total.
fn is_triple_quoted(raw: &str) -> bool {
    // The closing delimiter is looked for only in what follows the opening
    // one, so the two can never overlap.
    raw.strip_prefix("\"\"\"")
        .is_some_and(|rest| rest.ends_with("\"\"\""))
}
70
71fn parse_triple_quoted(raw: &str) -> String {
72    // Normalize line endings first so trimming rules are deterministic.
73    let normalized = raw[3..raw.len() - 3].replace("\r\n", "\n");
74    let mut lines: Vec<&str> = normalized.split('\n').collect();
75
76    // Ignore delimiter-adjacent blank lines when they only contain whitespace.
77    if lines.first().is_some_and(|line| line.trim().is_empty()) {
78        lines.remove(0);
79    }
80    if lines.last().is_some_and(|line| line.trim().is_empty()) {
81        lines.pop();
82    }
83
84    let common_indent = lines
85        .iter()
86        .filter(|line| !line.trim().is_empty())
87        .map(|line| leading_indent(line))
88        .min()
89        .unwrap_or(0);
90
91    lines
92        .into_iter()
93        .map(|line| {
94            if line.trim().is_empty() {
95                String::new()
96            } else {
97                line.chars().skip(common_indent).collect()
98            }
99        })
100        .collect::<Vec<String>>()
101        .join("\n")
102}
103
104/// Decode escape sequences in a simple quoted string.
105///
106/// When `preserve_brace_escapes` is true (for f-strings / c-strings), `\{` and
107/// `\}` are kept as-is so the downstream interpolation parser can treat them as
108/// literal brace escapes rather than interpolation delimiters.
109fn parse_simple_quoted(inner: &str, preserve_brace_escapes: bool) -> Result<String> {
110    let mut out = String::with_capacity(inner.len());
111    let mut chars = inner.chars();
112
113    while let Some(ch) = chars.next() {
114        if ch != '\\' {
115            out.push(ch);
116            continue;
117        }
118
119        let Some(escaped) = chars.next() else {
120            out.push('\\');
121            break;
122        };
123
124        match escaped {
125            'n' => out.push('\n'),
126            't' => out.push('\t'),
127            'r' => out.push('\r'),
128            '0' => out.push('\0'),
129            '\\' => out.push('\\'),
130            '"' => out.push('"'),
131            '\'' => out.push('\''),
132            '{' | '}' | '$' | '#' if preserve_brace_escapes => {
133                // Keep `\{`, `\}`, `\$`, `\#` verbatim for the interpolation parser
134                out.push('\\');
135                out.push(escaped);
136            }
137            '{' => out.push('{'),
138            '}' => out.push('}'),
139            '$' => out.push('$'),
140            '#' => out.push('#'),
141            other => {
142                return Err(ShapeError::ParseError {
143                    message: format!(
144                        "unknown escape sequence '\\{}', expected one of: \\n, \\t, \\r, \\\\, \\\", \\', \\0, \\{{, \\}}, \\$, \\#",
145                        other
146                    ),
147                    location: None,
148                });
149            }
150        }
151    }
152
153    Ok(out)
154}
155
/// Count the spaces and tabs at the start of `line`; each counts as one.
fn leading_indent(line: &str) -> usize {
    // Space and tab are single-byte ASCII, so scanning bytes gives the same
    // count as scanning chars.
    line.bytes()
        .take_while(|byte| matches!(byte, b' ' | b'\t'))
        .count()
}
161
/// Unit tests for string literal decoding: quoting forms, dedenting of triple
/// strings, escape handling, and `f`/`c` prefix detection.
#[cfg(test)]
mod tests {
    use super::{parse_string_literal, parse_string_literal_with_kind};
    use crate::ast::InterpolationMode;

    #[test]
    fn simple_string_is_unwrapped() {
        assert_eq!(parse_string_literal("\"hello\"").unwrap(), "hello");
    }

    #[test]
    fn unquoted_input_is_returned_verbatim() {
        // Input that is neither simple- nor triple-quoted falls through untouched.
        assert_eq!(parse_string_literal("bare").unwrap(), "bare");
        let parsed = parse_string_literal_with_kind("foo").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert!(!parsed.is_content);
        assert_eq!(parsed.value, "foo");
    }

    #[test]
    fn triple_string_trims_delimiter_blank_lines_and_dedent() {
        let raw = "\"\"\"\n        this\n        is\n        a\n        multiline\n        \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "this\nis\na\nmultiline");
    }

    #[test]
    fn triple_string_preserves_relative_indentation() {
        let raw =
            "\"\"\"\n            root\n              nested\n            end\n            \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "root\n  nested\nend");
    }

    #[test]
    fn triple_string_keeps_inline_form() {
        let raw = "\"\"\"a\n  b\"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "a\n  b");
    }

    #[test]
    fn triple_string_normalizes_crlf_line_endings() {
        // `\r\n` is folded to `\n` before trimming and dedenting.
        let raw = "\"\"\"\r\n  a\r\n  b\r\n  \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "a\nb");
    }

    #[test]
    fn formatted_simple_string_sets_formatted_flag() {
        let parsed = parse_string_literal_with_kind("f\"value: {x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "value: {x}");
    }

    #[test]
    fn formatted_triple_string_sets_formatted_flag() {
        let parsed = parse_string_literal_with_kind("f\"\"\"\n  x\n\"\"\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "x");
    }

    #[test]
    fn formatted_triple_string_preserves_relative_indentation() {
        let parsed = parse_string_literal_with_kind(
            "f\"\"\"\n            value:\n              {33+1}\n            \"\"\"",
        )
        .unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "value:\n  {33+1}");
    }

    #[test]
    fn formatted_dollar_prefix_sets_mode() {
        let parsed = parse_string_literal_with_kind("f$\"value: ${x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Dollar));
        assert_eq!(parsed.value, "value: ${x}");
    }

    #[test]
    fn formatted_hash_prefix_sets_mode() {
        let parsed = parse_string_literal_with_kind("f#\"value: #{x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Hash));
        assert_eq!(parsed.value, "value: #{x}");
    }

    #[test]
    fn simple_string_decodes_common_escapes() {
        let parsed = parse_string_literal_with_kind("\"a\\n\\t\\\"b\\\\c\"").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert_eq!(parsed.value, "a\n\t\"b\\c");
    }

    // --- User-specified multiline triple-string behavior ---

    #[test]
    fn triple_string_multiline_with_relative_indent() {
        let raw = "\"\"\"\n            this is\n            a multiline\n            string.\n              -it should indent\n              -but remove the block spaces\n            \"\"\"";
        assert_eq!(
            parse_string_literal(raw).unwrap(),
            "this is\na multiline\nstring.\n  -it should indent\n  -but remove the block spaces"
        );
    }

    #[test]
    fn triple_string_inline_with_inner_quotes() {
        let raw = "\"\"\"a string with quotes\"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "a string with quotes");
    }

    #[test]
    fn triple_string_inline_with_single_inner_quote() {
        let raw = "\"\"\"she said \"hello\" today\"\"\"";
        assert_eq!(
            parse_string_literal(raw).unwrap(),
            "she said \"hello\" today"
        );
    }

    #[test]
    fn triple_string_no_leading_trailing_newline() {
        let raw = "\"\"\"\n  hello world\n  \"\"\"";
        let result = parse_string_literal(raw).unwrap();
        assert!(
            !result.starts_with('\n'),
            "should not start with newline, got: {:?}",
            result
        );
        assert!(
            !result.ends_with('\n'),
            "should not end with newline, got: {:?}",
            result
        );
        assert_eq!(result, "hello world");
    }

    #[test]
    fn triple_string_empty_lines_preserved_in_middle() {
        let raw = "\"\"\"\n    first\n\n    last\n    \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "first\n\nlast");
    }

    #[test]
    fn triple_string_does_not_process_escape_sequences() {
        let raw = "\"\"\"\n    line with \\n in it\n    \"\"\"";
        let result = parse_string_literal(raw).unwrap();
        assert_eq!(result, "line with \\n in it");
    }

    #[test]
    fn simple_string_escape_newline() {
        assert_eq!(
            parse_string_literal("\"hello\\nworld\"").unwrap(),
            "hello\nworld"
        );
    }

    #[test]
    fn simple_string_escape_tab() {
        assert_eq!(
            parse_string_literal("\"col1\\tcol2\"").unwrap(),
            "col1\tcol2"
        );
    }

    #[test]
    fn simple_string_escape_backslash() {
        assert_eq!(
            parse_string_literal("\"path\\\\file\"").unwrap(),
            "path\\file"
        );
    }

    #[test]
    fn simple_string_escape_quote() {
        assert_eq!(
            parse_string_literal("\"say \\\"hi\\\"\"").unwrap(),
            "say \"hi\""
        );
    }

    #[test]
    fn simple_string_escape_single_quote() {
        // \' decodes to a plain apostrophe
        assert_eq!(parse_string_literal("\"it\\'s\"").unwrap(), "it's");
    }

    #[test]
    fn simple_string_unknown_escape_is_error() {
        // BUG-12: Unknown escape sequences must produce an error
        let result = parse_string_literal("\"hello\\q\"");
        assert!(result.is_err(), "expected error for unknown escape \\q");
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("unknown escape sequence"),
            "error should mention 'unknown escape sequence', got: {}",
            err_msg
        );
        assert!(
            err_msg.contains("\\q"),
            "error should mention the bad escape \\q, got: {}",
            err_msg
        );
    }

    #[test]
    fn simple_string_unknown_escape_x_is_error() {
        // \x is not a supported escape sequence (no hex escape support yet)
        let result = parse_string_literal("\"\\x41\"");
        assert!(result.is_err(), "expected error for unsupported \\x escape");
    }

    #[test]
    fn simple_string_escape_null() {
        // \0 should produce a null byte
        assert_eq!(parse_string_literal("\"a\\0b\"").unwrap(), "a\0b");
    }

    // --- Content string (c-prefix) tests ---

    #[test]
    fn content_simple_string_sets_content_flag() {
        let parsed = parse_string_literal_with_kind("c\"hello {x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert!(parsed.is_content);
        assert_eq!(parsed.value, "hello {x}");
    }

    #[test]
    fn content_dollar_prefix_sets_mode_and_content() {
        let parsed = parse_string_literal_with_kind("c$\"value: ${x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Dollar));
        assert!(parsed.is_content);
        assert_eq!(parsed.value, "value: ${x}");
    }

    #[test]
    fn content_hash_prefix_sets_mode_and_content() {
        let parsed = parse_string_literal_with_kind("c#\"value: #{x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Hash));
        assert!(parsed.is_content);
        assert_eq!(parsed.value, "value: #{x}");
    }

    #[test]
    fn content_triple_string_sets_content_flag() {
        let parsed = parse_string_literal_with_kind("c\"\"\"\n  row: {x}\n\"\"\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert!(parsed.is_content);
        assert_eq!(parsed.value, "row: {x}");
    }

    #[test]
    fn formatted_string_is_not_content() {
        let parsed = parse_string_literal_with_kind("f\"value: {x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert!(!parsed.is_content);
    }

    #[test]
    fn plain_string_is_not_content() {
        let parsed = parse_string_literal_with_kind("\"plain\"").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert!(!parsed.is_content);
    }

    // --- LOW-2: f-string backslash-escaped braces ---

    #[test]
    fn fstring_backslash_brace_preserves_literal_brace() {
        // f"hello \{world\}" should produce value with preserved \{ and \}
        // so the interpolation parser sees them as literal braces, not interpolation.
        let parsed = parse_string_literal_with_kind("f\"hello \\{world\\}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        // The value should contain `\{` and `\}` so the interpolation parser
        // can distinguish them from real interpolation delimiters.
        assert_eq!(parsed.value, "hello \\{world\\}");
    }

    #[test]
    fn plain_string_backslash_brace_decodes_to_literal() {
        // In a plain (non-interpolated) string, \{ should still decode to {
        let parsed = parse_string_literal_with_kind("\"hello \\{world\\}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert_eq!(parsed.value, "hello {world}");
    }

    #[test]
    fn fstring_backslash_dollar_and_hash_are_preserved() {
        // `\$` and `\#` get the same pass-through treatment as `\{` / `\}`
        // whenever the literal is interpolated.
        let dollar = parse_string_literal_with_kind("f$\"cost \\$5\"").unwrap();
        assert_eq!(dollar.interpolation_mode, Some(InterpolationMode::Dollar));
        assert_eq!(dollar.value, "cost \\$5");

        let hash = parse_string_literal_with_kind("f#\"issue \\#1\"").unwrap();
        assert_eq!(hash.interpolation_mode, Some(InterpolationMode::Hash));
        assert_eq!(hash.value, "issue \\#1");
    }

    #[test]
    fn plain_string_backslash_dollar_and_hash_decode_to_literals() {
        let parsed = parse_string_literal_with_kind("\"cost \\$5 issue \\#1\"").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert_eq!(parsed.value, "cost $5 issue #1");
    }

    #[test]
    fn content_string_backslash_brace_preserves_literal_brace() {
        // c-strings are interpolated too, so their brace escapes are kept verbatim.
        let parsed = parse_string_literal_with_kind("c\"hello \\{world\\}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert!(parsed.is_content);
        assert_eq!(parsed.value, "hello \\{world\\}");
    }
}