Skip to main content

shape_ast/parser/
string_literals.rs

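//! String literal decoding helpers.
//!
//! Supports:
//! - simple strings: `"text"` (backslash escapes are decoded)
//! - triple strings: `"""multiline"""` (dedented; escapes are left as written)
//! - an optional `f`/`f$`/`f#` or `c`/`c$`/`c#` prefix on either form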
6
7use crate::ast::InterpolationMode;
8use crate::error::{Result, ShapeError};
9
10#[derive(Debug, Clone, PartialEq, Eq)]
11pub struct ParsedStringLiteral {
12    pub value: String,
13    pub interpolation_mode: Option<InterpolationMode>,
14    /// `true` when the source used a `c` prefix (content string).
15    pub is_content: bool,
16}
17
18/// Decode a parsed string literal (including surrounding quotes) into its runtime content.
19pub fn parse_string_literal(raw: &str) -> Result<String> {
20    Ok(parse_string_literal_with_kind(raw)?.value)
21}
22
23/// Decode a parsed string literal and report whether it used the `f` or `c` prefix.
24pub fn parse_string_literal_with_kind(raw: &str) -> Result<ParsedStringLiteral> {
25    let (interpolation_mode, is_content, unprefixed) = strip_interpolation_prefix(raw);
26    let value = if is_triple_quoted(unprefixed) {
27        parse_triple_quoted(unprefixed)
28    } else if is_simple_quoted(unprefixed) {
29        parse_simple_quoted(&unprefixed[1..unprefixed.len() - 1])?
30    } else {
31        unprefixed.to_string()
32    };
33    Ok(ParsedStringLiteral {
34        value,
35        interpolation_mode,
36        is_content,
37    })
38}
39
40/// Strip `f`/`f$`/`f#`/`c`/`c$`/`c#` prefix and return (mode, is_content, rest).
41fn strip_interpolation_prefix(raw: &str) -> (Option<InterpolationMode>, bool, &str) {
42    // Try f-string prefixes first (higher priority)
43    if raw.starts_with("f$") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
44        (Some(InterpolationMode::Dollar), false, &raw[2..])
45    } else if raw.starts_with("f#") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
46        (Some(InterpolationMode::Hash), false, &raw[2..])
47    } else if raw.starts_with('f') && raw.get(1..).is_some_and(|rest| rest.starts_with('"')) {
48        (Some(InterpolationMode::Braces), false, &raw[1..])
49    }
50    // Then c-string prefixes
51    else if raw.starts_with("c$") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
52        (Some(InterpolationMode::Dollar), true, &raw[2..])
53    } else if raw.starts_with("c#") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
54        (Some(InterpolationMode::Hash), true, &raw[2..])
55    } else if raw.starts_with('c') && raw.get(1..).is_some_and(|rest| rest.starts_with('"')) {
56        (Some(InterpolationMode::Braces), true, &raw[1..])
57    } else {
58        (None, false, raw)
59    }
60}
61
/// Report whether `raw` opens and closes with two distinct `"` characters.
fn is_simple_quoted(raw: &str) -> bool {
    // Removing the opening quote first guarantees the closing quote is a different character.
    raw.strip_prefix('"')
        .is_some_and(|after_open| after_open.ends_with('"'))
}
65
/// Report whether `raw` opens and closes with non-overlapping `"""` delimiters.
fn is_triple_quoted(raw: &str) -> bool {
    const DELIMITER: &str = "\"\"\"";
    // Removing the opening delimiter first keeps the two delimiters from sharing quotes.
    raw.strip_prefix(DELIMITER)
        .is_some_and(|after_open| after_open.ends_with(DELIMITER))
}
69
70fn parse_triple_quoted(raw: &str) -> String {
71    // Normalize line endings first so trimming rules are deterministic.
72    let normalized = raw[3..raw.len() - 3].replace("\r\n", "\n");
73    let mut lines: Vec<&str> = normalized.split('\n').collect();
74
75    // Ignore delimiter-adjacent blank lines when they only contain whitespace.
76    if lines.first().is_some_and(|line| line.trim().is_empty()) {
77        lines.remove(0);
78    }
79    if lines.last().is_some_and(|line| line.trim().is_empty()) {
80        lines.pop();
81    }
82
83    let common_indent = lines
84        .iter()
85        .filter(|line| !line.trim().is_empty())
86        .map(|line| leading_indent(line))
87        .min()
88        .unwrap_or(0);
89
90    lines
91        .into_iter()
92        .map(|line| {
93            if line.trim().is_empty() {
94                String::new()
95            } else {
96                line.chars().skip(common_indent).collect()
97            }
98        })
99        .collect::<Vec<String>>()
100        .join("\n")
101}
102
103fn parse_simple_quoted(inner: &str) -> Result<String> {
104    let mut out = String::with_capacity(inner.len());
105    let mut chars = inner.chars();
106
107    while let Some(ch) = chars.next() {
108        if ch != '\\' {
109            out.push(ch);
110            continue;
111        }
112
113        let Some(escaped) = chars.next() else {
114            out.push('\\');
115            break;
116        };
117
118        match escaped {
119            'n' => out.push('\n'),
120            't' => out.push('\t'),
121            'r' => out.push('\r'),
122            '0' => out.push('\0'),
123            '\\' => out.push('\\'),
124            '"' => out.push('"'),
125            '\'' => out.push('\''),
126            '{' => out.push('{'),
127            '}' => out.push('}'),
128            '$' => out.push('$'),
129            '#' => out.push('#'),
130            other => {
131                return Err(ShapeError::ParseError {
132                    message: format!(
133                        "unknown escape sequence '\\{}', expected one of: \\n, \\t, \\r, \\\\, \\\", \\', \\0, \\{{, \\}}, \\$, \\#",
134                        other
135                    ),
136                    location: None,
137                });
138            }
139        }
140    }
141
142    Ok(out)
143}
144
/// Count the spaces and tabs at the start of `line`.
fn leading_indent(line: &str) -> usize {
    let unindented = line.trim_start_matches(|ch: char| matches!(ch, ' ' | '\t'));
    // Spaces and tabs are one byte each, so the byte difference equals the character count.
    line.len() - unindented.len()
}
150
#[cfg(test)]
mod tests {
    use super::{parse_string_literal, parse_string_literal_with_kind};
    use crate::ast::InterpolationMode;

    /// Decode `raw` and return its text; panics (at the caller's location) on error.
    #[track_caller]
    fn text(raw: &str) -> String {
        parse_string_literal(raw).unwrap()
    }

    /// Decode `raw` and return `(value, interpolation_mode, is_content)`; panics on error.
    #[track_caller]
    fn parts(raw: &str) -> (String, Option<InterpolationMode>, bool) {
        let parsed = parse_string_literal_with_kind(raw).unwrap();
        (parsed.value, parsed.interpolation_mode, parsed.is_content)
    }

    #[test]
    fn simple_string_is_unwrapped() {
        assert_eq!(text("\"hello\""), "hello");
    }

    #[test]
    fn triple_string_trims_delimiter_blank_lines_and_dedent() {
        let decoded =
            text("\"\"\"\n        this\n        is\n        a\n        multiline\n        \"\"\"");
        assert_eq!(decoded, "this\nis\na\nmultiline");
    }

    #[test]
    fn triple_string_preserves_relative_indentation() {
        let decoded =
            text("\"\"\"\n            root\n              nested\n            end\n            \"\"\"");
        assert_eq!(decoded, "root\n  nested\nend");
    }

    #[test]
    fn triple_string_keeps_inline_form() {
        assert_eq!(text("\"\"\"a\n  b\"\"\""), "a\n  b");
    }

    #[test]
    fn formatted_simple_string_sets_formatted_flag() {
        let (value, mode, _) = parts("f\"value: {x}\"");
        assert_eq!(mode, Some(InterpolationMode::Braces));
        assert_eq!(value, "value: {x}");
    }

    #[test]
    fn formatted_triple_string_sets_formatted_flag() {
        let (value, mode, _) = parts("f\"\"\"\n  x\n\"\"\"");
        assert_eq!(mode, Some(InterpolationMode::Braces));
        assert_eq!(value, "x");
    }

    #[test]
    fn formatted_triple_string_preserves_relative_indentation() {
        let (value, mode, _) =
            parts("f\"\"\"\n            value:\n              {33+1}\n            \"\"\"");
        assert_eq!(mode, Some(InterpolationMode::Braces));
        assert_eq!(value, "value:\n  {33+1}");
    }

    #[test]
    fn formatted_dollar_prefix_sets_mode() {
        let (value, mode, _) = parts("f$\"value: ${x}\"");
        assert_eq!(mode, Some(InterpolationMode::Dollar));
        assert_eq!(value, "value: ${x}");
    }

    #[test]
    fn formatted_hash_prefix_sets_mode() {
        let (value, mode, _) = parts("f#\"value: #{x}\"");
        assert_eq!(mode, Some(InterpolationMode::Hash));
        assert_eq!(value, "value: #{x}");
    }

    #[test]
    fn simple_string_decodes_common_escapes() {
        let (value, mode, _) = parts("\"a\\n\\t\\\"b\\\\c\"");
        assert_eq!(mode, None);
        assert_eq!(value, "a\n\t\"b\\c");
    }

    // --- User-specified multiline triple-string behavior ---

    #[test]
    fn triple_string_multiline_with_relative_indent() {
        let decoded = text(
            "\"\"\"\n            this is\n            a multiline\n            string.\n              -it should indent\n              -but remove the block spaces\n            \"\"\"",
        );
        assert_eq!(
            decoded,
            "this is\na multiline\nstring.\n  -it should indent\n  -but remove the block spaces"
        );
    }

    #[test]
    fn triple_string_inline_with_inner_quotes() {
        assert_eq!(
            text("\"\"\"a string with quotes\"\"\""),
            "a string with quotes"
        );
    }

    #[test]
    fn triple_string_inline_with_single_inner_quote() {
        assert_eq!(
            text("\"\"\"she said \"hello\" today\"\"\""),
            "she said \"hello\" today"
        );
    }

    #[test]
    fn triple_string_no_leading_trailing_newline() {
        let result = text("\"\"\"\n  hello world\n  \"\"\"");
        assert!(
            !result.starts_with('\n'),
            "should not start with newline, got: {:?}",
            result
        );
        assert!(
            !result.ends_with('\n'),
            "should not end with newline, got: {:?}",
            result
        );
        assert_eq!(result, "hello world");
    }

    #[test]
    fn triple_string_empty_lines_preserved_in_middle() {
        assert_eq!(
            text("\"\"\"\n    first\n\n    last\n    \"\"\""),
            "first\n\nlast"
        );
    }

    #[test]
    fn triple_string_does_not_process_escape_sequences() {
        assert_eq!(
            text("\"\"\"\n    line with \\n in it\n    \"\"\""),
            "line with \\n in it"
        );
    }

    // --- Simple-string escape handling ---

    #[test]
    fn simple_string_escape_newline() {
        assert_eq!(text("\"hello\\nworld\""), "hello\nworld");
    }

    #[test]
    fn simple_string_escape_tab() {
        assert_eq!(text("\"col1\\tcol2\""), "col1\tcol2");
    }

    #[test]
    fn simple_string_escape_backslash() {
        assert_eq!(text("\"path\\\\file\""), "path\\file");
    }

    #[test]
    fn simple_string_escape_quote() {
        assert_eq!(text("\"say \\\"hi\\\"\""), "say \"hi\"");
    }

    #[test]
    fn simple_string_unknown_escape_is_error() {
        // BUG-12: Unknown escape sequences must produce an error
        let err_msg = parse_string_literal("\"hello\\q\"")
            .expect_err("expected error for unknown escape \\q")
            .to_string();
        assert!(
            err_msg.contains("unknown escape sequence"),
            "error should mention 'unknown escape sequence', got: {}",
            err_msg
        );
        assert!(
            err_msg.contains("\\q"),
            "error should mention the bad escape \\q, got: {}",
            err_msg
        );
    }

    #[test]
    fn simple_string_unknown_escape_x_is_error() {
        // \x is not a supported escape sequence (no hex escape support yet)
        let outcome = parse_string_literal("\"\\x41\"");
        assert!(outcome.is_err(), "expected error for unsupported \\x escape");
    }

    #[test]
    fn simple_string_escape_null() {
        // \0 should produce a null byte
        assert_eq!(text("\"a\\0b\""), "a\0b");
    }

    // --- Content string (c-prefix) tests ---

    #[test]
    fn content_simple_string_sets_content_flag() {
        let (value, mode, is_content) = parts("c\"hello {x}\"");
        assert_eq!(mode, Some(InterpolationMode::Braces));
        assert!(is_content);
        assert_eq!(value, "hello {x}");
    }

    #[test]
    fn content_dollar_prefix_sets_mode_and_content() {
        let (value, mode, is_content) = parts("c$\"value: ${x}\"");
        assert_eq!(mode, Some(InterpolationMode::Dollar));
        assert!(is_content);
        assert_eq!(value, "value: ${x}");
    }

    #[test]
    fn content_hash_prefix_sets_mode_and_content() {
        let (value, mode, is_content) = parts("c#\"value: #{x}\"");
        assert_eq!(mode, Some(InterpolationMode::Hash));
        assert!(is_content);
        assert_eq!(value, "value: #{x}");
    }

    #[test]
    fn content_triple_string_sets_content_flag() {
        let (value, mode, is_content) = parts("c\"\"\"\n  row: {x}\n\"\"\"");
        assert_eq!(mode, Some(InterpolationMode::Braces));
        assert!(is_content);
        assert_eq!(value, "row: {x}");
    }

    #[test]
    fn formatted_string_is_not_content() {
        let (_, mode, is_content) = parts("f\"value: {x}\"");
        assert_eq!(mode, Some(InterpolationMode::Braces));
        assert!(!is_content);
    }

    #[test]
    fn plain_string_is_not_content() {
        let (_, mode, is_content) = parts("\"plain\"");
        assert_eq!(mode, None);
        assert!(!is_content);
    }
}