1use crate::ast::InterpolationMode;
8use crate::error::{Result, ShapeError};
9
/// Outcome of decoding a raw string-literal token.
///
/// Produced by `parse_string_literal_with_kind`; bundles the decoded text
/// with what the literal's prefix (if any) said about interpolation.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsedStringLiteral {
    /// Decoded text of the literal, with prefix and surrounding quotes removed.
    pub value: String,
    /// Interpolation style selected by the prefix: `Dollar` for `f$`/`c$`,
    /// `Hash` for `f#`/`c#`, `Braces` for a bare `f`/`c`, and `None` when the
    /// literal carries no recognised prefix.
    pub interpolation_mode: Option<InterpolationMode>,
    /// `true` only for the `c`-family prefixes (`c`, `c$`, `c#`).
    pub is_content: bool,
}
17
18pub fn parse_string_literal(raw: &str) -> Result<String> {
20 Ok(parse_string_literal_with_kind(raw)?.value)
21}
22
23pub fn parse_string_literal_with_kind(raw: &str) -> Result<ParsedStringLiteral> {
25 let (interpolation_mode, is_content, unprefixed) = strip_interpolation_prefix(raw);
26 let value = if is_triple_quoted(unprefixed) {
27 parse_triple_quoted(unprefixed)
28 } else if is_simple_quoted(unprefixed) {
29 parse_simple_quoted(&unprefixed[1..unprefixed.len() - 1])?
30 } else {
31 unprefixed.to_string()
32 };
33 Ok(ParsedStringLiteral {
34 value,
35 interpolation_mode,
36 is_content,
37 })
38}
39
40fn strip_interpolation_prefix(raw: &str) -> (Option<InterpolationMode>, bool, &str) {
42 if raw.starts_with("f$") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
44 (Some(InterpolationMode::Dollar), false, &raw[2..])
45 } else if raw.starts_with("f#") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
46 (Some(InterpolationMode::Hash), false, &raw[2..])
47 } else if raw.starts_with('f') && raw.get(1..).is_some_and(|rest| rest.starts_with('"')) {
48 (Some(InterpolationMode::Braces), false, &raw[1..])
49 }
50 else if raw.starts_with("c$") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
52 (Some(InterpolationMode::Dollar), true, &raw[2..])
53 } else if raw.starts_with("c#") && raw.get(2..).is_some_and(|rest| rest.starts_with('"')) {
54 (Some(InterpolationMode::Hash), true, &raw[2..])
55 } else if raw.starts_with('c') && raw.get(1..).is_some_and(|rest| rest.starts_with('"')) {
56 (Some(InterpolationMode::Braces), true, &raw[1..])
57 } else {
58 (None, false, raw)
59 }
60}
61
/// Reports whether `raw` is wrapped in a pair of double quotes.
///
/// A single `"` does not qualify: the opening and closing quote must be
/// distinct characters.
fn is_simple_quoted(raw: &str) -> bool {
    // `"` is ASCII, so checking the first and last bytes is equivalent to
    // checking the first and last characters.
    matches!(raw.as_bytes(), [b'"', .., b'"'])
}
65
/// Reports whether `raw` is wrapped in `"""` on both ends.
///
/// The opening and closing delimiters may not overlap, so at least six
/// characters are required.
fn is_triple_quoted(raw: &str) -> bool {
    const DELIMITER: &str = "\"\"\"";

    raw.strip_prefix(DELIMITER)
        .is_some_and(|after_open| after_open.ends_with(DELIMITER))
}
69
70fn parse_triple_quoted(raw: &str) -> String {
71 let normalized = raw[3..raw.len() - 3].replace("\r\n", "\n");
73 let mut lines: Vec<&str> = normalized.split('\n').collect();
74
75 if lines.first().is_some_and(|line| line.trim().is_empty()) {
77 lines.remove(0);
78 }
79 if lines.last().is_some_and(|line| line.trim().is_empty()) {
80 lines.pop();
81 }
82
83 let common_indent = lines
84 .iter()
85 .filter(|line| !line.trim().is_empty())
86 .map(|line| leading_indent(line))
87 .min()
88 .unwrap_or(0);
89
90 lines
91 .into_iter()
92 .map(|line| {
93 if line.trim().is_empty() {
94 String::new()
95 } else {
96 line.chars().skip(common_indent).collect()
97 }
98 })
99 .collect::<Vec<String>>()
100 .join("\n")
101}
102
103fn parse_simple_quoted(inner: &str) -> Result<String> {
104 let mut out = String::with_capacity(inner.len());
105 let mut chars = inner.chars();
106
107 while let Some(ch) = chars.next() {
108 if ch != '\\' {
109 out.push(ch);
110 continue;
111 }
112
113 let Some(escaped) = chars.next() else {
114 out.push('\\');
115 break;
116 };
117
118 match escaped {
119 'n' => out.push('\n'),
120 't' => out.push('\t'),
121 'r' => out.push('\r'),
122 '0' => out.push('\0'),
123 '\\' => out.push('\\'),
124 '"' => out.push('"'),
125 '\'' => out.push('\''),
126 '{' => out.push('{'),
127 '}' => out.push('}'),
128 '$' => out.push('$'),
129 '#' => out.push('#'),
130 other => {
131 return Err(ShapeError::ParseError {
132 message: format!(
133 "unknown escape sequence '\\{}', expected one of: \\n, \\t, \\r, \\\\, \\\", \\', \\0, \\{{, \\}}, \\$, \\#",
134 other
135 ),
136 location: None,
137 });
138 }
139 }
140 }
141
142 Ok(out)
143}
144
/// Counts the spaces and tabs at the start of `line`.
///
/// Each space or tab counts as one unit; other whitespace ends the run.
fn leading_indent(line: &str) -> usize {
    let after_indent = line.trim_start_matches(|ch: char| matches!(ch, ' ' | '\t'));
    // Space and tab are single-byte characters, so the byte difference equals
    // the number of characters stripped.
    line.len() - after_indent.len()
}
150
#[cfg(test)]
mod tests {
    // Unit tests for string-literal decoding: simple-quoted escapes,
    // triple-quoted dedenting, and `f`/`c` interpolation prefixes.

    use super::{parse_string_literal, parse_string_literal_with_kind};
    use crate::ast::InterpolationMode;

    #[test]
    fn simple_string_is_unwrapped() {
        assert_eq!(parse_string_literal("\"hello\"").unwrap(), "hello");
    }

    #[test]
    fn triple_string_trims_delimiter_blank_lines_and_dedent() {
        let raw = "\"\"\"\n this\n is\n a\n multiline\n \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "this\nis\na\nmultiline");
    }

    #[test]
    fn triple_string_preserves_relative_indentation() {
        // Block indent is 4 spaces; `nested` sits 2 deeper so the dedent has
        // relative indentation to preserve.
        let raw = "\"\"\"\n    root\n      nested\n    end\n    \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "root\n  nested\nend");
    }

    #[test]
    fn triple_string_keeps_inline_form() {
        // Content on the opening line has zero indent, so nothing is stripped.
        let raw = "\"\"\"a\n b\"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "a\n b");
    }

    #[test]
    fn formatted_simple_string_sets_formatted_flag() {
        let parsed = parse_string_literal_with_kind("f\"value: {x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "value: {x}");
    }

    #[test]
    fn formatted_triple_string_sets_formatted_flag() {
        let parsed = parse_string_literal_with_kind("f\"\"\"\n x\n\"\"\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "x");
    }

    #[test]
    fn formatted_triple_string_preserves_relative_indentation() {
        // Block indent is 4 spaces; the interpolation line sits 2 deeper.
        let parsed = parse_string_literal_with_kind(
            "f\"\"\"\n    value:\n      {33+1}\n    \"\"\"",
        )
        .unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert_eq!(parsed.value, "value:\n  {33+1}");
    }

    #[test]
    fn formatted_dollar_prefix_sets_mode() {
        let parsed = parse_string_literal_with_kind("f$\"value: ${x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Dollar));
        assert_eq!(parsed.value, "value: ${x}");
    }

    #[test]
    fn formatted_hash_prefix_sets_mode() {
        let parsed = parse_string_literal_with_kind("f#\"value: #{x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Hash));
        assert_eq!(parsed.value, "value: #{x}");
    }

    #[test]
    fn simple_string_decodes_common_escapes() {
        let parsed = parse_string_literal_with_kind("\"a\\n\\t\\\"b\\\\c\"").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert_eq!(parsed.value, "a\n\t\"b\\c");
    }

    #[test]
    fn simple_string_decodes_interpolation_and_quote_escapes() {
        // Escaped interpolation delimiters and single quotes map to themselves.
        let parsed = parse_string_literal_with_kind("\"\\{x\\} \\$ \\# \\'\"").unwrap();
        assert_eq!(parsed.value, "{x} $ # '");
    }

    // --- Triple-quoted literals -------------------------------------------

    #[test]
    fn triple_string_multiline_with_relative_indent() {
        // Block indent is 4 spaces; the bullet lines sit 2 deeper.
        let raw = "\"\"\"\n    this is\n    a multiline\n    string.\n      -it should indent\n      -but remove the block spaces\n    \"\"\"";
        assert_eq!(
            parse_string_literal(raw).unwrap(),
            "this is\na multiline\nstring.\n  -it should indent\n  -but remove the block spaces"
        );
    }

    #[test]
    fn triple_string_inline_with_inner_quotes() {
        let raw = "\"\"\"a string with quotes\"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "a string with quotes");
    }

    #[test]
    fn triple_string_inline_with_single_inner_quote() {
        let raw = "\"\"\"she said \"hello\" today\"\"\"";
        assert_eq!(
            parse_string_literal(raw).unwrap(),
            "she said \"hello\" today"
        );
    }

    #[test]
    fn triple_string_no_leading_trailing_newline() {
        let raw = "\"\"\"\n hello world\n \"\"\"";
        let result = parse_string_literal(raw).unwrap();
        assert!(
            !result.starts_with('\n'),
            "should not start with newline, got: {:?}",
            result
        );
        assert!(
            !result.ends_with('\n'),
            "should not end with newline, got: {:?}",
            result
        );
        assert_eq!(result, "hello world");
    }

    #[test]
    fn triple_string_empty_lines_preserved_in_middle() {
        let raw = "\"\"\"\n first\n\n last\n \"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "first\n\nlast");
    }

    #[test]
    fn triple_string_does_not_process_escape_sequences() {
        let raw = "\"\"\"\n line with \\n in it\n \"\"\"";
        let result = parse_string_literal(raw).unwrap();
        assert_eq!(result, "line with \\n in it");
    }

    #[test]
    fn triple_string_normalizes_crlf_line_endings() {
        let raw = "\"\"\"\r\n  a\r\n  b\r\n\"\"\"";
        assert_eq!(parse_string_literal(raw).unwrap(), "a\nb");
    }

    // --- Simple-quoted escapes --------------------------------------------

    #[test]
    fn simple_string_escape_newline() {
        assert_eq!(
            parse_string_literal("\"hello\\nworld\"").unwrap(),
            "hello\nworld"
        );
    }

    #[test]
    fn simple_string_escape_tab() {
        assert_eq!(
            parse_string_literal("\"col1\\tcol2\"").unwrap(),
            "col1\tcol2"
        );
    }

    #[test]
    fn simple_string_escape_backslash() {
        assert_eq!(
            parse_string_literal("\"path\\\\file\"").unwrap(),
            "path\\file"
        );
    }

    #[test]
    fn simple_string_escape_quote() {
        assert_eq!(
            parse_string_literal("\"say \\\"hi\\\"\"").unwrap(),
            "say \"hi\""
        );
    }

    #[test]
    fn simple_string_unknown_escape_is_error() {
        let result = parse_string_literal("\"hello\\q\"");
        assert!(result.is_err(), "expected error for unknown escape \\q");
        let err_msg = result.unwrap_err().to_string();
        assert!(
            err_msg.contains("unknown escape sequence"),
            "error should mention 'unknown escape sequence', got: {}",
            err_msg
        );
        assert!(
            err_msg.contains("\\q"),
            "error should mention the bad escape \\q, got: {}",
            err_msg
        );
    }

    #[test]
    fn simple_string_unknown_escape_x_is_error() {
        let result = parse_string_literal("\"\\x41\"");
        assert!(result.is_err(), "expected error for unsupported \\x escape");
    }

    #[test]
    fn simple_string_escape_null() {
        assert_eq!(parse_string_literal("\"a\\0b\"").unwrap(), "a\0b");
    }

    // --- Content (`c`-prefixed) literals ----------------------------------

    #[test]
    fn content_simple_string_sets_content_flag() {
        let parsed = parse_string_literal_with_kind("c\"hello {x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert!(parsed.is_content);
        assert_eq!(parsed.value, "hello {x}");
    }

    #[test]
    fn content_dollar_prefix_sets_mode_and_content() {
        let parsed = parse_string_literal_with_kind("c$\"value: ${x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Dollar));
        assert!(parsed.is_content);
        assert_eq!(parsed.value, "value: ${x}");
    }

    #[test]
    fn content_hash_prefix_sets_mode_and_content() {
        let parsed = parse_string_literal_with_kind("c#\"value: #{x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Hash));
        assert!(parsed.is_content);
        assert_eq!(parsed.value, "value: #{x}");
    }

    #[test]
    fn content_triple_string_sets_content_flag() {
        let parsed = parse_string_literal_with_kind("c\"\"\"\n row: {x}\n\"\"\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert!(parsed.is_content);
        assert_eq!(parsed.value, "row: {x}");
    }

    #[test]
    fn formatted_string_is_not_content() {
        let parsed = parse_string_literal_with_kind("f\"value: {x}\"").unwrap();
        assert_eq!(parsed.interpolation_mode, Some(InterpolationMode::Braces));
        assert!(!parsed.is_content);
    }

    #[test]
    fn plain_string_is_not_content() {
        let parsed = parse_string_literal_with_kind("\"plain\"").unwrap();
        assert_eq!(parsed.interpolation_mode, None);
        assert!(!parsed.is_content);
    }
}