1use crate::diagnostics::{BuildDiagnostics, SourceLocation, Span, Spanned};
6use crate::expression_tree::Expression;
7use crate::expression_tree::Unit;
8use itertools::Itertools;
9use smol_str::SmolStr;
10use strum::IntoEnumIterator;
11
/// One piece of a string literal's contents, as handed to the callback of `walk_escapes`.
enum EscapeChunk<'a> {
    /// A run of text borrowed verbatim from the source; it never contains a backslash.
    Plain(&'a str),
    /// A single escape sequence: `source_len` is the number of bytes it occupies in the
    /// source (backslash included) and `decoded` is the character it stands for.
    Escape { source_len: usize, decoded: char },
}
19
/// Describes why a string literal could not be unescaped by `walk_escapes`.
struct EscapeError {
    /// Byte offset of the offending text, counted from the start of the raw token
    /// (so it includes the opening delimiter). `0` when the whole token is at fault.
    offset: usize,
    /// Byte length of the offending text; `0` when no specific range is singled out.
    length: usize,
    /// Human-readable description of the problem.
    message: &'static str,
}
27
28fn walk_escapes<'a>(
32 raw_token: &'a str,
33 mut callback: impl FnMut(EscapeChunk<'a>),
34) -> Result<(), EscapeError> {
35 if raw_token.contains('\n') {
36 return Err(EscapeError { offset: 0, length: 0, message: "Newline in string literal" });
37 }
38 let prefix_len = if raw_token.starts_with('"') || raw_token.starts_with('}') {
39 1
40 } else {
41 return Err(EscapeError { offset: 0, length: 0, message: "Cannot parse string literal" });
42 };
43 let content = &raw_token[prefix_len..];
44 let content = content
45 .strip_suffix('"')
46 .or_else(|| content.strip_suffix("\\{"))
47 .ok_or(EscapeError { offset: 0, length: 0, message: "Cannot parse string literal" })?;
48
49 let mut pos = 0;
50 while pos < content.len() {
51 if content.as_bytes()[pos] == b'\\' {
52 if pos + 1 >= content.len() {
53 return Err(EscapeError {
54 offset: prefix_len + pos,
55 length: 1,
56 message: r"Unknown escape sequence. Use '\\' to escape a literal backslash",
57 });
58 }
59 let (source_len, decoded) = match content.as_bytes()[pos + 1] {
60 b'"' => (2, '"'),
61 b'\\' => (2, '\\'),
62 b'n' => (2, '\n'),
63 b'u' => {
64 let brace_start = pos + 2;
65 let has_brace = content.as_bytes().get(brace_start) == Some(&b'{');
66 if !has_brace {
67 return Err(EscapeError {
68 offset: prefix_len + brace_start,
69 length: 0,
70 message: "Invalid unicode escape: expected '{'",
71 });
72 }
73 let brace_end = match content[brace_start..].find('}') {
74 Some(i) => i + brace_start,
75 None => {
76 return Err(EscapeError {
77 offset: prefix_len + brace_start,
78 length: 0,
79 message: "Unterminated unicode escape",
80 });
81 }
82 };
83 let hex = &content[brace_start + 1..brace_end];
84 let x = u32::from_str_radix(hex, 16).map_err(|_| EscapeError {
85 offset: prefix_len + brace_start + 1,
86 length: hex.len(),
87 message: "Invalid hexadecimal in unicode escape",
88 })?;
89 let ch = std::char::from_u32(x).ok_or(EscapeError {
90 offset: prefix_len + brace_start + 1,
91 length: hex.len(),
92 message: "Invalid unicode code point",
93 })?;
94 (brace_end + 1 - pos, ch)
95 }
96 _ => {
97 let next_char_len =
98 content[pos + 1..].chars().next().map_or(1, |c| c.len_utf8());
99 return Err(EscapeError {
100 offset: prefix_len + pos,
101 length: 1 + next_char_len,
102 message: r"Unknown escape sequence. Use '\\' to escape a literal backslash",
103 });
104 }
105 };
106 callback(EscapeChunk::Escape { source_len, decoded });
107 pos += source_len;
108 } else {
109 let start = pos;
110 pos = content[pos..].find('\\').map_or(content.len(), |i| pos + i);
111 callback(EscapeChunk::Plain(&content[start..pos]));
112 }
113 }
114 Ok(())
115}
116
117pub fn unescape_string(string: &str) -> Option<SmolStr> {
119 let mut result = String::with_capacity(string.len());
120 walk_escapes(string, |chunk| match chunk {
121 EscapeChunk::Plain(s) => result += s,
122 EscapeChunk::Escape { decoded, .. } => result.push(decoded),
123 })
124 .ok()?;
125 Some(result.into())
126}
127
128pub fn unescape_string_reporting(
132 token: Option<&crate::parser::SyntaxToken>,
133 diag: &mut BuildDiagnostics,
134 fallback: &dyn Spanned,
135) -> Option<SmolStr> {
136 let Some(token) = token else {
137 diag.push_error("Cannot parse string literal".into(), fallback);
138 return None;
139 };
140 let mut result = String::with_capacity(token.text().len());
141 match walk_escapes(token.text(), |chunk| match chunk {
142 EscapeChunk::Plain(s) => result += s,
143 EscapeChunk::Escape { decoded, .. } => result.push(decoded),
144 }) {
145 Ok(()) => Some(result.into()),
146 Err(e) => {
147 let loc = token.to_source_location();
148 diag.push_error_with_span(
149 e.message.into(),
150 SourceLocation {
151 source_file: loc.source_file,
152 span: Span::new(loc.span.offset + e.offset, e.length),
153 },
154 );
155 None
156 }
157 }
158}
159
/// Checks `unescape_string` against a table of raw tokens and their expected decoding
/// (`None` meaning the token must be rejected).
#[test]
fn test_unescape_string() {
    let cases: &[(&str, Option<&str>)] = &[
        (r#""foo_bar""#, Some("foo_bar")),
        (r#""foo\"bar""#, Some("foo\"bar")),
        (r#""foo\\\"bar""#, Some("foo\\\"bar")),
        (r#""fo\na\\r""#, Some("fo\na\\r")),
        (r#""fo\xa""#, None),
        (r#""fooo\""#, None),
        (r#""f\n\n\nf""#, Some("f\n\n\nf")),
        (r#""music\♪xx""#, None),
        (r#""music\"♪\"🎝""#, Some("music\"♪\"🎝")),
        // Missing or mismatched delimiters.
        (r#""foo_bar"#, None),
        (r#""foo_bar\"#, None),
        (r#"foo_bar""#, None),
        // Unicode escapes.
        (r#""d\u{8}a\u{d4}f\u{Ed3}""#, Some("d\u{8}a\u{d4}f\u{ED3}")),
        (r#""xxx\""#, None),
        (r#""xxx\u""#, None),
        (r#""xxx\uxx""#, None),
        (r#""xxx\u{""#, None),
        (r#""xxx\u{22""#, None),
        (r#""xxx\u{qsdf}""#, None),
        (r#""xxx\u{1234567890}""#, None),
    ];
    for &(input, expected) in cases {
        assert_eq!(unescape_string(input).as_deref(), expected, "input: {input}");
    }
}
186
/// Text assembled from one or more unescaped string-literal tokens, together with the
/// bookkeeping needed to map byte ranges of that text back to source locations.
#[derive(Default)]
pub struct StringLiteralSourceMap {
    /// The unescaped text accumulated so far.
    assembled: String,
    /// Segment markers in the order they were appended; `resolve` binary-searches them by
    /// `assembled_start`, so they are expected to be in ascending order.
    entries: Vec<SourceMapEntry>,
}
194
/// Marks where a segment of the assembled text begins and where that segment originates
/// in the source.
struct SourceMapEntry {
    /// Byte offset in the assembled text at which this segment starts.
    assembled_start: usize,
    /// Offset in the source file that corresponds to `assembled_start`.
    source_offset: usize,
    /// File the segment came from, if known.
    source_file: Option<crate::diagnostics::SourceFile>,
}
204
205impl StringLiteralSourceMap {
206 pub fn new() -> Self {
207 Self::default()
208 }
209
210 pub fn as_str(&self) -> &str {
212 &self.assembled
213 }
214
215 pub fn into_string(self) -> String {
217 self.assembled
218 }
219
220 pub fn push(
224 &mut self,
225 token: &crate::parser::SyntaxToken,
226 diag: &mut BuildDiagnostics,
227 ) -> bool {
228 let loc = token.to_source_location();
229 let token_offset = loc.span.offset;
230 let raw = token.text();
231 let base = self.assembled.len();
232
233 let mut source_pos = 1usize;
234 let mut segment_start_assembled = base;
235 let mut segment_start_source = 1usize;
236
237 let result = walk_escapes(raw, |chunk| match chunk {
238 EscapeChunk::Plain(s) => {
239 self.assembled += s;
240 source_pos += s.len();
241 }
242 EscapeChunk::Escape { source_len, decoded } => {
243 if self.assembled.len() > segment_start_assembled {
244 self.entries.push(SourceMapEntry {
245 assembled_start: segment_start_assembled,
246 source_offset: token_offset + segment_start_source,
247 source_file: loc.source_file.clone(),
248 });
249 }
250 self.entries.push(SourceMapEntry {
251 assembled_start: self.assembled.len(),
252 source_offset: token_offset + source_pos,
253 source_file: loc.source_file.clone(),
254 });
255 self.assembled.push(decoded);
256 source_pos += source_len;
257 segment_start_assembled = self.assembled.len();
258 segment_start_source = source_pos;
259 }
260 });
261
262 match result {
263 Ok(()) => {
264 if self.assembled.len() > segment_start_assembled {
265 self.entries.push(SourceMapEntry {
266 assembled_start: segment_start_assembled,
267 source_offset: token_offset + segment_start_source,
268 source_file: loc.source_file,
269 });
270 }
271 true
272 }
273 Err(e) => {
274 self.assembled.truncate(base);
275 diag.push_error_with_span(
276 e.message.into(),
277 SourceLocation {
278 source_file: loc.source_file,
279 span: Span::new(loc.span.offset + e.offset, e.length),
280 },
281 );
282 false
283 }
284 }
285 }
286
287 pub fn push_raw_char(&mut self, ch: char, loc: SourceLocation) {
290 let start = self.assembled.len();
291 self.assembled.push(ch);
292 self.entries.push(SourceMapEntry {
293 assembled_start: start,
294 source_offset: loc.span.offset,
295 source_file: loc.source_file,
296 });
297 }
298
299 pub fn resolve(&self, range: std::ops::Range<usize>) -> Option<SourceLocation> {
302 let idx = self.entries.partition_point(|e| e.assembled_start <= range.start);
303 if idx == 0 {
304 return None;
305 }
306 let entry = &self.entries[idx - 1];
307 let delta = range.start - entry.assembled_start;
308 Some(SourceLocation {
309 source_file: entry.source_file.clone(),
310 span: Span::new(entry.source_offset + delta, range.len()),
311 })
312 }
313
314 pub fn report(
317 &self,
318 diag: &mut BuildDiagnostics,
319 message: String,
320 range: std::ops::Range<usize>,
321 fallback: &dyn Spanned,
322 ) {
323 if let Some(loc) = self.resolve(range) {
324 diag.push_error_with_span(message, loc);
325 } else {
326 diag.push_error(message, fallback);
327 }
328 }
329}
330
331pub fn parse_number_literal(s: SmolStr) -> Result<Expression, SmolStr> {
332 let bytes = s.as_bytes();
333 let mut end = 0;
334 while end < bytes.len() && matches!(bytes[end], b'0'..=b'9' | b'.') {
335 end += 1;
336 }
337 let val = s[..end].parse().map_err(|_| "Cannot parse number literal".to_owned())?;
338 let unit = s[end..].parse().map_err(|_| {
339 format!(
340 "Invalid unit '{}'. Valid units are: {}",
341 s.get(end..).unwrap_or(&s),
342 Unit::iter().filter(|x| !x.to_string().is_empty()).join(", ")
343 )
344 })?;
345 Ok(Expression::NumberLiteral(val, unit))
346}
347
/// Checks `parse_number_literal` on accepted literals, a malformed number, and several
/// unknown units.
#[test]
fn test_parse_number_literal() {
    use crate::expression_tree::Unit;
    use smol_str::{ToSmolStr, format_smolstr};

    type Parsed = Result<(f64, Unit), SmolStr>;

    // Unwraps the number literal into its value and unit for easy comparison.
    let parse = |text: &str| -> Parsed {
        parse_number_literal(text.into()).map(|expression| match expression {
            Expression::NumberLiteral(value, unit) => (value, unit),
            _ => panic!(),
        })
    };

    let accepted = [
        ("10", 10., Unit::None),
        ("10phx", 10., Unit::Phx),
        ("10.0phx", 10., Unit::Phx),
        ("10.0", 10., Unit::None),
        ("1.1phx", 1.1, Unit::Phx),
        ("10.10", 10.10, Unit::None),
        ("10000000", 10000000., Unit::None),
        ("10000001phx", 10000001., Unit::Phx),
    ];
    for (input, value, unit) in accepted {
        assert_eq!(parse(input), Ok((value, unit)));
    }

    let cannot_parse: Parsed = Err("Cannot parse number literal".to_smolstr());
    assert_eq!(parse("12.10.12phx"), cannot_parse);

    let valid_units = Unit::iter().filter(|x| !x.to_string().is_empty()).join(", ");
    let invalid_unit = |unit: &str| -> Parsed {
        Err(format_smolstr!("Invalid unit '{}'. Valid units are: {}", unit, valid_units))
    };
    assert_eq!(parse("10000001 phx"), invalid_unit(" phx"));
    assert_eq!(parse("12.12oo"), invalid_unit("oo"));
    assert_eq!(parse("12.12€"), invalid_unit("€"));
}