Skip to main content

i_slint_compiler/
literals.rs

1// Copyright © SixtyFPS GmbH <info@slint.dev>
2// SPDX-License-Identifier: GPL-3.0-only OR LicenseRef-Slint-Royalty-free-2.0 OR LicenseRef-Slint-Software-3.0
3
4// cSpell: ignore qsdf
5use crate::diagnostics::{BuildDiagnostics, SourceLocation, Span, Spanned};
6use crate::expression_tree::Expression;
7use crate::expression_tree::Unit;
8use itertools::Itertools;
9use smol_str::SmolStr;
10use strum::IntoEnumIterator;
11
/// One piece of decoded string-literal content, as handed to the callback of
/// [`walk_escapes`].
enum EscapeChunk<'a> {
    /// A run of characters containing no backslash; the bytes are identical in the
    /// source token and in the decoded output.
    Plain(&'a str),
    /// A single escape sequence: `source_len` bytes of the source token (counted
    /// from the backslash) decode to the one character `decoded`.
    Escape { source_len: usize, decoded: char },
}

/// Failure returned by [`walk_escapes`].
struct EscapeError {
    /// Byte offset of the offending span, relative to the start of the raw token
    /// (the opening delimiter is at offset 0).
    offset: usize,
    /// Byte length of the offending span; `0` when only a position is reported.
    length: usize,
    /// Human-readable description of the problem.
    message: &'static str,
}

/// Decode the `\u{…}` escape whose backslash is at byte `pos` of `content`.
///
/// `prefix_len` is the length of the token's opening delimiter; it is added to
/// the offsets stored in the returned [`EscapeError`] so that they are relative
/// to the raw token rather than to `content`.
///
/// On success returns the number of source bytes consumed (from the backslash up
/// to and including the closing `}`) together with the decoded character.
fn decode_unicode_escape(
    content: &str,
    pos: usize,
    prefix_len: usize,
) -> Result<(usize, char), EscapeError> {
    // `pos` is the backslash and `pos + 1` the `u`, so the brace is expected here.
    let brace_start = pos + 2;
    if content.as_bytes().get(brace_start) != Some(&b'{') {
        return Err(EscapeError {
            offset: prefix_len + brace_start,
            length: 0,
            message: "Invalid unicode escape: expected '{'",
        });
    }
    let Some(brace_end) = content[brace_start..].find('}').map(|i| i + brace_start) else {
        return Err(EscapeError {
            offset: prefix_len + brace_start,
            length: 0,
            message: "Unterminated unicode escape",
        });
    };
    let hex = &content[brace_start + 1..brace_end];
    let invalid_hex = || EscapeError {
        offset: prefix_len + brace_start + 1,
        length: hex.len(),
        message: "Invalid hexadecimal in unicode escape",
    };
    // `from_str_radix` tolerates a leading `+`, which is not valid inside a
    // unicode escape, so the digits are validated explicitly first.
    if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        return Err(invalid_hex());
    }
    // Still fallible: the digit string may be empty or overflow `u32`.
    let code_point = u32::from_str_radix(hex, 16).map_err(|_| invalid_hex())?;
    let decoded = char::from_u32(code_point).ok_or(EscapeError {
        offset: prefix_len + brace_start + 1,
        length: hex.len(),
        message: "Invalid unicode code point",
    })?;
    Ok((brace_end + 1 - pos, decoded))
}

/// Walk a string literal token (including its delimiters), strip the delimiters,
/// and call `callback` for each chunk of the content.
///
/// The token must start with `"` or `}` and end with `"` or `\{`. The supported
/// escapes are `\"`, `\\`, `\n` and `\u{HEX}`.
///
/// Returns `Ok(())` on success, or an [`EscapeError`] whose offset is relative to
/// the start of the raw token. Chunks preceding the error have already been passed
/// to `callback` by the time the error is returned.
fn walk_escapes<'a>(
    raw_token: &'a str,
    mut callback: impl FnMut(EscapeChunk<'a>),
) -> Result<(), EscapeError> {
    const CANNOT_PARSE: &str = "Cannot parse string literal";
    const UNKNOWN_ESCAPE: &str =
        r"Unknown escape sequence. Use '\\' to escape a literal backslash";

    if raw_token.contains('\n') {
        return Err(EscapeError { offset: 0, length: 0, message: "Newline in string literal" });
    }
    let prefix_len = if raw_token.starts_with('"') || raw_token.starts_with('}') {
        1
    } else {
        return Err(EscapeError { offset: 0, length: 0, message: CANNOT_PARSE });
    };
    let content = &raw_token[prefix_len..];
    let content = content
        .strip_suffix('"')
        .or_else(|| content.strip_suffix("\\{"))
        .ok_or(EscapeError { offset: 0, length: 0, message: CANNOT_PARSE })?;

    let bytes = content.as_bytes();
    let mut pos = 0;
    while pos < content.len() {
        if bytes[pos] != b'\\' {
            // Emit everything up to the next backslash (or the end) as one chunk.
            let start = pos;
            pos = content[pos..].find('\\').map_or(content.len(), |i| pos + i);
            callback(EscapeChunk::Plain(&content[start..pos]));
            continue;
        }
        let (source_len, decoded) = match bytes.get(pos + 1) {
            // A backslash with nothing after it.
            None => {
                return Err(EscapeError {
                    offset: prefix_len + pos,
                    length: 1,
                    message: UNKNOWN_ESCAPE,
                });
            }
            Some(b'"') => (2, '"'),
            Some(b'\\') => (2, '\\'),
            Some(b'n') => (2, '\n'),
            Some(b'u') => decode_unicode_escape(content, pos, prefix_len)?,
            Some(_) => {
                // Cover the backslash plus the whole (possibly multi-byte) character
                // that follows it.
                let next_char_len =
                    content[pos + 1..].chars().next().map_or(1, |c| c.len_utf8());
                return Err(EscapeError {
                    offset: prefix_len + pos,
                    length: 1 + next_char_len,
                    message: UNKNOWN_ESCAPE,
                });
            }
        };
        callback(EscapeChunk::Escape { source_len, decoded });
        pos += source_len;
    }
    Ok(())
}
116
117/// Unescape a string literal token, returning `None` on error.
118pub fn unescape_string(string: &str) -> Option<SmolStr> {
119    let mut result = String::with_capacity(string.len());
120    walk_escapes(string, |chunk| match chunk {
121        EscapeChunk::Plain(s) => result += s,
122        EscapeChunk::Escape { decoded, .. } => result.push(decoded),
123    })
124    .ok()?;
125    Some(result.into())
126}
127
128/// Unescape a string literal token, reporting any error on the token's source location
129/// with the span pointing at the invalid escape sequence.
130/// If `token` is `None` (no string literal found), reports a generic error on `fallback`.
131pub fn unescape_string_reporting(
132    token: Option<&crate::parser::SyntaxToken>,
133    diag: &mut BuildDiagnostics,
134    fallback: &dyn Spanned,
135) -> Option<SmolStr> {
136    let Some(token) = token else {
137        diag.push_error("Cannot parse string literal".into(), fallback);
138        return None;
139    };
140    let mut result = String::with_capacity(token.text().len());
141    match walk_escapes(token.text(), |chunk| match chunk {
142        EscapeChunk::Plain(s) => result += s,
143        EscapeChunk::Escape { decoded, .. } => result.push(decoded),
144    }) {
145        Ok(()) => Some(result.into()),
146        Err(e) => {
147            let loc = token.to_source_location();
148            diag.push_error_with_span(
149                e.message.into(),
150                SourceLocation {
151                    source_file: loc.source_file,
152                    span: Span::new(loc.span.offset + e.offset, e.length),
153                },
154            );
155            None
156        }
157    }
158}
159
/// Checks `unescape_string` against a table of well-formed tokens (with their
/// expected decoded text) and a table of tokens that must be rejected.
#[test]
fn test_unescape_string() {
    let accepted: [(&str, &str); 7] = [
        (r#""foo_bar""#, "foo_bar"),
        (r#""foo\"bar""#, "foo\"bar"),
        (r#""foo\\\"bar""#, "foo\\\"bar"),
        (r#""fo\na\\r""#, "fo\na\\r"),
        (r#""f\n\n\nf""#, "f\n\n\nf"),
        (r#""music\"♪\"🎝""#, "music\"♪\"🎝"),
        (r#""d\u{8}a\u{d4}f\u{Ed3}""#, "d\u{8}a\u{d4}f\u{ED3}"),
    ];
    for (token, expected) in accepted {
        assert_eq!(unescape_string(token).as_deref(), Some(expected), "token: {}", token);
    }

    let rejected = [
        // Unknown or dangling escapes.
        r#""fo\xa""#,
        r#""fooo\""#,
        r#""music\♪xx""#,
        r#""xxx\""#,
        // Missing or broken delimiters.
        r#""foo_bar"#,
        r#""foo_bar\"#,
        r#"foo_bar""#,
        // Malformed unicode escapes.
        r#""xxx\u""#,
        r#""xxx\uxx""#,
        r#""xxx\u{""#,
        r#""xxx\u{22""#,
        r#""xxx\u{qsdf}""#,
        r#""xxx\u{1234567890}""#,
    ];
    for token in rejected {
        assert_eq!(unescape_string(token), None, "token: {}", token);
    }
}
186
187/// Maps byte offsets in a string assembled from one or more string literal tokens
188/// back to precise source locations, accounting for escape sequences.
189#[derive(Default)]
190pub struct StringLiteralSourceMap {
191    assembled: String,
192    entries: Vec<SourceMapEntry>,
193}
194
195/// One segment where assembled-string offsets map 1:1 to source-file offsets.
196/// A new entry is created at every escape boundary.
197struct SourceMapEntry {
198    /// Start byte offset in the assembled (unescaped) string.
199    assembled_start: usize,
200    /// Absolute byte offset in the source file corresponding to `assembled_start`.
201    source_offset: usize,
202    source_file: Option<crate::diagnostics::SourceFile>,
203}
204
205impl StringLiteralSourceMap {
206    pub fn new() -> Self {
207        Self::default()
208    }
209
210    /// Return the assembled (unescaped) string.
211    pub fn as_str(&self) -> &str {
212        &self.assembled
213    }
214
215    /// Consume the source map and return the assembled string.
216    pub fn into_string(self) -> String {
217        self.assembled
218    }
219
220    /// Unescape a string literal token, appending to the internal assembled string
221    /// and recording the source mapping. Reports errors to `diag` and returns
222    /// `false` on failure.
223    pub fn push(
224        &mut self,
225        token: &crate::parser::SyntaxToken,
226        diag: &mut BuildDiagnostics,
227    ) -> bool {
228        let loc = token.to_source_location();
229        let token_offset = loc.span.offset;
230        let raw = token.text();
231        let base = self.assembled.len();
232
233        let mut source_pos = 1usize;
234        let mut segment_start_assembled = base;
235        let mut segment_start_source = 1usize;
236
237        let result = walk_escapes(raw, |chunk| match chunk {
238            EscapeChunk::Plain(s) => {
239                self.assembled += s;
240                source_pos += s.len();
241            }
242            EscapeChunk::Escape { source_len, decoded } => {
243                if self.assembled.len() > segment_start_assembled {
244                    self.entries.push(SourceMapEntry {
245                        assembled_start: segment_start_assembled,
246                        source_offset: token_offset + segment_start_source,
247                        source_file: loc.source_file.clone(),
248                    });
249                }
250                self.entries.push(SourceMapEntry {
251                    assembled_start: self.assembled.len(),
252                    source_offset: token_offset + source_pos,
253                    source_file: loc.source_file.clone(),
254                });
255                self.assembled.push(decoded);
256                source_pos += source_len;
257                segment_start_assembled = self.assembled.len();
258                segment_start_source = source_pos;
259            }
260        });
261
262        match result {
263            Ok(()) => {
264                if self.assembled.len() > segment_start_assembled {
265                    self.entries.push(SourceMapEntry {
266                        assembled_start: segment_start_assembled,
267                        source_offset: token_offset + segment_start_source,
268                        source_file: loc.source_file,
269                    });
270                }
271                true
272            }
273            Err(e) => {
274                self.assembled.truncate(base);
275                diag.push_error_with_span(
276                    e.message.into(),
277                    SourceLocation {
278                        source_file: loc.source_file,
279                        span: Span::new(loc.span.offset + e.offset, e.length),
280                    },
281                );
282                false
283            }
284        }
285    }
286
287    /// Append a non-literal character (e.g., an interpolation placeholder)
288    /// where source and assembled offsets correspond 1:1.
289    pub fn push_raw_char(&mut self, ch: char, loc: SourceLocation) {
290        let start = self.assembled.len();
291        self.assembled.push(ch);
292        self.entries.push(SourceMapEntry {
293            assembled_start: start,
294            source_offset: loc.span.offset,
295            source_file: loc.source_file,
296        });
297    }
298
299    /// Resolve a byte range in the assembled string to a precise source location.
300    /// The returned span points at the specific position within the string literal.
301    pub fn resolve(&self, range: std::ops::Range<usize>) -> Option<SourceLocation> {
302        let idx = self.entries.partition_point(|e| e.assembled_start <= range.start);
303        if idx == 0 {
304            return None;
305        }
306        let entry = &self.entries[idx - 1];
307        let delta = range.start - entry.assembled_start;
308        Some(SourceLocation {
309            source_file: entry.source_file.clone(),
310            span: Span::new(entry.source_offset + delta, range.len()),
311        })
312    }
313
314    /// Report an error at a precise position within the string, falling back to
315    /// the full node if the position cannot be resolved.
316    pub fn report(
317        &self,
318        diag: &mut BuildDiagnostics,
319        message: String,
320        range: std::ops::Range<usize>,
321        fallback: &dyn Spanned,
322    ) {
323        if let Some(loc) = self.resolve(range) {
324            diag.push_error_with_span(message, loc);
325        } else {
326            diag.push_error(message, fallback);
327        }
328    }
329}
330
331pub fn parse_number_literal(s: SmolStr) -> Result<Expression, SmolStr> {
332    let bytes = s.as_bytes();
333    let mut end = 0;
334    while end < bytes.len() && matches!(bytes[end], b'0'..=b'9' | b'.') {
335        end += 1;
336    }
337    let val = s[..end].parse().map_err(|_| "Cannot parse number literal".to_owned())?;
338    let unit = s[end..].parse().map_err(|_| {
339        format!(
340            "Invalid unit '{}'. Valid units are: {}",
341            s.get(end..).unwrap_or(&s),
342            Unit::iter().filter(|x| !x.to_string().is_empty()).join(", ")
343        )
344    })?;
345    Ok(Expression::NumberLiteral(val, unit))
346}
347
/// Checks `parse_number_literal` on accepted literals, on a malformed numeric
/// part, and on literals whose unit suffix is not recognised.
#[test]
fn test_parse_number_literal() {
    use crate::expression_tree::Unit;
    use smol_str::{ToSmolStr, format_smolstr};

    /// Parse `input` and unpack the resulting number literal.
    fn parsed(input: &str) -> Result<(f64, Unit), SmolStr> {
        match parse_number_literal(input.into())? {
            Expression::NumberLiteral(value, unit) => Ok((value, unit)),
            _ => panic!(),
        }
    }

    let accepted = [
        ("10", 10., Unit::None),
        ("10phx", 10., Unit::Phx),
        ("10.0phx", 10., Unit::Phx),
        ("10.0", 10., Unit::None),
        ("1.1phx", 1.1, Unit::Phx),
        ("10.10", 10.10, Unit::None),
        ("10000000", 10000000., Unit::None),
        ("10000001phx", 10000001., Unit::Phx),
    ];
    for (input, value, unit) in accepted {
        assert_eq!(parsed(input), Ok((value, unit)));
    }

    // Two dots make the numeric part unparsable.
    assert_eq!(parsed("12.10.12phx"), Err("Cannot parse number literal".to_smolstr()));

    // The rejected unit text is echoed verbatim, followed by the valid units.
    let valid_units = Unit::iter().filter(|x| !x.to_string().is_empty()).join(", ");
    let rejected_units = [("10000001 phx", " phx"), ("12.12oo", "oo"), ("12.12€", "€")];
    for (input, bad_unit) in rejected_units {
        let expected =
            Err(format_smolstr!("Invalid unit '{}'. Valid units are: {}", bad_unit, valid_units));
        assert_eq!(parsed(input), expected);
    }
}