Skip to main content

gdscript_scene/
parse.rs

1//! The `.tscn`/`.tres` text parser (Phase-4 M0) — a wasm-clean, never-panic, byte-offset-tracking
2//! scanner that produces a [`SceneModel`].
3//!
4//! Strategy (per `PHASE-4-M0-PLAYBOOK.md` §4): detect a binary resource and degrade; then a
5//! **two-pass** scan — pass 1 sectionizes with a Variant-aware header lexer + a lossless multiline
6//! value-skipper, pass 2 builds the node tree. The parser **never errors**: every malformed/unknown
7//! form becomes a [`SceneProblem`] and the model degrades to the engine's `Node`-everywhere floor.
8//!
9//! Only ASCII bytes are structurally significant (`[ ] { } ( ) " = ; # & @ /` + newlines); UTF-8
10//! multibyte sequences in names/values are all `>= 0x80` and never collide with a delimiter, so the
11//! byte scan is safe and every slice boundary lands on a char boundary (it's an ASCII delimiter).
12
13use gdscript_base::TextRange;
14use rustc_hash::FxHashMap;
15use smol_str::SmolStr;
16
17use crate::model::{
18    ExtId, ExtResource, NodeIdx, SceneKind, SceneModel, SceneNode, SceneProblem, SubResource,
19};
20
21/// Parse `.tscn`/`.tres` text into a [`SceneModel`]. Pure, never panics, never returns `Err`.
22#[must_use]
23pub fn parse_scene(text: &str) -> SceneModel {
24    if binary_magic(text) {
25        let mut m = SceneModel::empty(SceneKind::Scene);
26        m.problems.push(SceneProblem::BinaryResource);
27        return m;
28    }
29    let mut p = Parser::new(text);
30    p.run();
31    p.build_tree();
32    p.model
33}
34
/// Report whether `text` begins — after any leading spaces, tabs, CRs and LFs — with one of the
/// binary resource magics `RSRC` or `RSCC`.
///
/// Only those four ASCII whitespace characters are skipped; any other leading character (including
/// Unicode whitespace) makes the check fail.
fn binary_magic(text: &str) -> bool {
    let head = text.trim_start_matches(|c: char| matches!(c, ' ' | '\t' | '\r' | '\n'));
    ["RSRC", "RSCC"]
        .iter()
        .any(|magic| head.starts_with(*magic))
}
45
/// Half-open byte range `[start, end)` into the source text.
type Span = (usize, usize);

/// Raw value spans of the header attributes the parser knows about. Nothing is interpreted here;
/// each span is decoded later by whichever section handler needs it.
#[derive(Default)]
struct HeaderAttrs {
    name: Option<Span>,
    typ: Option<Span>,
    parent: Option<Span>,
    instance: Option<Span>,
    instance_placeholder: Option<Span>,
    format: Option<Span>,
    uid: Option<Span>,
    script_class: Option<Span>,
    id: Option<Span>,
    path: Option<Span>,
}

impl HeaderAttrs {
    /// Record `value` under `key`. A repeated key overwrites the earlier span; a key that is not
    /// one of the recognized attributes is dropped without complaint.
    fn set(&mut self, key: &str, value: Span) {
        let recorded = Some(value);
        match key {
            "name" => self.name = recorded,
            "type" => self.typ = recorded,
            "parent" => self.parent = recorded,
            "instance" => self.instance = recorded,
            "instance_placeholder" => self.instance_placeholder = recorded,
            "format" => self.format = recorded,
            "uid" => self.uid = recorded,
            "script_class" => self.script_class = recorded,
            "id" => self.id = recorded,
            "path" => self.path = recorded,
            _ => {} // unknown attribute — ignored, never an error
        }
    }
}
82
83struct Parser<'a> {
84    src: &'a str,
85    bytes: &'a [u8],
86    pos: usize,
87    model: SceneModel,
88}
89
90impl<'a> Parser<'a> {
91    fn new(src: &'a str) -> Self {
92        Self {
93            src,
94            bytes: src.as_bytes(),
95            pos: 0,
96            model: SceneModel::empty(SceneKind::Scene),
97        }
98    }
99
100    // ---- low-level cursor ----
101
102    fn peek(&self) -> Option<u8> {
103        self.bytes.get(self.pos).copied()
104    }
105
106    fn bump(&mut self) {
107        self.pos += 1;
108    }
109
110    fn at_eof(&self) -> bool {
111        self.pos >= self.bytes.len()
112    }
113
114    fn skip_inline_ws(&mut self) {
115        while matches!(self.peek(), Some(b' ' | b'\t')) {
116            self.bump();
117        }
118    }
119
120    /// Skip whitespace, newlines, and `;`-comment lines (the trivia between/around sections).
121    fn skip_trivia(&mut self) {
122        loop {
123            match self.peek() {
124                Some(b' ' | b'\t' | b'\r' | b'\n') => self.bump(),
125                Some(b';') => self.skip_to_eol(),
126                _ => break,
127            }
128        }
129    }
130
131    fn skip_to_eol(&mut self) {
132        while !matches!(self.peek(), None | Some(b'\n')) {
133            self.bump();
134        }
135        if self.peek() == Some(b'\n') {
136            self.bump();
137        }
138    }
139
140    /// Read an identifier `[A-Za-z0-9_/]+` (header tag, or a key — keys may contain `/`).
141    fn read_ident(&mut self) -> Option<SmolStr> {
142        let start = self.pos;
143        while matches!(self.peek(), Some(b) if b.is_ascii_alphanumeric() || b == b'_' || b == b'/')
144        {
145            self.bump();
146        }
147        if self.pos == start {
148            None
149        } else {
150            self.src.get(start..self.pos).map(SmolStr::new)
151        }
152    }
153
154    // ---- value lexing (the lossless skipper) ----
155
156    /// Consume one complete value expression (string / array / dict / constructor / bare / color /
157    /// `&"…"`), returning its byte span. Never panics; stops at EOF.
158    fn consume_value(&mut self) -> Span {
159        self.skip_inline_ws();
160        let start = self.pos;
161        match self.peek() {
162            Some(b'"') => self.consume_quoted(),
163            Some(b'&' | b'@') => {
164                self.bump();
165                if self.peek() == Some(b'"') {
166                    self.consume_quoted();
167                } else {
168                    self.consume_bare();
169                }
170            }
171            Some(b'[' | b'{' | b'(') => self.consume_balanced(),
172            Some(b'#') => self.consume_color(),
173            Some(_) => {
174                self.consume_bare();
175                // Trailing constructor / typed-array brackets: `Vector2(…)`, `Array[T]([…])`.
176                while matches!(self.peek(), Some(b'(' | b'[')) {
177                    self.consume_balanced();
178                }
179            }
180            None => {}
181        }
182        (start, self.pos)
183    }
184
185    /// Consume a `"…"` string: honors `\\`/`\"` escapes and **literal embedded newlines** (C12).
186    fn consume_quoted(&mut self) {
187        self.bump(); // opening quote
188        loop {
189            match self.peek() {
190                None => break,
191                Some(b'\\') => {
192                    self.bump();
193                    self.bump(); // skip the escaped byte
194                }
195                Some(b'"') => {
196                    self.bump();
197                    break;
198                }
199                Some(_) => self.bump(),
200            }
201        }
202    }
203
204    /// Consume a `(…)`/`[…]`/`{…}` value with combined bracket depth, quote- and color-aware,
205    /// across physical newlines (C2, C12).
206    fn consume_balanced(&mut self) {
207        let mut depth: u32 = 0;
208        loop {
209            match self.peek() {
210                None => break,
211                Some(b'"') => self.consume_quoted(),
212                Some(b'#') => self.consume_color(), // a Color literal, NOT a comment (C11)
213                Some(b';') => self.skip_to_eol(),
214                Some(b'(' | b'[' | b'{') => {
215                    depth += 1;
216                    self.bump();
217                }
218                Some(b')' | b']' | b'}') => {
219                    self.bump();
220                    depth = depth.saturating_sub(1);
221                    if depth == 0 {
222                        break;
223                    }
224                }
225                Some(_) => self.bump(),
226            }
227        }
228    }
229
230    /// Consume a `#RRGGBBAA` color token (hex run after `#`).
231    fn consume_color(&mut self) {
232        self.bump(); // '#'
233        while matches!(self.peek(), Some(b) if b.is_ascii_hexdigit()) {
234            self.bump();
235        }
236    }
237
238    /// Consume a bare token (ident / number / sign / `inf` / `nan` / `true` / `null`).
239    fn consume_bare(&mut self) {
240        while matches!(
241            self.peek(),
242            Some(b) if b.is_ascii_alphanumeric() || matches!(b, b'_' | b'+' | b'-' | b'.')
243        ) {
244            self.bump();
245        }
246    }
247
248    // ---- header + body ----
249
250    /// Parse a `[tag …]` header. Returns the tag and the recognized attrs. `pos` ends just after
251    /// the closing `]` (or EOF if malformed). Assumes `pos` is at `[`.
252    fn read_header(&mut self) -> (Option<SmolStr>, HeaderAttrs, bool) {
253        self.bump(); // '['
254        self.skip_inline_ws();
255        let tag = self.read_ident();
256        let mut attrs = HeaderAttrs::default();
257        let mut closed = false;
258        loop {
259            self.skip_inline_ws();
260            match self.peek() {
261                Some(b']') => {
262                    self.bump();
263                    closed = true;
264                    break;
265                }
266                // EOF or newline before `]` — a header never wraps, so this is an unclosed bracket.
267                None | Some(b'\n') => break,
268                Some(_) => {
269                    let Some(key) = self.read_ident() else {
270                        self.bump(); // stray byte — advance to avoid looping
271                        continue;
272                    };
273                    self.skip_inline_ws();
274                    if self.peek() != Some(b'=') {
275                        continue; // a bare flag (none expected); ignore
276                    }
277                    self.bump(); // '='
278                    let value = self.consume_value();
279                    attrs.set(&key, value);
280                }
281            }
282        }
283        (tag, attrs, closed)
284    }
285
286    /// Read the body property lines of the current section until the next header / EOF. When
287    /// `is_node`, capture `script =` and `unique_name_in_owner =`; otherwise skip every value
288    /// losslessly. Returns `(script, unique_name_in_owner)`.
289    fn consume_body(&mut self, is_node: bool) -> (Option<ExtId>, bool) {
290        let mut script = None;
291        let mut unique = false;
292        loop {
293            self.skip_trivia();
294            match self.peek() {
295                None | Some(b'[') => break, // EOF or next section
296                Some(_) => {}
297            }
298            let Some(key) = self.read_ident() else {
299                self.skip_to_eol(); // not a key line — skip it
300                continue;
301            };
302            self.skip_inline_ws();
303            if self.peek() != Some(b'=') {
304                self.skip_to_eol();
305                continue;
306            }
307            self.bump(); // '='
308            let (vs, ve) = self.consume_value();
309            if is_node {
310                match key.as_str() {
311                    "script" => script = self.extract_ext_id(vs, ve),
312                    "unique_name_in_owner" => {
313                        unique = self.src.get(vs..ve).is_some_and(|v| v.trim() == "true");
314                    }
315                    _ => {}
316                }
317            }
318            self.skip_to_eol();
319        }
320        (script, unique)
321    }
322
323    // ---- value extraction (interpret a recorded span) ----
324
325    /// The content of a quoted-string value (escapes resolved), or the bare token text. `None` for
326    /// an empty value.
327    fn extract_string(&self, span: Span) -> Option<SmolStr> {
328        let raw = self.src.get(span.0..span.1)?.trim();
329        if raw.len() >= 2 && raw.starts_with('"') && raw.ends_with('"') {
330            Some(SmolStr::new(unescape(&raw[1..raw.len() - 1])))
331        } else if raw.is_empty() {
332            None
333        } else {
334            Some(SmolStr::new(raw))
335        }
336    }
337
338    /// Parse a `format=`/numeric value to `u8` (best effort).
339    fn extract_u8(&self, span: Span) -> Option<u8> {
340        self.extract_string(span)?.trim().parse().ok()
341    }
342
343    /// Extract the id from an `ExtResource("id")` / `ExtResource(1)` value → the string `"id"`/`"1"`.
344    /// Returns `None` for any other constructor (notably `SubResource("…")`, an *inline* script /
345    /// resource that has no external path — M0 records no attachment for it; M1 types the node by
346    /// its declared `type=` instead).
347    fn extract_ext_id(&self, start: usize, end: usize) -> Option<ExtId> {
348        let v = self.src.get(start..end)?;
349        let open = v.find('(')?;
350        if v.get(..open)?.trim() != "ExtResource" {
351            return None;
352        }
353        let close = v.rfind(')')?;
354        if close <= open {
355            return None;
356        }
357        let inner = v.get(open + 1..close)?.trim().trim_matches('"').trim();
358        (!inner.is_empty()).then(|| ExtId(SmolStr::new(inner)))
359    }
360
361    // ---- pass 1: sectionize ----
362
363    fn run(&mut self) {
364        loop {
365            self.skip_trivia();
366            if self.at_eof() {
367                break;
368            }
369            if self.peek() == Some(b'[') {
370                self.section();
371            } else {
372                self.skip_to_eol(); // stray content outside a section — skip
373            }
374        }
375    }
376
377    fn section(&mut self) {
378        let start = self.pos;
379        let (tag, attrs, closed) = self.read_header();
380        let header_span = TextRange::new(to_u32(start), to_u32(self.pos));
381        if !closed {
382            self.model
383                .problems
384                .push(SceneProblem::MalformedHeader { at: header_span });
385            // body (if any) is consumed by the dispatch's consume_body below as a best effort
386        }
387        match tag.as_deref() {
388            Some("gd_scene") => {
389                self.model.kind = SceneKind::Scene;
390                self.read_scene_header(&attrs);
391                self.consume_body(false);
392            }
393            Some("gd_resource") => {
394                self.model.kind = SceneKind::Resource;
395                self.read_resource_header(&attrs);
396                self.consume_body(false);
397            }
398            Some("ext_resource") => {
399                self.add_ext_resource(&attrs, header_span);
400                self.consume_body(false);
401            }
402            Some("sub_resource") => {
403                self.add_sub_resource(&attrs, header_span);
404                self.consume_body(false);
405            }
406            Some("node") => self.add_node(&attrs, header_span),
407            Some("connection" | "editable" | "resource") => {
408                self.consume_body(false); // recognized, structurally ignored in M0
409            }
410            Some(_) => {
411                self.model
412                    .problems
413                    .push(SceneProblem::UnknownTag { at: header_span });
414                self.consume_body(false);
415            }
416            None => {
417                self.model
418                    .problems
419                    .push(SceneProblem::MalformedHeader { at: header_span });
420                self.consume_body(false);
421            }
422        }
423    }
424
425    fn read_scene_header(&mut self, a: &HeaderAttrs) {
426        self.model.format = a.format.and_then(|s| self.extract_u8(s));
427        self.model.uid = a.uid.and_then(|s| self.extract_string(s));
428        self.model.script_class = a.script_class.and_then(|s| self.extract_string(s));
429    }
430
431    fn read_resource_header(&mut self, a: &HeaderAttrs) {
432        self.model.format = a.format.and_then(|s| self.extract_u8(s));
433        self.model.uid = a.uid.and_then(|s| self.extract_string(s));
434        self.model.script_class = a.script_class.and_then(|s| self.extract_string(s));
435        self.model.resource_type = a.typ.and_then(|s| self.extract_string(s));
436    }
437
438    fn add_ext_resource(&mut self, a: &HeaderAttrs, span: TextRange) {
439        let res_type = a.typ.and_then(|s| self.extract_string(s));
440        let path = a.path.and_then(|s| self.extract_string(s));
441        let uid = a.uid.and_then(|s| self.extract_string(s));
442        let id = a.id.and_then(|s| self.extract_string(s));
443        match id {
444            Some(id) => {
445                if res_type.is_none() || path.is_none() {
446                    self.model
447                        .problems
448                        .push(SceneProblem::MissingExtField { at: span });
449                }
450                self.model.ext_resources.insert(
451                    ExtId(id),
452                    ExtResource {
453                        res_type: res_type.unwrap_or_default(),
454                        path,
455                        uid,
456                        span,
457                    },
458                );
459            }
460            None => self
461                .model
462                .problems
463                .push(SceneProblem::MissingExtField { at: span }),
464        }
465    }
466
467    fn add_sub_resource(&mut self, a: &HeaderAttrs, span: TextRange) {
468        let res_type = a
469            .typ
470            .and_then(|s| self.extract_string(s))
471            .unwrap_or_default();
472        if let Some(id) = a.id.and_then(|s| self.extract_string(s)) {
473            self.model
474                .sub_resources
475                .insert(ExtId(id), SubResource { res_type, span });
476        }
477    }
478
479    fn add_node(&mut self, a: &HeaderAttrs, header_span: TextRange) {
480        let name = a
481            .name
482            .and_then(|s| self.extract_string(s))
483            .unwrap_or_default();
484        let name_span = a
485            .name
486            .map_or(header_span, |(s, e)| TextRange::new(to_u32(s), to_u32(e)));
487        let decl_type = a.typ.and_then(|s| self.extract_string(s));
488        let parent_path = a.parent.and_then(|s| self.extract_string(s));
489        let instance = a.instance.and_then(|(s, e)| self.extract_ext_id(s, e));
490        let instance_placeholder = a.instance_placeholder.is_some();
491        let (script, unique_name_in_owner) = self.consume_body(true);
492        self.model.nodes.push(SceneNode {
493            name,
494            decl_type,
495            parent_path,
496            parent_idx: None,
497            script,
498            instance,
499            instance_is_inherited_root: false,
500            instance_placeholder,
501            unique_name_in_owner,
502            header_span,
503            name_span,
504        });
505    }
506
507    // ---- pass 2: build the tree ----
508
509    fn build_tree(&mut self) {
510        let n = self.model.nodes.len();
511        if n == 0 {
512            return;
513        }
514        // 1. Root(s): the parent-less nodes.
515        let roots: Vec<NodeIdx> = (0..n)
516            .filter(|&i| self.model.nodes[i].parent_path.is_none())
517            .map(|i| NodeIdx(to_u32(i)))
518            .collect();
519        self.model.root = roots.first().copied();
520        if roots.len() > 1 {
521            self.model.problems.push(SceneProblem::MultipleRoots {
522                roots: roots.clone(),
523            });
524        } else if roots.is_empty() {
525            self.model.problems.push(SceneProblem::NoRoot);
526        }
527        let root = self.model.root;
528
529        // 2. Resolve parents in file order (pre-order ⇒ ancestors already registered), building the
530        //    child index, the children lists, and the full-path map.
531        let mut child_index: FxHashMap<(NodeIdx, SmolStr), NodeIdx> = FxHashMap::default();
532        let mut children: FxHashMap<NodeIdx, Vec<NodeIdx>> = FxHashMap::default();
533        let mut full_paths: Vec<SmolStr> = vec![SmolStr::default(); n];
534
535        for i in 0..n {
536            let idx = NodeIdx(to_u32(i));
537            let parent_path = self.model.nodes[i].parent_path.clone();
538            let name = self.model.nodes[i].name.clone();
539
540            // Inherited-scene root: the chosen root carrying `instance=` (`set_base_scene`). Set
541            // BEFORE resolving any child paths so the into-instance check below can see it. Gated on
542            // being THE root (a spurious extra parent-less node in a MultipleRoots scene is not one).
543            if Some(idx) == root && self.model.nodes[i].instance.is_some() {
544                self.model.nodes[i].instance_is_inherited_root = true;
545            }
546
547            let parent_idx = match parent_path.as_deref() {
548                None => None,
549                Some(".") => root,
550                Some(p) => match walk_path(root, p, &child_index) {
551                    Walk::Resolved(found) => Some(found),
552                    // An absolute/`..` escape is out of the slice → silently unresolved, never a
553                    // dangling parent (Playbook §5/§7 — M1 degrades it to `Node`).
554                    Walk::Escaped => None,
555                    Walk::Missed(deepest) => {
556                        // A genuine in-scene miss. If the deepest node reached — or any ancestor up
557                        // to the root — is an instance boundary, the missing tail lives in an
558                        // instanced/inherited sub-scene we don't recurse into (an override line) —
559                        // expected, NOT dangling (Playbook C12/C13/C20). The root being an inherited
560                        // scene makes every override child's missing segment a base-scene node.
561                        if !self.model.descends_from_instance(deepest) {
562                            self.model.problems.push(SceneProblem::DanglingParent {
563                                node: idx,
564                                parent_path: SmolStr::new(p),
565                            });
566                        }
567                        None
568                    }
569                },
570            };
571            self.model.nodes[i].parent_idx = parent_idx;
572
573            if let Some(p) = parent_idx {
574                // First sibling of a given name keeps the navigable slot (matches `unique_nodes`'
575                // first-wins; Godot auto-uniquifies sibling names anyway).
576                child_index.entry((p, name.clone())).or_insert(idx);
577                children.entry(p).or_default().push(idx);
578                let pfp = &full_paths[p.0 as usize];
579                let fp = if pfp.is_empty() {
580                    name
581                } else {
582                    SmolStr::new(format!("{pfp}/{name}"))
583                };
584                full_paths[i] = fp.clone();
585                self.model.by_path.entry(fp).or_insert(idx);
586            }
587        }
588
589        // 3. Unique-name index (first wins on collision).
590        for i in 0..n {
591            if self.model.nodes[i].unique_name_in_owner {
592                self.model
593                    .unique_nodes
594                    .entry(self.model.nodes[i].name.clone())
595                    .or_insert(NodeIdx(to_u32(i)));
596            }
597        }
598
599        // 4. Validate `script=`/`instance=` ids against the ext-resource table.
600        for i in 0..n {
601            let span = self.model.nodes[i].header_span;
602            let refs = [
603                self.model.nodes[i].script.clone(),
604                self.model.nodes[i].instance.clone(),
605            ];
606            for id in refs.into_iter().flatten() {
607                if !self.model.ext_resources.contains_key(&id) {
608                    self.model
609                        .problems
610                        .push(SceneProblem::UnknownExtResource { id, at: span });
611                }
612            }
613        }
614
615        self.model.set_indices(child_index, children);
616    }
617}
618
619/// The outcome of resolving a `parent=`/node path against the in-scene tree.
620enum Walk {
621    /// Fully resolved to a node.
622    Resolved(NodeIdx),
623    /// The path escapes the scene (an absolute `/root/…` or a `..` segment). Out of the M0 slice —
624    /// resolves to nothing **silently** (not a dangling parent; M1 degrades it to `Node`).
625    Escaped,
626    /// A genuine in-scene child miss. `deepest` is the last node reached (so the caller can tell an
627    /// override-into-an-instance from a real dangling parent).
628    Missed(Option<NodeIdx>),
629}
630
631/// Walk a relative name-path from `root`, segment by segment, via the incrementally-built child
632/// index.
633fn walk_path(
634    root: Option<NodeIdx>,
635    path: &str,
636    child_index: &FxHashMap<(NodeIdx, SmolStr), NodeIdx>,
637) -> Walk {
638    if path.starts_with('/') {
639        return Walk::Escaped; // absolute `/root/…` — detect before splitting (leading "" segment)
640    }
641    let Some(mut cur) = root else {
642        return Walk::Missed(None);
643    };
644    for seg in path.split('/') {
645        if seg.is_empty() || seg == "." {
646            continue;
647        }
648        if seg == ".." {
649            return Walk::Escaped; // parent escape — needs the runtime tree
650        }
651        match child_index.get(&(cur, SmolStr::new(seg))) {
652            Some(&next) => cur = next,
653            None => return Walk::Missed(Some(cur)),
654        }
655    }
656    Walk::Resolved(cur)
657}
658
/// Resolve the backslash escapes a `.tscn` quoted string may carry.
///
/// `\n`, `\t` and `\r` become the corresponding control characters. For any other escaped
/// character the backslash is dropped and the character kept (this covers `\"` and `\\`) — a
/// lossy-but-safe simplification for M0. A lone trailing backslash is kept as-is.
fn unescape(s: &str) -> String {
    let mut resolved = String::with_capacity(s.len());
    let mut rest = s;
    while let Some(slash) = rest.find('\\') {
        // Copy the escape-free run verbatim, then decode the escape that follows it.
        resolved.push_str(&rest[..slash]);
        let mut tail = rest[slash + 1..].chars();
        match tail.next() {
            Some('n') => resolved.push('\n'),
            Some('t') => resolved.push('\t'),
            Some('r') => resolved.push('\r'),
            Some(literal) => resolved.push(literal),
            None => resolved.push('\\'),
        }
        rest = tail.as_str();
    }
    resolved.push_str(rest);
    resolved
}
683
/// Convert `usize` to `u32`, clamping to `u32::MAX` instead of failing (a `.tscn` over 4 GiB /
/// 4 G nodes is not a real input).
fn to_u32(v: usize) -> u32 {
    match u32::try_from(v) {
        Ok(narrow) => narrow,
        Err(_) => u32::MAX,
    }
}