Skip to main content

panproto_parse/
emit_pretty.rs

1#![allow(
2    clippy::module_name_repetitions,
3    clippy::too_many_lines,
4    clippy::too_many_arguments,
5    clippy::map_unwrap_or,
6    clippy::option_if_let_else,
7    clippy::elidable_lifetime_names,
8    clippy::items_after_statements,
9    clippy::needless_pass_by_value,
10    clippy::single_match_else,
11    clippy::manual_let_else,
12    clippy::match_same_arms,
13    clippy::missing_const_for_fn,
14    clippy::single_char_pattern,
15    clippy::naive_bytecount,
16    clippy::expect_used,
17    clippy::redundant_pub_crate,
18    clippy::used_underscore_binding,
19    clippy::redundant_field_names,
20    clippy::struct_field_names,
21    clippy::redundant_else,
22    clippy::similar_names
23)]
24
25//! De-novo source emission from a by-construction schema.
26//!
27//! [`AstParser::emit`] reconstructs source from byte-position fragments
28//! that the parser stored on the schema during `parse`. That works for
29//! edit pipelines (`parse → transform → emit`) but fails for schemas
30//! built by hand (`SchemaBuilder` with no parse history): they carry
31//! no `start-byte`, no `interstitial-N`, no `literal-value`, and the
32//! reconstructor returns `Err(EmitFailed { reason: "schema has no
33//! text fragments" })`.
34//!
35//! This module renders such schemas to source bytes by walking
36//! tree-sitter's `grammar.json` production rules. For each schema
37//! vertex of kind `K`, the walker looks up `K`'s production in the
38//! grammar and emits its body in order:
39//!
40//! - `STRING` nodes contribute literal token bytes directly.
41//! - `SYMBOL` and `FIELD` nodes recurse into the schema's children,
42//!   matching by edge kind (which is the tree-sitter field name).
43//! - `SEQ` emits its members in order.
44//! - `CHOICE` picks the alternative whose head `SYMBOL` matches an
45//!   actual child kind, or whose terminals appear in the rendered
46//!   prefix; falls back to the first non-`BLANK` alternative when no
47//!   alternative matches.
48//! - `REPEAT` and `REPEAT1` emit their content once per matching
49//!   child edge in declared order.
50//! - `OPTIONAL` emits its content iff a corresponding child edge or
51//!   constraint is populated.
52//! - `PATTERN` is a regex placeholder for variable-text terminals
53//!   (identifiers, numbers, quoted strings). The walker emits a
54//!   `literal-value` constraint when present and otherwise falls
55//!   back to a placeholder derived from the regex shape.
56//! - `BLANK`, `TOKEN`, `IMMEDIATE_TOKEN`, `ALIAS`, `PREC*` are
57//!   handled transparently (the inner content is emitted; the
58//!   wrapper is dropped).
59//!
60//! Whitespace and indentation come from a `FormatPolicy` applied
61//! during emission. The default policy inserts a single space between
62//! adjacent tokens, a newline after `;` / `}` / `{`, and tracks an
63//! indent counter on `{` / `}` boundaries.
64//!
65//! Output is *syntactically valid* for any grammar that ships
66//! `grammar.json`. Idiomatic formatting (rustfmt-style spacing rules,
67//! per-language conventions) is a polish layer that lives outside
68//! this module.
69
70use std::collections::BTreeMap;
71
72use panproto_schema::{Edge, Schema};
73use serde::Deserialize;
74
75use crate::error::ParseError;
76
77// ═══════════════════════════════════════════════════════════════════
78// Grammar JSON model
79// ═══════════════════════════════════════════════════════════════════
80
81/// A single tree-sitter production rule.
82///
83/// Mirrors the shape emitted by `tree-sitter generate`: every node has
84/// a `type` discriminator that selects a structural variant. The
85/// untyped subset (`PATTERN`, `STRING`, `SYMBOL`, `BLANK`) handles
86/// terminals; the structural subset (`SEQ`, `CHOICE`, `REPEAT`,
87/// `REPEAT1`, `OPTIONAL`, `FIELD`, `ALIAS`, `TOKEN`,
88/// `IMMEDIATE_TOKEN`, `PREC*`) builds composite productions.
89#[derive(Debug, Clone, Deserialize)]
90#[serde(tag = "type")]
91#[non_exhaustive]
92pub enum Production {
93    /// Concatenation of productions.
94    #[serde(rename = "SEQ")]
95    Seq {
96        /// Ordered members; each is emitted in turn.
97        members: Vec<Self>,
98    },
99    /// Alternation between productions.
100    #[serde(rename = "CHOICE")]
101    Choice {
102        /// Alternatives; the walker picks one based on the schema's
103        /// children and constraints.
104        members: Vec<Self>,
105    },
106    /// Zero-or-more repetition.
107    #[serde(rename = "REPEAT")]
108    Repeat {
109        /// The repeated body.
110        content: Box<Self>,
111    },
112    /// One-or-more repetition.
113    #[serde(rename = "REPEAT1")]
114    Repeat1 {
115        /// The repeated body.
116        content: Box<Self>,
117    },
118    /// Optional inclusion (zero or one).
119    ///
120    /// Tree-sitter usually emits `OPTIONAL` as `CHOICE { content,
121    /// BLANK }`, but recent generator versions also emit explicit
122    /// `OPTIONAL` nodes; both shapes are accepted.
123    #[serde(rename = "OPTIONAL")]
124    Optional {
125        /// The optional body.
126        content: Box<Self>,
127    },
128    /// Reference to another rule by name.
129    #[serde(rename = "SYMBOL")]
130    Symbol {
131        /// Name of the referenced rule (matches a vertex kind on the
132        /// schema side).
133        name: String,
134    },
135    /// Literal token bytes.
136    #[serde(rename = "STRING")]
137    String {
138        /// The literal token. Emitted verbatim.
139        value: String,
140    },
141    /// Regex-matched terminal.
142    ///
143    /// At parse time this matches arbitrary bytes; at emit time the
144    /// walker substitutes a `literal-value` constraint when present
145    /// and falls back to a placeholder otherwise.
146    #[serde(rename = "PATTERN")]
147    Pattern {
148        /// The original regex.
149        value: String,
150    },
151    /// The empty production. Emits nothing.
152    #[serde(rename = "BLANK")]
153    Blank,
154    /// Named field over a content production.
155    ///
156    /// The field `name` matches an edge kind on the schema side; the
157    /// walker resolves the corresponding child vertex and recurses
158    /// into `content` with that child as context.
159    #[serde(rename = "FIELD")]
160    Field {
161        /// Field name (matches edge kind).
162        name: String,
163        /// The contents of the field.
164        content: Box<Self>,
165    },
166    /// An aliased production.
167    ///
168    /// `value` records the parser-visible kind; the walker emits
169    /// `content` and ignores the alias rename.
170    #[serde(rename = "ALIAS")]
171    Alias {
172        /// The aliased content.
173        content: Box<Self>,
174        /// Whether the alias is a named node.
175        #[serde(default)]
176        named: bool,
177        /// The alias's surface name.
178        #[serde(default)]
179        value: String,
180    },
181    /// A token wrapper.
182    ///
183    /// Tree-sitter uses `TOKEN` to mark a sub-rule as a single
184    /// lexical token; the walker emits the inner content unchanged.
185    #[serde(rename = "TOKEN")]
186    Token {
187        /// The wrapped content.
188        content: Box<Self>,
189    },
190    /// An immediate-token wrapper (no preceding whitespace).
191    ///
192    /// Treated like [`Production::Token`] for emit purposes.
193    #[serde(rename = "IMMEDIATE_TOKEN")]
194    ImmediateToken {
195        /// The wrapped content.
196        content: Box<Self>,
197    },
198    /// Precedence wrapper.
199    #[serde(rename = "PREC")]
200    Prec {
201        /// Precedence value (numeric or string). Ignored at emit time.
202        #[allow(dead_code)]
203        value: serde_json::Value,
204        /// The wrapped content.
205        content: Box<Self>,
206    },
207    /// Left-associative precedence wrapper.
208    #[serde(rename = "PREC_LEFT")]
209    PrecLeft {
210        /// Precedence value. Ignored at emit time.
211        #[allow(dead_code)]
212        value: serde_json::Value,
213        /// The wrapped content.
214        content: Box<Self>,
215    },
216    /// Right-associative precedence wrapper.
217    #[serde(rename = "PREC_RIGHT")]
218    PrecRight {
219        /// Precedence value. Ignored at emit time.
220        #[allow(dead_code)]
221        value: serde_json::Value,
222        /// The wrapped content.
223        content: Box<Self>,
224    },
225    /// Dynamic precedence wrapper.
226    #[serde(rename = "PREC_DYNAMIC")]
227    PrecDynamic {
228        /// Precedence value. Ignored at emit time.
229        #[allow(dead_code)]
230        value: serde_json::Value,
231        /// The wrapped content.
232        content: Box<Self>,
233    },
234    /// Reserved-word wrapper (tree-sitter ≥ 0.25).
235    ///
236    /// Tree-sitter's `RESERVED` rule marks an inner production as a
237    /// reserved-word context: the parser excludes the listed identifiers
238    /// from being treated as the inner symbol. The `context_name`
239    /// metadata names the reserved-word set; the emitter does not need
240    /// it (we are walking schema → bytes, not enforcing reserved-word
241    /// constraints), so we emit the inner content unchanged, the same
242    /// way [`Production::Token`] and [`Production::ImmediateToken`] do.
243    #[serde(rename = "RESERVED")]
244    Reserved {
245        /// The wrapped content.
246        content: Box<Self>,
247        /// Name of the reserved-word context. Ignored at emit time.
248        #[allow(dead_code)]
249        #[serde(default)]
250        context_name: String,
251    },
252}
253
254/// A grammar's production-rule table, deserialized from `grammar.json`.
255///
256/// Only the fields the emitter consumes are decoded; precedences,
257/// conflicts, externals, and other parser-only metadata are ignored.
258#[derive(Debug, Clone, Deserialize)]
259#[non_exhaustive]
260pub struct Grammar {
261    /// Grammar name (e.g. `"rust"`, `"typescript"`).
262    #[allow(dead_code)]
263    pub name: String,
264    /// Map from rule name (a vertex kind on the schema side) to
265    /// production. Entries are kept in lexical order so iteration
266    /// is deterministic.
267    pub rules: BTreeMap<String, Production>,
268    /// Supertypes declared in the grammar's `supertypes` field. A
269    /// supertype is a rule whose body is a `CHOICE` of `SYMBOL`
270    /// references; tree-sitter parsers report a node's kind as one
271    /// of the subtypes (e.g. `identifier`, `typed_parameter`) rather
272    /// than the supertype name (`parameter`), so the emitter needs to
273    /// know that a child kind in a subtype set should match the
274    /// supertype name when a SYMBOL references it.
275    #[serde(default, deserialize_with = "deserialize_supertypes")]
276    pub supertypes: std::collections::HashSet<String>,
277    /// Tree-sitter `extras` rules: the named symbols (typically comments)
278    /// that tree-sitter skips at parse time but records as children of the
279    /// surrounding vertex. They appear nowhere in the production grammar,
280    /// so the rule walker cannot reconcile them against the cursor — the
281    /// emit pass therefore drains them as a side channel: at vertex entry
282    /// and between REPEAT iterations any leading extras-kind edges are
283    /// consumed and emitted directly. The set is populated at
284    /// `Grammar::from_bytes` by collecting every `SYMBOL { name }` and
285    /// named `ALIAS { value, named: true }` under the top-level `extras`
286    /// array. Pattern-only extras (e.g. `\s` whitespace) are not vertex
287    /// kinds and are excluded.
288    #[serde(default, deserialize_with = "deserialize_extras")]
289    pub extras: std::collections::HashSet<String>,
290    /// Precomputed subtyping closure: `subtypes[symbol_name]` is the
291    /// set of vertex kinds that satisfy a SYMBOL `symbol_name`
292    /// reference on the schema side.
293    ///
294    /// Built once at [`Grammar::from_bytes`] time by walking each
295    /// hidden rule (`_`-prefixed), declared supertype, and named
296    /// `ALIAS { value: K, ... }` production to its leaf SYMBOLs and
297    /// recording the closure. This replaces the prior heuristic
298    /// `kind_satisfies_symbol` that walked the rule body on every
299    /// query: lookups are now O(1) and the relation is exactly the
300    /// transitive closure of "is reachable via hidden / supertype /
301    /// alias dispatch", with no over-expansion through non-hidden
302    /// non-supertype rule references.
303    #[serde(skip)]
304    pub subtypes: std::collections::HashMap<String, std::collections::HashSet<String>>,
305    /// Precomputed Yield sets: `yield_sets[rule_name]` is the set of
306    /// concrete vertex kinds that can appear as the **first named
307    /// child** when that rule's production is taken.
308    ///
309    /// Defined inductively:
310    /// - `Yield(SYMBOL S)` where S is hidden/supertype = `Yield(rules[S])`
311    /// - `Yield(SYMBOL S)` where S is concrete = `{S}`
312    /// - `Yield(SEQ [M1, ...])` = `Yield(M1)` (only first member)
313    /// - `Yield(CHOICE [M1, ..., Mn])` = `⋃ Yield(Mi)`
314    /// - `Yield(OPTIONAL { c })` = `Yield(c) ∪ {ε}`
315    /// - `Yield(BLANK)` = `{ε}`
316    /// - Wrappers (PREC*, TOKEN, FIELD, REPEAT, etc.) = `Yield(content)`
317    /// - `Yield(STRING)` = `Yield(PATTERN)` = `∅`
318    /// - `Yield(ALIAS { value: V, named: true })` = `{V}`
319    ///
320    /// Epsilon is represented as the empty string `""`.
321    #[serde(skip)]
322    pub yield_sets: std::collections::HashMap<String, std::collections::HashSet<String>>,
323}
324
325fn deserialize_supertypes<'de, D>(
326    deserializer: D,
327) -> Result<std::collections::HashSet<String>, D::Error>
328where
329    D: serde::Deserializer<'de>,
330{
331    let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
332    let mut out = std::collections::HashSet::new();
333    for entry in entries {
334        match entry {
335            serde_json::Value::String(s) => {
336                out.insert(s);
337            }
338            serde_json::Value::Object(map) => {
339                if let Some(serde_json::Value::String(name)) = map.get("name") {
340                    out.insert(name.clone());
341                }
342            }
343            _ => {}
344        }
345    }
346    Ok(out)
347}
348
349fn deserialize_extras<'de, D>(
350    deserializer: D,
351) -> Result<std::collections::HashSet<String>, D::Error>
352where
353    D: serde::Deserializer<'de>,
354{
355    let entries: Vec<serde_json::Value> = Vec::deserialize(deserializer)?;
356    let mut out = std::collections::HashSet::new();
357    for entry in entries {
358        if let serde_json::Value::Object(map) = entry {
359            let ty = map.get("type").and_then(serde_json::Value::as_str);
360            match ty {
361                // SYMBOL { name: K } — the extras rule is a named symbol
362                // (typically `line_comment` / `block_comment`). The kind
363                // K appears as a real child vertex on the schema side.
364                Some("SYMBOL") => {
365                    if let Some(serde_json::Value::String(name)) = map.get("name") {
366                        out.insert(name.clone());
367                    }
368                }
369                // ALIAS { content, value: V, named: true } — the extras
370                // rule renames its content; V is the kind on the schema.
371                Some("ALIAS") => {
372                    let named = map
373                        .get("named")
374                        .and_then(serde_json::Value::as_bool)
375                        .unwrap_or(false);
376                    if named {
377                        if let Some(serde_json::Value::String(value)) = map.get("value") {
378                            out.insert(value.clone());
379                        }
380                    }
381                }
382                // PATTERN / STRING / TOKEN entries describe inter-token
383                // whitespace and have no vertex-side representation.
384                _ => {}
385            }
386        }
387    }
388    Ok(out)
389}
390
391impl Grammar {
392    /// Parse a grammar's `grammar.json` bytes.
393    ///
394    /// Builds the subtyping closure as part of construction so every
395    /// downstream lookup is O(1). The closure is the least relation
396    /// containing `(K, K)` for every rule key `K` and closed under:
397    ///
398    /// - hidden-rule expansion: if `S` is hidden and a SYMBOL `S` may
399    ///   reach SYMBOL `K`, then `K ⊑ S`.
400    /// - supertype expansion: if `S` is in the grammar's supertypes
401    ///   block and `K` is one of `S`'s alternatives, then `K ⊑ S`.
402    /// - alias renaming: if a rule body contains
403    ///   `ALIAS { content: SYMBOL R, value: A, named: true }` where
404    ///   `R` reaches kind `K` (or `K = R` when no further hop), then
405    ///   `A ⊑ R` and `K ⊑ A`.
406    ///
407    /// # Errors
408    ///
409    /// Returns [`ParseError::EmitFailed`] when the bytes are not a
410    /// valid `grammar.json` document.
411    pub fn from_bytes(protocol: &str, bytes: &[u8]) -> Result<Self, ParseError> {
412        let mut grammar: Self =
413            serde_json::from_slice(bytes).map_err(|e| ParseError::EmitFailed {
414                protocol: protocol.to_owned(),
415                reason: format!("grammar.json deserialization failed: {e}"),
416            })?;
417        grammar.subtypes = compute_subtype_closure(&grammar);
418        grammar.yield_sets = compute_yield_sets(&grammar);
419        Ok(grammar)
420    }
421}
422
423/// Compute the subtyping relation as a forward-indexed map from a
424/// SYMBOL name to the set of vertex kinds that satisfy that SYMBOL.
425fn compute_subtype_closure(
426    grammar: &Grammar,
427) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
428    use std::collections::{HashMap, HashSet};
429    // Edges of the "kind X satisfies SYMBOL Y" relation. `K ⊑ Y` is
430    // recorded whenever Y is reached by walking the grammar's
431    // ALIAS / hidden-rule / supertype dispatch from a position where
432    // K is the actual vertex kind.
433    let mut subtypes: HashMap<String, HashSet<String>> = HashMap::new();
434    for name in grammar.rules.keys() {
435        subtypes
436            .entry(name.clone())
437            .or_default()
438            .insert(name.clone());
439    }
440
441    // First pass: collect the immediate "satisfies" edges from each
442    // expandable rule (hidden, supertype) to the kinds reachable by
443    // walking its body, plus alias edges.
444    fn walk<'g>(
445        grammar: &'g Grammar,
446        production: &'g Production,
447        visited: &mut HashSet<&'g str>,
448        out: &mut HashSet<String>,
449    ) {
450        match production {
451            Production::Symbol { name } => {
452                // Direct subtype.
453                out.insert(name.clone());
454                // Continue expansion through hidden / supertype rules
455                // so the closure traverses pass-through dispatch.
456                let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
457                if expand && visited.insert(name.as_str()) {
458                    if let Some(rule) = grammar.rules.get(name) {
459                        walk(grammar, rule, visited, out);
460                    }
461                }
462            }
463            Production::Choice { members } | Production::Seq { members } => {
464                for m in members {
465                    walk(grammar, m, visited, out);
466                }
467            }
468            Production::Alias {
469                content,
470                named,
471                value,
472            } => {
473                if *named && !value.is_empty() {
474                    out.insert(value.clone());
475                }
476                walk(grammar, content, visited, out);
477            }
478            Production::Repeat { content }
479            | Production::Repeat1 { content }
480            | Production::Optional { content }
481            | Production::Field { content, .. }
482            | Production::Token { content }
483            | Production::ImmediateToken { content }
484            | Production::Prec { content, .. }
485            | Production::PrecLeft { content, .. }
486            | Production::PrecRight { content, .. }
487            | Production::PrecDynamic { content, .. }
488            | Production::Reserved { content, .. } => {
489                walk(grammar, content, visited, out);
490            }
491            _ => {}
492        }
493    }
494
495    for (name, rule) in &grammar.rules {
496        let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
497        if !expand {
498            continue;
499        }
500        let mut visited: HashSet<&str> = HashSet::new();
501        visited.insert(name.as_str());
502        let mut reachable: HashSet<String> = HashSet::new();
503        walk(grammar, rule, &mut visited, &mut reachable);
504        for kind in &reachable {
505            subtypes
506                .entry(kind.clone())
507                .or_default()
508                .insert(name.clone());
509        }
510    }
511
512    // Aliases: scan every rule body for ALIAS { content, value }
513    // declarations. The kinds reachable from `content` satisfy
514    // `value`, AND (by construction) `value` satisfies the
515    // surrounding rule. Walking the ENTIRE grammar once captures
516    // every alias site, irrespective of which rule introduces it.
517    fn collect_aliases<'g>(production: &'g Production, out: &mut Vec<(String, &'g Production)>) {
518        match production {
519            Production::Alias {
520                content,
521                named,
522                value,
523            } => {
524                if *named && !value.is_empty() {
525                    out.push((value.clone(), content.as_ref()));
526                }
527                collect_aliases(content, out);
528            }
529            Production::Choice { members } | Production::Seq { members } => {
530                for m in members {
531                    collect_aliases(m, out);
532                }
533            }
534            Production::Repeat { content }
535            | Production::Repeat1 { content }
536            | Production::Optional { content }
537            | Production::Field { content, .. }
538            | Production::Token { content }
539            | Production::ImmediateToken { content }
540            | Production::Prec { content, .. }
541            | Production::PrecLeft { content, .. }
542            | Production::PrecRight { content, .. }
543            | Production::PrecDynamic { content, .. }
544            | Production::Reserved { content, .. } => {
545                collect_aliases(content, out);
546            }
547            _ => {}
548        }
549    }
550    let mut aliases: Vec<(String, &Production)> = Vec::new();
551    for rule in grammar.rules.values() {
552        collect_aliases(rule, &mut aliases);
553    }
554    for (alias_value, content) in aliases {
555        let mut visited: HashSet<&str> = HashSet::new();
556        let mut reachable: HashSet<String> = HashSet::new();
557        walk(grammar, content, &mut visited, &mut reachable);
558        // Aliased value satisfies itself and is satisfied by every
559        // kind its content can reach.
560        subtypes
561            .entry(alias_value.clone())
562            .or_default()
563            .insert(alias_value.clone());
564        for kind in reachable {
565            subtypes
566                .entry(kind)
567                .or_default()
568                .insert(alias_value.clone());
569        }
570    }
571
572    // Transitive close: `K ⊑ A` and `A ⊑ B` implies `K ⊑ B`. Iterate
573    // a few rounds; the relation is small so a quick fixed-point
574    // suffices in practice.
575    for _ in 0..8 {
576        let snapshot = subtypes.clone();
577        let mut changed = false;
578        for (kind, supers) in &snapshot {
579            let extra: HashSet<String> = supers
580                .iter()
581                .flat_map(|s| snapshot.get(s).cloned().unwrap_or_default())
582                .collect();
583            let entry = subtypes.entry(kind.clone()).or_default();
584            for s in extra {
585                if entry.insert(s) {
586                    changed = true;
587                }
588            }
589        }
590        if !changed {
591            break;
592        }
593    }
594
595    subtypes
596}
597
598/// Compute the Yield set for every rule in the grammar.
599///
600/// `Yield(P)` is the set of concrete vertex kinds that can appear as
601/// the first named child when production P is taken. See the
602/// `Grammar::yield_sets` doc comment for the inductive definition.
603fn compute_yield_sets(
604    grammar: &Grammar,
605) -> std::collections::HashMap<String, std::collections::HashSet<String>> {
606    let mut cache: std::collections::HashMap<String, std::collections::HashSet<String>> =
607        std::collections::HashMap::new();
608    for (name, rule) in &grammar.rules {
609        if cache.contains_key(name) {
610            continue;
611        }
612        let mut visited = std::collections::HashSet::new();
613        let ys = yield_of_production(grammar, rule, &mut visited, &mut cache);
614        cache.insert(name.clone(), ys);
615    }
616    cache
617}
618
619/// Compute the Yield set of an arbitrary production node.
620///
621/// Uses `cache` (the partially-built `yield_sets` map) as
622/// memoization. `visited` tracks the current recursion path to
623/// detect cycles through hidden/supertype rules; a cycle returns ∅
624/// (a cycle that never passes through a concrete named symbol
625/// cannot produce a first child).
626fn yield_of_production(
627    grammar: &Grammar,
628    production: &Production,
629    visited: &mut std::collections::HashSet<String>,
630    cache: &mut std::collections::HashMap<String, std::collections::HashSet<String>>,
631) -> std::collections::HashSet<String> {
632    match production {
633        Production::Symbol { name } => {
634            if let Some(cached) = cache.get(name) {
635                return cached.clone();
636            }
637            let expand = name.starts_with('_') || grammar.supertypes.contains(name.as_str());
638            if expand {
639                if !visited.insert(name.clone()) {
640                    return std::collections::HashSet::new();
641                }
642                let result = if let Some(rule) = grammar.rules.get(name) {
643                    yield_of_production(grammar, rule, visited, cache)
644                } else {
645                    std::collections::HashSet::new()
646                };
647                visited.remove(name);
648                cache.insert(name.clone(), result.clone());
649                result
650            } else {
651                let mut set = std::collections::HashSet::new();
652                set.insert(name.clone());
653                set
654            }
655        }
656        Production::Alias {
657            content,
658            named,
659            value,
660        } => {
661            if *named && !value.is_empty() {
662                let mut set = std::collections::HashSet::new();
663                set.insert(value.clone());
664                set
665            } else {
666                yield_of_production(grammar, content, visited, cache)
667            }
668        }
669        Production::Seq { members } => {
670            if members.is_empty() {
671                let mut set = std::collections::HashSet::new();
672                set.insert(String::new());
673                set
674            } else {
675                // Walk the SEQ members left-to-right, returning the
676                // Yield of the first member that can produce a named
677                // child. STRING and PATTERN yield ∅ (anonymous
678                // tokens); skip them to reach the first named-child-
679                // producing position.  This handles hidden rules like
680                // `_initializer = SEQ ["=", FIELD { value, ... }]`
681                // where the leading "=" is a STRING.
682                for m in members {
683                    let ys = yield_of_production(grammar, m, visited, cache);
684                    if !ys.is_empty() {
685                        return ys;
686                    }
687                }
688                std::collections::HashSet::new()
689            }
690        }
691        Production::Choice { members } => {
692            let mut union = std::collections::HashSet::new();
693            for m in members {
694                union.extend(yield_of_production(grammar, m, visited, cache));
695            }
696            union
697        }
698        Production::Optional { content } => {
699            let mut set = yield_of_production(grammar, content, visited, cache);
700            set.insert(String::new());
701            set
702        }
703        Production::Blank => {
704            let mut set = std::collections::HashSet::new();
705            set.insert(String::new());
706            set
707        }
708        Production::String { .. } | Production::Pattern { .. } => std::collections::HashSet::new(),
709        Production::Repeat { content }
710        | Production::Repeat1 { content }
711        | Production::Field { content, .. }
712        | Production::Token { content }
713        | Production::ImmediateToken { content }
714        | Production::Prec { content, .. }
715        | Production::PrecLeft { content, .. }
716        | Production::PrecRight { content, .. }
717        | Production::PrecDynamic { content, .. }
718        | Production::Reserved { content, .. } => {
719            yield_of_production(grammar, content, visited, cache)
720        }
721    }
722}
723
724// ═══════════════════════════════════════════════════════════════════
725// Format policy
726// ═══════════════════════════════════════════════════════════════════
727
728/// Whitespace and indentation policy applied during emission.
729///
730/// The default policy inserts a single space between adjacent tokens,
731/// a newline after `;` / `}` / `{`, and tracks indent on `{` / `}`
732/// boundaries. Per-language overrides (idiomatic indent width,
733/// trailing-comma rules, blank-line conventions) can ride alongside
734/// this struct in a follow-up branch; today's defaults aim only for
735/// syntactic validity.
736#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
737pub struct FormatPolicy {
738    /// Number of spaces per indent level.
739    pub indent_width: usize,
740    /// Separator inserted between adjacent terminals that the lexer
741    /// would otherwise glue together (word ↔ word, operator ↔ operator).
742    /// Default is a single space.
743    pub separator: String,
744    /// Newline byte sequence emitted after `line_break_after` tokens
745    /// and at end-of-output. Default is `"\n"`.
746    pub newline: String,
747    /// Tokens after which the walker breaks to a new line.
748    pub line_break_after: Vec<String>,
749    /// Tokens that increase indent on emission.
750    pub indent_open: Vec<String>,
751    /// Tokens that decrease indent on emission.
752    pub indent_close: Vec<String>,
753}
754
755impl Default for FormatPolicy {
756    fn default() -> Self {
757        Self {
758            indent_width: 2,
759            separator: " ".to_owned(),
760            newline: "\n".to_owned(),
761            line_break_after: vec![";".into(), "{".into(), "}".into()],
762            indent_open: vec!["{".into()],
763            indent_close: vec!["}".into()],
764        }
765    }
766}
767
768// ═══════════════════════════════════════════════════════════════════
769// Emitter
770// ═══════════════════════════════════════════════════════════════════
771
772/// Emit a by-construction schema to source bytes.
773///
774/// `protocol` is the grammar / language name (used in error messages
775/// and to label the entry point).
776///
777/// The walker treats `schema.entries` as the ordered list of root
778/// vertices, falling back to a deterministic by-id ordering when
779/// `entries` is empty. Each root is emitted using the production
780/// associated with its kind in `grammar.rules`.
781///
782/// # Errors
783///
784/// Returns [`ParseError::EmitFailed`] when:
785///
786/// - the schema has no vertices
787/// - a root vertex's kind is not a grammar rule
788/// - a `SYMBOL` reference points at a kind with no rule and no schema
789///   child to resolve it to
790/// - a required `FIELD` has no corresponding edge in the schema
791pub fn emit_pretty(
792    protocol: &str,
793    schema: &Schema,
794    grammar: &Grammar,
795    policy: &FormatPolicy,
796) -> Result<Vec<u8>, ParseError> {
797    let roots = collect_roots(schema);
798    if roots.is_empty() {
799        return Err(ParseError::EmitFailed {
800            protocol: protocol.to_owned(),
801            reason: "schema has no entry vertices".to_owned(),
802        });
803    }
804
805    let mut out = Output::new(policy);
806    for (i, root) in roots.iter().enumerate() {
807        if i > 0 {
808            out.newline();
809        }
810        emit_vertex(protocol, schema, grammar, root, &mut out)?;
811    }
812    Ok(out.finish())
813}
814
815fn collect_roots(schema: &Schema) -> Vec<&panproto_gat::Name> {
816    if !schema.entries.is_empty() {
817        return schema
818            .entries
819            .iter()
820            .filter(|name| schema.vertices.contains_key(*name))
821            .collect();
822    }
823
824    // Fallback: every vertex that is not the target of any structural edge
825    // (sorted by id for determinism).
826    let mut targets: std::collections::HashSet<&panproto_gat::Name> =
827        std::collections::HashSet::new();
828    for edge in schema.edges.keys() {
829        targets.insert(&edge.tgt);
830    }
831    let mut roots: Vec<&panproto_gat::Name> = schema
832        .vertices
833        .keys()
834        .filter(|name| !targets.contains(name))
835        .collect();
836    roots.sort();
837    roots
838}
839
840fn emit_vertex(
841    protocol: &str,
842    schema: &Schema,
843    grammar: &Grammar,
844    vertex_id: &panproto_gat::Name,
845    out: &mut Output<'_>,
846) -> Result<(), ParseError> {
847    let vertex = schema
848        .vertices
849        .get(vertex_id)
850        .ok_or_else(|| ParseError::EmitFailed {
851            protocol: protocol.to_owned(),
852            reason: format!("vertex '{vertex_id}' not found"),
853        })?;
854
855    // Leaf shortcut: a vertex carrying a `literal-value` constraint
856    // and no outgoing structural edges is a terminal token. Emit the
857    // captured value directly. This handles identifiers, numeric
858    // literals, and string literals that the parser stored as
859    // `literal-value` even on by-construction schemas.
860    if let Some(literal) = literal_value(schema, vertex_id) {
861        if children_for(schema, vertex_id).is_empty() {
862            out.token(literal);
863            return Ok(());
864        }
865    }
866
867    let kind = vertex.kind.as_ref();
868    let edges = children_for(schema, vertex_id);
869    if let Some(rule) = grammar.rules.get(kind) {
870        let mut cursor = ChildCursor::new(&edges);
871        emit_production(protocol, schema, grammar, vertex_id, rule, &mut cursor, out)?;
872        // Drain any extras left after the rule walk completed; tree-sitter
873        // may record trailing comments as children of the surrounding
874        // vertex (i.e. after the last structural child the rule matched).
875        drain_extras(protocol, schema, grammar, &mut cursor, out)?;
876        // Emit remaining unconsumed structural children that the grammar
877        // rule did not match. This covers cases where tree-sitter's parser
878        // produces child kinds not reachable from grammar.json's production
879        // rules (e.g. Julia's macrocall_expression can have an
880        // `argument_list` child even though the grammar only references
881        // `macro_argument_list`).
882        for (i, edge) in cursor.edges.iter().enumerate() {
883            if !cursor.consumed[i] {
884                let child_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
885                if child_kind.is_some_and(|k| !grammar.extras.contains(k)) {
886                    emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
887                }
888            }
889        }
890        return Ok(());
891    }
892
893    // No rule for this kind. The parser produced it via an ALIAS
894    // (tree-sitter's `alias($.something, $.actual_kind)`) or via an
895    // external scanner (e.g. YAML's `document` root). Fall back to
896    // walking the children directly so the inner content survives;
897    // surrounding tokens — whose only source is the missing rule —
898    // are necessarily absent.
899    for edge in &edges {
900        emit_vertex(protocol, schema, grammar, &edge.tgt, out)?;
901    }
902    Ok(())
903}
904
905/// Linear cursor over a vertex's outgoing edges, used to thread
906/// children through a production rule without double-consuming them.
907struct ChildCursor<'a> {
908    edges: &'a [&'a Edge],
909    consumed: Vec<bool>,
910}
911
912impl<'a> ChildCursor<'a> {
913    fn new(edges: &'a [&'a Edge]) -> Self {
914        Self {
915            edges,
916            consumed: vec![false; edges.len()],
917        }
918    }
919
920    /// Take the next unconsumed edge whose kind equals `field_name`.
921    fn take_field(&mut self, field_name: &str) -> Option<&'a Edge> {
922        for (i, edge) in self.edges.iter().enumerate() {
923            if !self.consumed[i] && edge.kind.as_ref() == field_name {
924                self.consumed[i] = true;
925                return Some(edge);
926            }
927        }
928        None
929    }
930
931    /// Whether any unconsumed edge satisfies `predicate`. Used by the
932    /// unit tests; the live emit path went through `has_matching` on
933    /// each alternative until cursor-driven dispatch was rewritten to
934    /// pick the first-unconsumed-edge's kind directly.
935    #[cfg(test)]
936    fn has_matching(&self, predicate: impl Fn(&Edge) -> bool) -> bool {
937        self.edges
938            .iter()
939            .enumerate()
940            .any(|(i, edge)| !self.consumed[i] && predicate(edge))
941    }
942
943    /// Take the next unconsumed edge whose target vertex satisfies
944    /// `predicate`. Returns the edge and the underlying production
945    /// resolution path is the caller's job.
946    fn take_matching(&mut self, predicate: impl Fn(&Edge) -> bool) -> Option<&'a Edge> {
947        for (i, edge) in self.edges.iter().enumerate() {
948            if !self.consumed[i] && predicate(edge) {
949                self.consumed[i] = true;
950                return Some(edge);
951            }
952        }
953        None
954    }
955}
956
957thread_local! {
958    static EMIT_DEPTH: std::cell::Cell<usize> = const { std::cell::Cell::new(0) };
959    /// Set of `(vertex_id, rule_name)` pairs that are currently being
960    /// walked by the recursion. A SYMBOL that resolves to a rule
961    /// already on this stack closes a μ-binder cycle: in the
962    /// coinductive reading, the rule walk at any vertex is the least
963    /// fixed point of `body[μ X . body / X]`, which unfolds at most
964    /// once, with the second visit returning the empty sequence (the
965    /// unit of the free token monoid). Examples that trigger this:
966    /// YAML's `stream` ⊃ `_b_blk_*` mutually-recursive chain, Rust's
967    /// `_expression` ⊃ `binary_expression` ⊃ `_expression`.
968    static EMIT_MU_FRAMES: std::cell::RefCell<std::collections::HashSet<(String, String)>> =
969        std::cell::RefCell::new(std::collections::HashSet::new());
970    /// The name of the FIELD whose body the walker is currently inside,
971    /// or `None` at top level. Lets a SYMBOL nested arbitrarily deep
972    /// in the field's content (under SEQ, CHOICE, REPEAT, OPTIONAL)
973    /// consume from the *outer* cursor by edge-kind rather than from
974    /// the child's own cursor by symbol-match. Without this, shapes
975    /// like `field('args', commaSep1($.X))` — which expands to
976    /// `FIELD(SEQ(SYMBOL X, REPEAT(SEQ(',', SYMBOL X))))` — emit only
977    /// the first matched edge: the FIELD handler consumed one edge,
978    /// the inner REPEAT searched the consumed child's cursor (which
979    /// has no more sibling field edges), and the REPEAT broke after
980    /// one iteration. Setting the context here so the inner SYMBOL
981    /// pulls successive field-named edges from the outer cursor
982    /// recovers every matched edge across arbitrary nesting.
983    static EMIT_FIELD_CONTEXT: std::cell::RefCell<Option<String>> =
984        const { std::cell::RefCell::new(None) };
985}
986
987/// RAII guard that restores the prior `EMIT_FIELD_CONTEXT` value on drop.
988struct FieldContextGuard(Option<String>);
989
990impl Drop for FieldContextGuard {
991    fn drop(&mut self) {
992        EMIT_FIELD_CONTEXT.with(|f| *f.borrow_mut() = self.0.take());
993    }
994}
995
996fn push_field_context(name: &str) -> FieldContextGuard {
997    let prev = EMIT_FIELD_CONTEXT.with(|f| f.borrow_mut().replace(name.to_owned()));
998    FieldContextGuard(prev)
999}
1000
1001/// Clear the field context for the duration of a child-context walk.
1002/// The child's own production has its own FIELDs that set their own
1003/// context; the outer field hint must not leak into them.
1004fn clear_field_context() -> FieldContextGuard {
1005    let prev = EMIT_FIELD_CONTEXT.with(|f| f.borrow_mut().take());
1006    FieldContextGuard(prev)
1007}
1008
1009fn current_field_context() -> Option<String> {
1010    EMIT_FIELD_CONTEXT.with(|f| f.borrow().clone())
1011}
1012
1013/// Walk a rule at a vertex inside a μ-binder. The wrapping frame is
1014/// pushed before recursion and popped after, so any SYMBOL inside
1015/// `rule` that re-enters the same `(vertex_id, rule_name)` pair
1016/// returns the empty sequence (μ X . body unfolds once).
1017fn walk_in_mu_frame(
1018    protocol: &str,
1019    schema: &Schema,
1020    grammar: &Grammar,
1021    vertex_id: &panproto_gat::Name,
1022    rule_name: &str,
1023    rule: &Production,
1024    cursor: &mut ChildCursor<'_>,
1025    out: &mut Output<'_>,
1026) -> Result<(), ParseError> {
1027    let key = (vertex_id.to_string(), rule_name.to_owned());
1028    let inserted = EMIT_MU_FRAMES.with(|frames| frames.borrow_mut().insert(key.clone()));
1029    if !inserted {
1030        // We are already walking this rule at this vertex deeper in
1031        // the call stack. The coinductive μ-fixed-point reading
1032        // returns the empty sequence here; the surrounding
1033        // production resumes after the SYMBOL.
1034        return Ok(());
1035    }
1036    let result = emit_production(protocol, schema, grammar, vertex_id, rule, cursor, out);
1037    EMIT_MU_FRAMES.with(|frames| {
1038        frames.borrow_mut().remove(&key);
1039    });
1040    result
1041}
1042
1043fn emit_production(
1044    protocol: &str,
1045    schema: &Schema,
1046    grammar: &Grammar,
1047    vertex_id: &panproto_gat::Name,
1048    production: &Production,
1049    cursor: &mut ChildCursor<'_>,
1050    out: &mut Output<'_>,
1051) -> Result<(), ParseError> {
1052    let depth = EMIT_DEPTH.with(|d| {
1053        let v = d.get() + 1;
1054        d.set(v);
1055        v
1056    });
1057    if depth > 500 {
1058        EMIT_DEPTH.with(|d| d.set(d.get() - 1));
1059        return Err(ParseError::EmitFailed {
1060            protocol: protocol.to_owned(),
1061            reason: format!(
1062                "emit_production recursion >500 (likely a cyclic grammar; \
1063                     vertex='{vertex_id}')"
1064            ),
1065        });
1066    }
1067    drain_extras(protocol, schema, grammar, cursor, out)?;
1068    let result = emit_production_inner(
1069        protocol, schema, grammar, vertex_id, production, cursor, out,
1070    );
1071    EMIT_DEPTH.with(|d| d.set(d.get() - 1));
1072    result
1073}
1074
1075/// Consume and emit every leading edge on `cursor` whose target kind
1076/// is in `grammar.extras` (typically `line_comment` / `block_comment`).
1077/// Extras live outside the production grammar — tree-sitter skips them
1078/// at parse time and records them as children of the surrounding
1079/// vertex — so the rule walker cannot reconcile them against the
1080/// cursor. Draining them as a side channel preserves their content in
1081/// the output without confusing the structural matchers.
1082fn drain_extras(
1083    protocol: &str,
1084    schema: &Schema,
1085    grammar: &Grammar,
1086    cursor: &mut ChildCursor<'_>,
1087    out: &mut Output<'_>,
1088) -> Result<(), ParseError> {
1089    if grammar.extras.is_empty() {
1090        return Ok(());
1091    }
1092    loop {
1093        let next_extra: Option<usize> = cursor
1094            .edges
1095            .iter()
1096            .enumerate()
1097            .find(|(i, _)| !cursor.consumed[*i])
1098            .and_then(|(i, edge)| {
1099                let kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref())?;
1100                if grammar.extras.contains(kind) {
1101                    Some(i)
1102                } else {
1103                    None
1104                }
1105            });
1106        let Some(idx) = next_extra else {
1107            return Ok(());
1108        };
1109        cursor.consumed[idx] = true;
1110        let target = &cursor.edges[idx].tgt;
1111        emit_vertex(protocol, schema, grammar, target, out)?;
1112    }
1113}
1114
1115fn emit_production_inner(
1116    protocol: &str,
1117    schema: &Schema,
1118    grammar: &Grammar,
1119    vertex_id: &panproto_gat::Name,
1120    production: &Production,
1121    cursor: &mut ChildCursor<'_>,
1122    out: &mut Output<'_>,
1123) -> Result<(), ParseError> {
1124    match production {
1125        Production::String { value } => {
1126            out.token(value);
1127            Ok(())
1128        }
1129        Production::Pattern { value } => {
1130            if let Some(literal) = literal_value(schema, vertex_id) {
1131                out.token(literal);
1132            } else if is_newline_like_pattern(value) {
1133                // Patterns like `\r?\n`, `\n`, `\r\n` are the structural
1134                // newline tokens grammars use to separate top-level
1135                // statements (csound's `_new_line`, ABC's line-end, etc.).
1136                // Emitting them through the placeholder fallback rendered
1137                // the bare `_` sentinel between siblings; route them to
1138                // the layout pass's line-break instead so the output
1139                // re-parses.
1140                out.newline();
1141            } else if is_whitespace_only_pattern(value) {
1142                // `\s+`, `[ \t]+` and friends are interstitial whitespace
1143                // tokens. Emit nothing: the layout pass inserts the
1144                // policy separator between adjacent Lits if needed.
1145            } else {
1146                out.token(&placeholder_for_pattern(value));
1147            }
1148            Ok(())
1149        }
1150        Production::Blank => Ok(()),
1151        Production::Symbol { name } => {
1152            // Inside a FIELD body, a SYMBOL consumes by field-name on
1153            // the outer cursor rather than searching by symbol-match.
1154            // This covers the simple `FIELD(SYMBOL X)` case as well as
1155            // every nesting under FIELD that contains SYMBOLs (SEQ,
1156            // CHOICE, REPEAT, OPTIONAL, ALIAS). Without the override,
1157            // shapes like `field('args', commaSep1($.X))` consume one
1158            // field edge in the FIELD handler and then the REPEAT
1159            // inside SEQ searches the consumed child's cursor — where
1160            // no sibling field edges sit — and breaks after one
1161            // iteration.
1162            if let Some(field) = current_field_context() {
1163                if let Some(edge) = cursor.take_field(&field) {
1164                    return emit_in_child_context(
1165                        protocol, schema, grammar, &edge.tgt, production, out,
1166                    );
1167                }
1168                // No matching field-named edge left on the outer
1169                // cursor. Surface nothing; the surrounding REPEAT /
1170                // OPTIONAL / CHOICE backtracks the literal tokens it
1171                // emitted on this iteration when it sees no progress.
1172                return Ok(());
1173            }
1174            if name.starts_with('_') {
1175                // Hidden rule: not a vertex kind on the schema side.
1176                // Inline-expand the rule body so its children take
1177                // edges from the current cursor, instead of trying to
1178                // take a single child edge that "satisfies" the
1179                // hidden rule and discarding the rest of the body
1180                // (which would drop tokens like `=` and the trailing
1181                // value SYMBOL inside e.g. TOML's `_inline_pair`).
1182                //
1183                // Wrapped in a μ-frame so a hidden rule that
1184                // references its own kind cyclically (or another
1185                // hidden rule that closes the cycle) unfolds once
1186                // and then collapses to the empty sequence at the
1187                // second visit, rather than blowing the stack.
1188                if let Some(rule) = grammar.rules.get(name) {
1189                    walk_in_mu_frame(
1190                        protocol, schema, grammar, vertex_id, name, rule, cursor, out,
1191                    )
1192                } else {
1193                    // External hidden rule (declared in the
1194                    // grammar's `externals` block, scanned by C code,
1195                    // not listed in `rules`). Heuristic fallback by
1196                    // name:
1197                    //
1198                    // - `_indent` / `*_indent`: open an indent block.
1199                    //   Indent-based grammars (Python, YAML, qvr)
1200                    //   declare an `_indent` external scanner before
1201                    //   the body of a block-bodied declaration; the
1202                    //   emitted output is unparseable without the
1203                    //   corresponding indentation jump.
1204                    // - `_dedent` / `*_dedent`: close the matching
1205                    //   indent block.
1206                    // - `_newline` / `*_line_ending` / `*_or_eof`:
1207                    //   universally newline-or-empty; emitting a
1208                    //   single newline is the right default for
1209                    //   grammars like TOML whose `pair` SEQ trails
1210                    //   into `_line_ending_or_eof`.
1211                    //
1212                    // Anything else falls through silently — better
1213                    // to drop an unknown external token than to
1214                    // invent one that confuses re-parsing.
1215                    if name == "_indent" || name.ends_with("_indent") {
1216                        out.indent_open();
1217                    } else if name == "_dedent" || name.ends_with("_dedent") {
1218                        out.indent_close();
1219                    } else if name.contains("line_ending")
1220                        || name.contains("newline")
1221                        || name.ends_with("_or_eof")
1222                    {
1223                        out.newline();
1224                    } else if name.contains("semicolon") {
1225                        out.token(";");
1226                    }
1227                    Ok(())
1228                }
1229            } else if let Some(edge) = { take_symbol_match(grammar, schema, cursor, name) } {
1230                // For supertype / hidden-rule dispatch the child's
1231                // own kind names the actual production to walk
1232                // (`child.kind` IS the subtype). For ALIAS the
1233                // dependent-optic context is carried by the
1234                // surrounding `Production::Alias` branch, which calls
1235                // `emit_aliased_child` directly; we don't reach here
1236                // for that case. So walking `grammar.rules[child.kind]`
1237                // via `emit_vertex` is correct: the dependent-optic
1238                // path is preserved at every site where it actually
1239                // diverges from `child.kind`.
1240                emit_vertex(protocol, schema, grammar, &edge.tgt, out)
1241            } else if vertex_id_kind(schema, vertex_id) == Some(name.as_str()) {
1242                let rule = grammar
1243                    .rules
1244                    .get(name)
1245                    .ok_or_else(|| ParseError::EmitFailed {
1246                        protocol: protocol.to_owned(),
1247                        reason: format!("no production for SYMBOL '{name}'"),
1248                    })?;
1249                // Self-reference (`X = ... SYMBOL X ...`): wrap in a
1250                // μ-frame so re-entry collapses to the empty sequence.
1251                walk_in_mu_frame(
1252                    protocol, schema, grammar, vertex_id, name, rule, cursor, out,
1253                )
1254            } else {
1255                // Named rule with no matching child: emit nothing and
1256                // let the surrounding CHOICE / OPTIONAL / REPEAT
1257                // resolve the absence.
1258                Ok(())
1259            }
1260        }
1261        Production::Seq { members } => {
1262            for member in members {
1263                emit_production(protocol, schema, grammar, vertex_id, member, cursor, out)?;
1264            }
1265            Ok(())
1266        }
1267        Production::Choice { members } => {
1268            if let Some(matched) =
1269                pick_choice_with_cursor(schema, grammar, vertex_id, cursor, members)
1270            {
1271                emit_production(protocol, schema, grammar, vertex_id, matched, cursor, out)
1272            } else {
1273                Ok(())
1274            }
1275        }
1276        Production::Repeat { content } | Production::Repeat1 { content } => {
1277            // Detect a "separator-leading SEQ" iteration body: SEQ whose
1278            // first member is a CHOICE containing BLANK (or an OPTIONAL),
1279            // i.e. the source-level separator between two iterations is
1280            // syntactically optional. When the chosen alternative for
1281            // that separator slot emits zero content tokens at runtime,
1282            // there was no source-level separator between this iteration
1283            // and the previous one; the layout pass must suppress its
1284            // policy separator to match the source's tight adjacency.
1285            //
1286            // Categorical reading: REPEAT body `B = SEQ(SEP, BODY)` is
1287            // the pullback of two halves. The bytes emitted in iteration
1288            // k+1 are a concatenation of `SEP_k+1` and `BODY_k+1`; if
1289            // `SEP_k+1` is the empty word, the concatenation of
1290            // `BODY_k` and `BODY_k+1` must remain a single contiguous
1291            // span. Hence the NoSpace marker.
1292            let separator_leading_seq: Option<&[Production]> = match content.as_ref() {
1293                Production::Seq { members } if members.len() >= 2 => {
1294                    let first = &members[0];
1295                    let is_separator_slot = match first {
1296                        Production::Choice { members } => {
1297                            members.iter().any(|m| matches!(m, Production::Blank))
1298                        }
1299                        Production::Optional { .. } => true,
1300                        _ => false,
1301                    };
1302                    if is_separator_slot {
1303                        Some(members.as_slice())
1304                    } else {
1305                        None
1306                    }
1307                }
1308                _ => None,
1309            };
1310
1311            let mut emitted_any = false;
1312            loop {
1313                let cursor_snap = cursor.consumed.clone();
1314                let out_snap = out.snapshot();
1315                let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
1316                let result: Result<(), ParseError> =
1317                    if let Some(seq_members) = separator_leading_seq {
1318                        // Emit the separator slot first and observe
1319                        // whether it contributed any Lit. If not, push
1320                        // a NoSpace marker before walking the remaining
1321                        // SEQ members. The OutputSnapshot here covers
1322                        // only the separator's emission window.
1323                        let pre_sep = out.snapshot();
1324                        let sep_result = emit_production(
1325                            protocol,
1326                            schema,
1327                            grammar,
1328                            vertex_id,
1329                            &seq_members[0],
1330                            cursor,
1331                            out,
1332                        );
1333                        match sep_result {
1334                            Err(e) => Err(e),
1335                            Ok(()) => {
1336                                if !out.lit_emitted_since(pre_sep) {
1337                                    out.no_space();
1338                                }
1339                                let mut rest_result = Ok(());
1340                                for member in &seq_members[1..] {
1341                                    rest_result = emit_production(
1342                                        protocol, schema, grammar, vertex_id, member, cursor, out,
1343                                    );
1344                                    if rest_result.is_err() {
1345                                        break;
1346                                    }
1347                                }
1348                                rest_result
1349                            }
1350                        }
1351                    } else {
1352                        emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1353                    };
1354                let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
1355                if result.is_err() || consumed_after == consumed_before {
1356                    cursor.consumed = cursor_snap;
1357                    out.restore(out_snap);
1358                    break;
1359                }
1360                emitted_any = true;
1361            }
1362            if matches!(production, Production::Repeat1 { .. }) && !emitted_any {
1363                emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)?;
1364            }
1365            Ok(())
1366        }
1367        Production::Optional { content } => {
1368            let cursor_snap = cursor.consumed.clone();
1369            let out_snap = out.snapshot();
1370            let consumed_before = cursor.consumed.iter().filter(|&&c| c).count();
1371            let result =
1372                emit_production(protocol, schema, grammar, vertex_id, content, cursor, out);
1373            // OPTIONAL is a backtracking site: if the inner production
1374            // errored *or* made no progress without leaving a witness
1375            // constraint, restore both cursor and output to their
1376            // pre-attempt state. Mirrors `Repeat`'s loop body.
1377            if result.is_err() {
1378                cursor.consumed = cursor_snap;
1379                out.restore(out_snap);
1380                return result;
1381            }
1382            let consumed_after = cursor.consumed.iter().filter(|&&c| c).count();
1383            if consumed_after == consumed_before
1384                && !has_relevant_constraint(content, schema, vertex_id)
1385            {
1386                cursor.consumed = cursor_snap;
1387                out.restore(out_snap);
1388            }
1389            Ok(())
1390        }
1391        Production::Field { name, content } => {
1392            // Set the field context for the duration of `content`'s
1393            // walk and emit the content against the *outer* cursor.
1394            // The SYMBOL handler picks up the context and pulls
1395            // successive `take_field(name)` edges as it encounters
1396            // SYMBOLs anywhere under `content` (under SEQ, CHOICE,
1397            // REPEAT, OPTIONAL, ALIAS — arbitrarily nested). This
1398            // subsumes the prior carve-outs for FIELD(REPEAT(...)),
1399            // FIELD(REPEAT1(...)), and the bare FIELD(SYMBOL ...)
1400            // case, and adds coverage for
1401            // `field('xs', commaSep1($.X))` which expands to
1402            // FIELD(SEQ(SYMBOL X, REPEAT(SEQ(',', SYMBOL X)))) and
1403            // any other shape where REPEAT/REPEAT1 sits inside SEQ /
1404            // CHOICE / OPTIONAL under a FIELD. A FIELD that wraps a
1405            // non-SYMBOL production (e.g. `field('op', '+')` or
1406            // `field('op', CHOICE(STRING ...))`) still works: STRING
1407            // handlers ignore the context and emit literals
1408            // directly, so the operator token survives the round
1409            // trip.
1410            let _guard = push_field_context(name);
1411            emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1412        }
1413        Production::Alias {
1414            content,
1415            named,
1416            value,
1417        } => {
1418            // A named ALIAS rewrites the parser-visible kind to
1419            // `value`. If the cursor has an unconsumed child whose
1420            // kind matches that alias name, take it and emit the
1421            // child using the alias's INNER content as the rule
1422            // (e.g. `ALIAS { SYMBOL real_rule, value: "kind_x" }`
1423            // means a `kind_x` vertex on the schema should be walked
1424            // through `real_rule`'s body, not through whatever rule
1425            // happens to be keyed under `kind_x`). This is the
1426            // dependent-optic shape: the rule the emitter walks at a
1427            // child position is determined by the parent's chosen
1428            // alias, not by the child kind alone — without it,
1429            // grammars like YAML that introduce the same kind through
1430            // many ALIAS sites lose the parent context the moment
1431            // emit_vertex is called.
1432            if *named && !value.is_empty() {
1433                if let Some(edge) = cursor.take_matching(|edge| {
1434                    schema
1435                        .vertices
1436                        .get(&edge.tgt)
1437                        .map(|v| v.kind.as_ref() == value.as_str())
1438                        .unwrap_or(false)
1439                }) {
1440                    return emit_aliased_child(protocol, schema, grammar, &edge.tgt, content, out);
1441                }
1442            }
1443            emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1444        }
1445        Production::Token { content }
1446        | Production::ImmediateToken { content }
1447        | Production::Prec { content, .. }
1448        | Production::PrecLeft { content, .. }
1449        | Production::PrecRight { content, .. }
1450        | Production::PrecDynamic { content, .. }
1451        | Production::Reserved { content, .. } => {
1452            emit_production(protocol, schema, grammar, vertex_id, content, cursor, out)
1453        }
1454    }
1455}
1456
1457/// Take the next cursor edge whose target vertex's kind matches the
1458/// SYMBOL `name` directly or via inline expansion of a hidden rule.
1459fn take_symbol_match<'a>(
1460    grammar: &Grammar,
1461    schema: &Schema,
1462    cursor: &mut ChildCursor<'a>,
1463    name: &str,
1464) -> Option<&'a Edge> {
1465    // Prefer non-field edges (`child_of`) to avoid consuming a
1466    // field-named edge that a later FIELD handler should claim.
1467    // Field-named edges (edge.kind != "child_of") are reserved for
1468    // the FIELD production that names them; consuming one here would
1469    // steal it from its intended handler (e.g. `as_pattern`'s
1470    // `alias` field edge consumed by the leading `expression`
1471    // SYMBOL instead of the trailing FIELD "alias" handler).
1472    if let Some(edge) = cursor.take_matching(|edge| {
1473        edge.kind.as_ref() == "child_of" && {
1474            let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
1475            kind_satisfies_symbol(grammar, target_kind, name)
1476        }
1477    }) {
1478        return Some(edge);
1479    }
1480    cursor.take_matching(|edge| {
1481        let target_kind = schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref());
1482        kind_satisfies_symbol(grammar, target_kind, name)
1483    })
1484}
1485
1486/// Decide whether a schema vertex of kind `target_kind` satisfies a
1487/// SYMBOL `name` reference in the grammar.
1488///
1489/// Operates as an O(1) lookup against the precomputed subtype
1490/// closure built at [`Grammar::from_bytes`]. The semantic content is
1491/// "K satisfies SYMBOL S iff K is reachable from S by walking the
1492/// grammar's hidden, supertype, and named-alias dispatch": this is
1493/// exactly the relation tree-sitter induces on `(parser-visible kind,
1494/// rule-position)` pairs.
1495fn kind_satisfies_symbol(grammar: &Grammar, target_kind: Option<&str>, name: &str) -> bool {
1496    let Some(target) = target_kind else {
1497        return false;
1498    };
1499    if target == name {
1500        return true;
1501    }
1502    grammar
1503        .subtypes
1504        .get(target)
1505        .is_some_and(|set| set.contains(name))
1506}
1507
1508/// Emit a child reached through an ALIAS production using the
1509/// alias's inner content as the rule, not `grammar.rules[child.kind]`.
1510///
1511/// This carries the dependent-optic context across the ALIAS edge:
1512/// at the parent rule's site we know which underlying production the
1513/// alias wraps (typically `SYMBOL real_rule`), and that's the
1514/// production that should drive the emit walk on the child's
1515/// children. Looking up `grammar.rules.get(child.kind)` instead would
1516/// either fail (the renamed kind has no top-level rule, e.g. YAML's
1517/// `block_mapping_pair`) or pick an arbitrary same-kinded rule from
1518/// elsewhere in the grammar.
1519///
1520/// Walk-context invariant. The dependent-optic shape of `emit_pretty`
1521/// says: the production walked at any vertex is determined by the
1522/// path from the root through the grammar, not by the vertex kind in
1523/// isolation. Two dispatch sites realise that invariant:
1524///
1525/// * [`emit_vertex`] looks up `grammar.rules[child.kind]` and walks
1526///   it. Correct for supertype / hidden-rule dispatch: the child's
1527///   kind on the schema IS the subtype tree-sitter selected, so its
1528///   top-level rule is the right production to walk.
1529/// * `emit_aliased_child` threads the parent rule's `Production`
1530///   directly (the inner `content` of `Production::Alias`) and walks
1531///   it on the child's children. Correct for ALIAS dispatch: the
1532///   child's kind on the schema is the alias's `value` (a renamed
1533///   kind that may have no top-level rule), and the production to
1534///   walk is the alias's content body, supplied by the parent.
1535///
1536/// Together these cover every site where the rule-walked-at-child
1537/// diverges from `grammar.rules[child.kind]`; the recursion site for
1538/// plain SYMBOL therefore correctly delegates to `emit_vertex`, and
1539/// we do not need a richer `WalkContext` value passed by reference.
1540/// The grammar dependency is the thread.
1541fn emit_aliased_child(
1542    protocol: &str,
1543    schema: &Schema,
1544    grammar: &Grammar,
1545    child_id: &panproto_gat::Name,
1546    content: &Production,
1547    out: &mut Output<'_>,
1548) -> Result<(), ParseError> {
1549    // Leaf shortcut: if the child has a literal-value and no
1550    // structural children, emit the captured text. Identifiers and
1551    // similar terminals reach here when an ALIAS wraps a SYMBOL that
1552    // resolves to a PATTERN.
1553    if let Some(literal) = literal_value(schema, child_id) {
1554        if children_for(schema, child_id).is_empty() {
1555            out.token(literal);
1556            return Ok(());
1557        }
1558    }
1559
1560    // Clear the enclosing FIELD context so it does not leak into the
1561    // aliased child's production walk. Without this, a FIELD("alias")
1562    // containing an ALIAS whose content is SYMBOL "expression" would
1563    // cause the inner SYMBOL handler to pull by field name "alias"
1564    // instead of by symbol match, failing to find the child edge.
1565    let _guard = clear_field_context();
1566
1567    // Resolve `content` to a rule when it's a SYMBOL (the dominant
1568    // shape: `ALIAS { content: SYMBOL real_rule, value: "kind_x" }`).
1569    if let Production::Symbol { name } = content {
1570        if let Some(rule) = grammar.rules.get(name) {
1571            let edges = children_for(schema, child_id);
1572            let mut cursor = ChildCursor::new(&edges);
1573            return emit_production(protocol, schema, grammar, child_id, rule, &mut cursor, out);
1574        }
1575    }
1576
1577    // Other ALIAS contents (CHOICE, SEQ, literals) walk in place.
1578    let edges = children_for(schema, child_id);
1579    let mut cursor = ChildCursor::new(&edges);
1580    emit_production(
1581        protocol,
1582        schema,
1583        grammar,
1584        child_id,
1585        content,
1586        &mut cursor,
1587        out,
1588    )
1589}
1590
1591fn emit_in_child_context(
1592    protocol: &str,
1593    schema: &Schema,
1594    grammar: &Grammar,
1595    child_id: &panproto_gat::Name,
1596    production: &Production,
1597    out: &mut Output<'_>,
1598) -> Result<(), ParseError> {
1599    // The child walks under its own production tree, with its own
1600    // FIELDs setting their own contexts. Clear the outer FIELD hint
1601    // so it does not leak through and cause sibling SYMBOLs inside
1602    // the child's body to mistakenly pull edges from the child's
1603    // cursor by the parent's field name.
1604    let _guard = clear_field_context();
1605    // If `production` is a structural wrapper (CHOICE / SEQ /
1606    // OPTIONAL / ...) whose referenced symbols cover the child's own
1607    // kind, the child IS the production's target node and the right
1608    // emit path is `emit_vertex(child)` (which honours the
1609    // literal-value leaf shortcut). Without this guard, FIELD(pattern,
1610    // CHOICE { _pattern, self }) on an identifier child walks the
1611    // CHOICE on the identifier's empty cursor, falls through to the
1612    // first non-BLANK alt, and loses the captured identifier text.
1613    if !matches!(production, Production::Symbol { .. }) {
1614        let child_kind = schema.vertices.get(child_id).map(|v| v.kind.as_ref());
1615        let symbols = referenced_symbols(production);
1616        if symbols
1617            .iter()
1618            .any(|s| kind_satisfies_symbol(grammar, child_kind, s) || child_kind == Some(s))
1619        {
1620            return emit_vertex(protocol, schema, grammar, child_id, out);
1621        }
1622    }
1623    match production {
1624        Production::Symbol { .. } => emit_vertex(protocol, schema, grammar, child_id, out),
1625        _ => {
1626            let edges = children_for(schema, child_id);
1627            let mut cursor = ChildCursor::new(&edges);
1628            emit_production(
1629                protocol,
1630                schema,
1631                grammar,
1632                child_id,
1633                production,
1634                &mut cursor,
1635                out,
1636            )
1637        }
1638    }
1639}
1640
1641fn pick_choice_with_cursor<'a>(
1642    schema: &Schema,
1643    grammar: &Grammar,
1644    vertex_id: &panproto_gat::Name,
1645    cursor: &ChildCursor<'_>,
1646    alternatives: &'a [Production],
1647) -> Option<&'a Production> {
1648    // Discriminator-driven dispatch (highest priority): when the
1649    // walker recorded a `chose-alt-fingerprint` constraint at parse
1650    // time, dispatch directly against that. This is the categorical
1651    // discriminator: it survives stripping of byte-position
1652    // constraints (so by-construction round-trips work) and is the
1653    // explicit witness of which CHOICE alternative the parser took.
1654    //
1655    // Falls back to the live `interstitial-*` substring blob when no
1656    // fingerprint is present (e.g. instances built by callers that
1657    // bypass the AstWalker). Both blobs are scored by the longest
1658    // STRING-literal token in an alternative that matches; the
1659    // length tiebreak prefers `&&` over `&`, `==` over `=`, etc.
1660    let constraint_blob = schema
1661        .constraints
1662        .get(vertex_id)
1663        .map(|cs| {
1664            let fingerprint: Option<&str> = cs
1665                .iter()
1666                .find(|c| c.sort.as_ref() == "chose-alt-fingerprint")
1667                .map(|c| c.value.as_str());
1668            if let Some(fp) = fingerprint {
1669                fp.to_owned()
1670            } else {
1671                cs.iter()
1672                    .filter(|c| {
1673                        let s = c.sort.as_ref();
1674                        s.starts_with("interstitial-") && !s.ends_with("-start-byte")
1675                    })
1676                    .map(|c| c.value.as_str())
1677                    .collect::<Vec<&str>>()
1678                    .join(" ")
1679            }
1680        })
1681        .unwrap_or_default();
1682    let child_kinds: Vec<&str> = schema
1683        .constraints
1684        .get(vertex_id)
1685        .and_then(|cs| {
1686            cs.iter()
1687                .find(|c| c.sort.as_ref() == "chose-alt-child-kinds")
1688                .map(|c| c.value.split_whitespace().collect())
1689        })
1690        .unwrap_or_default();
1691    // Cursor-exhaustion BLANK-preference: when all cursor edges have
1692    // been consumed AND `BLANK` is one of the alternatives, the only
1693    // alt that won't introduce a non-existent child is `BLANK`.
1694    //
1695    // This gate fires before the literal-blob discriminator because
1696    // the fingerprint is shared across every CHOICE position in the
1697    // vertex's rule body: a vertex like `sample_step` that ends in
1698    // `..., REPEAT(SEQ(",", arg)), CHOICE(",", BLANK)` records all of
1699    // its `","` interstitials in a single blob, so the literal-score
1700    // matcher would otherwise prefer `","` for the trailing CHOICE
1701    // even when the source had no trailing comma. By the time the
1702    // emitter reaches the trailing CHOICE, the REPEAT has consumed
1703    // every arg edge in cursor order; the residual unconsumed multiset
1704    // is empty; and the categorical reading of a CHOICE-with-BLANK at
1705    // a position with no remaining children is the no-op alternative.
1706    let any_unconsumed = cursor
1707        .edges
1708        .iter()
1709        .enumerate()
1710        .any(|(i, _)| !cursor.consumed[i]);
1711    let blank_present = alternatives.iter().any(|a| matches!(a, Production::Blank));
1712    if !any_unconsumed && blank_present {
1713        return alternatives.iter().find(|a| matches!(a, Production::Blank));
1714    }
1715    if !any_unconsumed && !blank_present {
1716        let mut visited = std::collections::HashSet::new();
1717        let mut yield_cache = grammar.yield_sets.clone();
1718        for alt in alternatives {
1719            let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
1720            if ys.contains("") {
1721                return Some(alt);
1722            }
1723            visited.clear();
1724        }
1725    }
1726
1727    if !constraint_blob.is_empty() {
1728        // Primary score: literal-token match length. This dominates
1729        // alt selection so existing language tests that depend on
1730        // literal-only fingerprints keep working.
1731        // Secondary score (tiebreaker only): named-symbol kind match
1732        // count, read from the separate `chose-alt-child-kinds`
1733        // constraint (kept apart from the literal fingerprint so
1734        // identifiers like `:` in the kind list don't contaminate the
1735        // literal match). An alt that matches the recorded kinds is a
1736        // stronger witness than one whose only
1737        // overlap is literal punctuation.
1738        let mut best_literal: usize = 0;
1739        let mut best_symbols: usize = 0;
1740        let mut best_alt: Option<&Production> = None;
1741        let mut tied = false;
1742        for alt in alternatives {
1743            let strings = literal_strings(alt);
1744            if strings.is_empty() {
1745                continue;
1746            }
1747            let literal_score = strings
1748                .iter()
1749                .filter(|s| constraint_blob.contains(s.as_str()))
1750                .map(String::len)
1751                .sum::<usize>();
1752            if literal_score == 0 {
1753                continue;
1754            }
1755            // Symbol score is computed only as a tiebreaker among alts
1756            // whose literal-token coverage is the same; it never lifts
1757            // an alt above one with a strictly higher literal score.
1758            // Reads the `chose-alt-child-kinds` constraint (a separate
1759            // sequence the walker emits, kept apart from the literal
1760            // fingerprint to avoid cross-contamination).
1761            let symbol_score = if literal_score >= best_literal && !child_kinds.is_empty() {
1762                let symbols = referenced_symbols(alt);
1763                symbols
1764                    .iter()
1765                    .filter(|sym| {
1766                        let sym_str: &str = sym;
1767                        if child_kinds.contains(&sym_str) {
1768                            return true;
1769                        }
1770                        grammar.subtypes.get(sym_str).is_some_and(|sub_set| {
1771                            sub_set
1772                                .iter()
1773                                .any(|sub| child_kinds.contains(&sub.as_str()))
1774                        })
1775                    })
1776                    .count()
1777            } else {
1778                0
1779            };
1780            let better = literal_score > best_literal
1781                || (literal_score == best_literal && symbol_score > best_symbols);
1782            let same = literal_score == best_literal && symbol_score == best_symbols;
1783            if better {
1784                best_literal = literal_score;
1785                best_symbols = symbol_score;
1786                best_alt = Some(alt);
1787                tied = false;
1788            } else if same && best_alt.is_some() {
1789                tied = true;
1790            }
1791        }
1792        // Only commit to an alt when the fingerprint discriminates it
1793        // uniquely. A tie means the alts share the same literal token
1794        // set (e.g. JSON's `string = CHOICE { SEQ { '"', '"' }, SEQ {
1795        // '"', _string_content, '"' } }` — both alts contain just the
1796        // two `"` tokens). In that case fall through to cursor-based
1797        // dispatch, which uses the actual edge structure.
1798        if let Some(alt) = best_alt {
1799            if !tied {
1800                return Some(alt);
1801            }
1802        }
1803    }
1804
1805    // Cursor-driven dispatch via Yield-set preimage.
1806    //
1807    // For a CHOICE C = A1 | ... | An, Yield(Ai) is the set of vertex
1808    // kinds that can appear as the first named child when Ai is taken
1809    // (see `yield_of_production`). Given the first unconsumed cursor
1810    // edge with target kind K, select the first Ai (grammar order)
1811    // where K ∈ Yield(Ai). This is deterministic: grammar order is
1812    // the tiebreak, matching tree-sitter's own disambiguation.
1813    let first_unconsumed_kind: Option<&str> = cursor
1814        .edges
1815        .iter()
1816        .enumerate()
1817        .find(|(i, _)| !cursor.consumed[*i])
1818        .and_then(|(_, edge)| schema.vertices.get(&edge.tgt).map(|v| v.kind.as_ref()));
1819    if let Some(target_kind) = first_unconsumed_kind {
1820        // The subtype closure `subtypes[target_kind]` contains every
1821        // symbol name S such that a vertex of kind `target_kind` can
1822        // appear where the grammar says `SYMBOL S`. For a CHOICE
1823        // C = A1 | ... | An, the correct alternative is the one whose
1824        // top-level symbol is in `subtypes[target_kind]` (the target
1825        // kind IS a subtype of that symbol, so the symbol's rule body
1826        // dispatches to the target kind at parse time). This is an
1827        // O(1) set-membership check per alternative — no recursive
1828        // Yield computation needed.
1829        //
1830        // Preference order:
1831        //   1. Direct name match (target_kind == symbol name)
1832        //   2. Subtype match (symbol name ∈ subtypes[target_kind])
1833        //   3. Yield-set match (target_kind ∈ Yield(alt)) as fallback
1834        //      for non-SYMBOL alternatives (ALIAS, SEQ, etc.)
1835        let target_supers = grammar.subtypes.get(target_kind);
1836
1837        // Pass 1: direct name match
1838        for alt in alternatives {
1839            if let Production::Symbol { name } = alt {
1840                if name.as_str() == target_kind {
1841                    return Some(alt);
1842                }
1843            }
1844            if let Production::Alias {
1845                named: true, value, ..
1846            } = alt
1847            {
1848                if value.as_str() == target_kind {
1849                    return Some(alt);
1850                }
1851            }
1852        }
1853
1854        // Pass 2: subtype match (the target kind's supertype set
1855        // tells us which SYMBOL names it satisfies)
1856        if let Some(supers) = target_supers {
1857            for alt in alternatives {
1858                if let Production::Symbol { name } = alt {
1859                    if supers.contains(name.as_str()) {
1860                        return Some(alt);
1861                    }
1862                }
1863                if let Production::Alias {
1864                    named: true, value, ..
1865                } = alt
1866                {
1867                    if supers.contains(value.as_str()) {
1868                        return Some(alt);
1869                    }
1870                }
1871            }
1872        }
1873
1874        // Pass 3: Yield-set fallback for alternatives that are not
1875        // plain SYMBOLs or named ALIASes (e.g. SEQ, PREC wrappers
1876        // around SYMBOLs that the above passes don't unwrap).
1877        let mut visited = std::collections::HashSet::new();
1878        let mut yield_cache = grammar.yield_sets.clone();
1879        for alt in alternatives {
1880            let ys = yield_of_production(grammar, alt, &mut visited, &mut yield_cache);
1881            if ys.contains(target_kind) {
1882                return Some(alt);
1883            }
1884            visited.clear();
1885        }
1886    }
1887
1888    // FIELD dispatch: pick an alternative whose FIELD name matches an
1889    // unconsumed edge kind.
1890    let edge_kinds: Vec<&str> = cursor
1891        .edges
1892        .iter()
1893        .enumerate()
1894        .filter(|(i, _)| !cursor.consumed[*i])
1895        .map(|(_, e)| e.kind.as_ref())
1896        .collect();
1897    for alt in alternatives {
1898        if has_field_in(alt, &edge_kinds) {
1899            return Some(alt);
1900        }
1901    }
1902
1903    // No cursor-driven match. Fall back to:
1904    //
1905    // - BLANK (the explicit empty alternative) when present, so an
1906    //   OPTIONAL-shaped CHOICE compiles to nothing.
1907    // - The first non-`BLANK` alternative as a last resort, used by
1908    //   STRING-only alternatives (keyword tokens) and other choices
1909    //   that don't reach the cursor.
1910    //
1911    // The previous "match own_kind" branch is intentionally absent:
1912    // when an alt's first SYMBOL equals the current vertex's kind, the
1913    // caller is already emitting that vertex's own rule. Recursing
1914    // into the alt would cause a self-loop in the rule walk.
1915    let _ = (schema, vertex_id);
1916    if alternatives.iter().any(|a| matches!(a, Production::Blank)) {
1917        return alternatives.iter().find(|a| matches!(a, Production::Blank));
1918    }
1919    alternatives
1920        .iter()
1921        .find(|alt| !matches!(alt, Production::Blank))
1922}
1923
1924/// Collect every literal STRING token directly inside `production`
1925/// (without descending into SYMBOLs / hidden rules). Used to score
1926/// CHOICE alternatives against the parent vertex's interstitials so
1927/// the right operator / keyword form is picked when the schema
1928/// preserves interstitial fragments from a prior parse.
1929fn literal_strings(production: &Production) -> Vec<String> {
1930    let mut out = Vec::new();
1931    fn walk(p: &Production, out: &mut Vec<String>) {
1932        match p {
1933            Production::String { value } if !value.is_empty() => {
1934                out.push(value.clone());
1935            }
1936            Production::Choice { members } | Production::Seq { members } => {
1937                for m in members {
1938                    walk(m, out);
1939                }
1940            }
1941            Production::Repeat { content }
1942            | Production::Repeat1 { content }
1943            | Production::Optional { content }
1944            | Production::Field { content, .. }
1945            | Production::Alias { content, .. }
1946            | Production::Token { content }
1947            | Production::ImmediateToken { content }
1948            | Production::Prec { content, .. }
1949            | Production::PrecLeft { content, .. }
1950            | Production::PrecRight { content, .. }
1951            | Production::PrecDynamic { content, .. }
1952            | Production::Reserved { content, .. } => walk(content, out),
1953            _ => {}
1954        }
1955    }
1956    walk(production, &mut out);
1957    out
1958}
1959
1960/// Collect every SYMBOL name reachable from `production` without
1961/// crossing into nested rules. Used by `pick_choice_with_cursor` to
1962/// rank alternatives by "any SYMBOL inside this alt matches something
1963/// on the cursor", instead of just the first SYMBOL: a leading
1964/// optional like `attribute_item` then `parameter` is otherwise
1965/// rejected when only the parameter children are present.
1966fn referenced_symbols(production: &Production) -> Vec<&str> {
1967    let mut out = Vec::new();
1968    fn walk<'a>(p: &'a Production, out: &mut Vec<&'a str>) {
1969        match p {
1970            Production::Symbol { name } => out.push(name.as_str()),
1971            Production::Choice { members } | Production::Seq { members } => {
1972                for m in members {
1973                    walk(m, out);
1974                }
1975            }
1976            Production::Alias {
1977                content,
1978                named,
1979                value,
1980            } => {
1981                // A named ALIAS produces a child vertex whose kind is
1982                // the alias `value` (e.g. `ALIAS { content: STRING "=",
1983                // value: "punctuation", named: true }` introduces a
1984                // `punctuation` child). For cursor-driven dispatch to
1985                // recognise alts that emit such children, yield the
1986                // alias value as a referenced symbol. Anonymous aliases
1987                // do not introduce a named node and only need their
1988                // inner content's symbols.
1989                if *named && !value.is_empty() {
1990                    out.push(value.as_str());
1991                }
1992                walk(content, out);
1993            }
1994            Production::Repeat { content }
1995            | Production::Repeat1 { content }
1996            | Production::Optional { content }
1997            | Production::Field { content, .. }
1998            | Production::Token { content }
1999            | Production::ImmediateToken { content }
2000            | Production::Prec { content, .. }
2001            | Production::PrecLeft { content, .. }
2002            | Production::PrecRight { content, .. }
2003            | Production::PrecDynamic { content, .. }
2004            | Production::Reserved { content, .. } => walk(content, out),
2005            _ => {}
2006        }
2007    }
2008    walk(production, &mut out);
2009    out
2010}
2011
2012#[cfg(test)]
2013fn first_symbol(production: &Production) -> Option<&str> {
2014    match production {
2015        Production::Symbol { name } => Some(name),
2016        Production::Seq { members } => members.iter().find_map(first_symbol),
2017        Production::Choice { members } => members.iter().find_map(first_symbol),
2018        Production::Repeat { content }
2019        | Production::Repeat1 { content }
2020        | Production::Optional { content }
2021        | Production::Field { content, .. }
2022        | Production::Alias { content, .. }
2023        | Production::Token { content }
2024        | Production::ImmediateToken { content }
2025        | Production::Prec { content, .. }
2026        | Production::PrecLeft { content, .. }
2027        | Production::PrecRight { content, .. }
2028        | Production::PrecDynamic { content, .. }
2029        | Production::Reserved { content, .. } => first_symbol(content),
2030        _ => None,
2031    }
2032}
2033
2034fn has_field_in(production: &Production, edge_kinds: &[&str]) -> bool {
2035    match production {
2036        Production::Field { name, .. } => edge_kinds.contains(&name.as_str()),
2037        Production::Seq { members } | Production::Choice { members } => {
2038            members.iter().any(|m| has_field_in(m, edge_kinds))
2039        }
2040        Production::Repeat { content }
2041        | Production::Repeat1 { content }
2042        | Production::Optional { content }
2043        | Production::Alias { content, .. }
2044        | Production::Token { content }
2045        | Production::ImmediateToken { content }
2046        | Production::Prec { content, .. }
2047        | Production::PrecLeft { content, .. }
2048        | Production::PrecRight { content, .. }
2049        | Production::PrecDynamic { content, .. }
2050        | Production::Reserved { content, .. } => has_field_in(content, edge_kinds),
2051        _ => false,
2052    }
2053}
2054
2055fn has_relevant_constraint(
2056    production: &Production,
2057    schema: &Schema,
2058    vertex_id: &panproto_gat::Name,
2059) -> bool {
2060    let constraints = match schema.constraints.get(vertex_id) {
2061        Some(c) => c,
2062        None => return false,
2063    };
2064    fn walk(production: &Production, constraints: &[panproto_schema::Constraint]) -> bool {
2065        match production {
2066            Production::String { value } => constraints
2067                .iter()
2068                .any(|c| c.value == *value || c.sort.as_ref() == value),
2069            Production::Field { name, content } => {
2070                constraints.iter().any(|c| c.sort.as_ref() == name) || walk(content, constraints)
2071            }
2072            Production::Seq { members } | Production::Choice { members } => {
2073                members.iter().any(|m| walk(m, constraints))
2074            }
2075            Production::Repeat { content }
2076            | Production::Repeat1 { content }
2077            | Production::Optional { content }
2078            | Production::Alias { content, .. }
2079            | Production::Token { content }
2080            | Production::ImmediateToken { content }
2081            | Production::Prec { content, .. }
2082            | Production::PrecLeft { content, .. }
2083            | Production::PrecRight { content, .. }
2084            | Production::PrecDynamic { content, .. }
2085            | Production::Reserved { content, .. } => walk(content, constraints),
2086            _ => false,
2087        }
2088    }
2089    walk(production, constraints)
2090}
2091
2092fn children_for<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Vec<&'a Edge> {
2093    // Walk `outgoing` (insertion-ordered by SchemaBuilder via SmallVec
2094    // append) rather than the unordered `edges` HashMap so abstract
2095    // schemas under REPEAT(CHOICE(...)) preserve the order their edges
2096    // were inserted in. The previous implementation walked the HashMap
2097    // and sorted lexicographically by (kind, target id), which fused
2098    // interleaved children of the same kind into runs (e.g. a sequence
2099    // [symbol, punct, int, symbol, punct, int] became [symbol, symbol,
2100    // punct, punct, int, int] after the lex sort).
2101    let Some(edges) = schema.outgoing.get(vertex_id) else {
2102        return Vec::new();
2103    };
2104
2105    // Look up the canonical Edge reference (the key in `schema.edges`)
2106    // for each entry in `outgoing`. Falls back to the SmallVec entry if
2107    // the canonical key is missing, which would indicate index drift.
2108    let mut indexed: Vec<(usize, u32, &Edge)> = edges
2109        .iter()
2110        .enumerate()
2111        .map(|(i, e)| {
2112            let canonical = schema.edges.get_key_value(e).map_or(e, |(k, _)| k);
2113            let pos = schema.orderings.get(canonical).copied().unwrap_or(u32::MAX);
2114            (i, pos, canonical)
2115        })
2116        .collect();
2117
2118    // Stable sort by (explicit-ordering, insertion-index). Edges with
2119    // an explicit `orderings` entry come first in their declared order;
2120    // the remainder fall through in insertion order.
2121    indexed.sort_by_key(|(i, pos, _)| (*pos, *i));
2122    indexed.into_iter().map(|(_, _, e)| e).collect()
2123}
2124
2125fn vertex_id_kind<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
2126    schema.vertices.get(vertex_id).map(|v| v.kind.as_ref())
2127}
2128
2129fn literal_value<'a>(schema: &'a Schema, vertex_id: &panproto_gat::Name) -> Option<&'a str> {
2130    schema
2131        .constraints
2132        .get(vertex_id)?
2133        .iter()
2134        .find(|c| c.sort.as_ref() == "literal-value")
2135        .map(|c| c.value.as_str())
2136}
2137
2138/// True iff `pattern` matches a (possibly optional / repeated) sequence
2139/// of carriage-return and newline characters only. Examples: `\r?\n`,
2140/// `\n`, `\r\n`, `\n+`, `\r?\n+`. Distinguishes structural newline
2141/// terminals from generic whitespace and from other patterns that
2142/// happen to contain a newline escape inside a larger class.
2143fn is_newline_like_pattern(pattern: &str) -> bool {
2144    if pattern.is_empty() {
2145        return false;
2146    }
2147    let mut chars = pattern.chars();
2148    let mut saw_newline_atom = false;
2149    while let Some(c) = chars.next() {
2150        match c {
2151            '\\' => match chars.next() {
2152                Some('n' | 'r') => saw_newline_atom = true,
2153                _ => return false,
2154            },
2155            '?' | '*' | '+' => {} // quantifiers on the previous atom
2156            _ => return false,
2157        }
2158    }
2159    saw_newline_atom
2160}
2161
2162/// True iff `pattern` matches a (possibly quantified) run of generic
2163/// whitespace characters: `\s+`, `[ \t]+`, ` +`, `\s*`. Such patterns
2164/// describe interstitial spacing rather than syntactic content, so the
2165/// pretty emitter can drop them and let the layout pass insert the
2166/// configured separator.
2167fn is_whitespace_only_pattern(pattern: &str) -> bool {
2168    if pattern.is_empty() {
2169        return false;
2170    }
2171    // Strip an outer quantifier suffix.
2172    let trimmed = pattern.trim_end_matches(['?', '*', '+']);
2173    if trimmed.is_empty() {
2174        return false;
2175    }
2176    // Bare `\s` / ` ` / `\t`.
2177    if matches!(trimmed, "\\s" | " " | "\\t") {
2178        return true;
2179    }
2180    // Character class containing only whitespace atoms.
2181    if let Some(inner) = trimmed.strip_prefix('[').and_then(|s| s.strip_suffix(']')) {
2182        let mut chars = inner.chars();
2183        let mut saw_atom = false;
2184        while let Some(c) = chars.next() {
2185            match c {
2186                '\\' => match chars.next() {
2187                    Some('s' | 't' | 'r' | 'n') => saw_atom = true,
2188                    _ => return false,
2189                },
2190                ' ' | '\t' => saw_atom = true,
2191                _ => return false,
2192            }
2193        }
2194        return saw_atom;
2195    }
2196    false
2197}
2198
2199fn placeholder_for_pattern(pattern: &str) -> String {
2200    // Heuristic placeholder for unconstrained PATTERN terminals.
2201    //
2202    // First handle the "the regex IS a literal escape" cases that
2203    // tree-sitter grammars use as separators (`\n`, `\r\n`, `;`,
2204    // etc.); emitting the matching character is always preferable
2205    // to a `_x` identifier-like placeholder when the surrounding
2206    // grammar expects a separator.
2207    let simple_lit = decode_simple_pattern_literal(pattern);
2208    if let Some(lit) = simple_lit {
2209        return lit;
2210    }
2211
2212    if pattern.contains("[0-9]") || pattern.contains("\\d") {
2213        "0".into()
2214    } else if pattern.contains("[a-zA-Z_]") || pattern.contains("\\w") {
2215        "_x".into()
2216    } else if pattern.contains('"') || pattern.contains('\'') {
2217        "\"\"".into()
2218    } else {
2219        "_".into()
2220    }
2221}
2222
2223/// Decode a tree-sitter PATTERN whose regex is a simple literal
2224/// (newline, semicolon, comma, etc.) to the byte sequence it matches.
2225/// Returns `None` for patterns with character classes, alternations,
2226/// or quantifiers; the caller falls back to the heuristic placeholder.
2227fn decode_simple_pattern_literal(pattern: &str) -> Option<String> {
2228    // Skip patterns containing regex metachars that would broaden the
2229    // match beyond a single literal byte sequence.
2230    if pattern
2231        .chars()
2232        .any(|c| matches!(c, '[' | ']' | '(' | ')' | '*' | '+' | '?' | '|' | '{' | '}'))
2233    {
2234        return None;
2235    }
2236    let mut out = String::new();
2237    let mut chars = pattern.chars();
2238    while let Some(c) = chars.next() {
2239        if c == '\\' {
2240            match chars.next() {
2241                Some('n') => out.push('\n'),
2242                Some('r') => out.push('\r'),
2243                Some('t') => out.push('\t'),
2244                Some('\\') => out.push('\\'),
2245                Some('/') => out.push('/'),
2246                Some(other) => out.push(other),
2247                None => return None,
2248            }
2249        } else {
2250            out.push(c);
2251        }
2252    }
2253    Some(out)
2254}
2255
2256// ═══════════════════════════════════════════════════════════════════
2257// Token list output with Spacing algebra
2258// ═══════════════════════════════════════════════════════════════════
2259//
2260// Emit produces a free monoid over `Token`. Layout (spaces, newlines,
2261// indentation) is a homomorphism `Vec<Token> -> Vec<u8>` parameterised
2262// by `FormatPolicy`. Separating the structural output from the layout
2263// decision means each phase has one job: emit walks the grammar and
2264// pushes tokens; layout is a single fold, locally driven by adjacent
2265// pairs and a depth counter. Snapshot/restore is just `tokens.len()`.
2266
2267#[derive(Clone)]
2268enum Token {
2269    /// A user-visible terminal contributed by the grammar.
2270    Lit(String),
2271    /// `indent_open` marker emitted when a `Lit` matched the policy's
2272    /// open list. Carried as a separate token so layout can decide to
2273    /// break + indent without re-scanning.
2274    IndentOpen,
2275    /// `indent_close` marker emitted before a closer-`Lit`.
2276    IndentClose,
2277    /// "Break a line here if not already at line start" — used after
2278    /// statements/declarations and after open braces.
2279    LineBreak,
2280    /// Suppress the next inter-Lit separator. Pushed by the REPEAT
2281    /// walker when an iteration's "separator slot" (a CHOICE-with-BLANK
2282    /// or OPTIONAL at SEQ position 0) emitted zero content tokens, so
2283    /// the categorical reading is "no source-level separator existed
2284    /// between these two sibling iterations of the body".
2285    NoSpace,
2286}
2287
2288struct Output<'a> {
2289    tokens: Vec<Token>,
2290    policy: &'a FormatPolicy,
2291}
2292
2293#[derive(Clone)]
2294struct OutputSnapshot {
2295    tokens_len: usize,
2296}
2297
2298impl<'a> Output<'a> {
2299    fn new(policy: &'a FormatPolicy) -> Self {
2300        Self {
2301            tokens: Vec::new(),
2302            policy,
2303        }
2304    }
2305
2306    fn token(&mut self, value: &str) {
2307        if value.is_empty() {
2308            return;
2309        }
2310
2311        // A grammar STRING whose value is a newline (e.g. abc's `_NL = "\n"`
2312        // or any rule that uses `"\n"` as a structural line terminator)
2313        // must route through the layout's `LineBreak` channel. Emitting it
2314        // as a `Lit` leaves the newline character in the byte stream but
2315        // also makes `needs_space_between` insert the configured separator
2316        // between the newline and the following token, producing leading
2317        // spaces on every line after the first.
2318        if value == "\n" || value == "\r\n" || value == "\r" {
2319            self.tokens.push(Token::LineBreak);
2320            return;
2321        }
2322
2323        // A captured literal value (typically a vertex's `literal-value`
2324        // constraint covering the full source span of a terminal-like
2325        // rule, e.g. abc's `reference_number_line` matching `"X:1\n"`)
2326        // may contain trailing newlines. Splitting the trailing newlines
2327        // off as a `LineBreak` lets the layout pass treat the next Lit
2328        // as starting a new line; otherwise the next Lit pair would
2329        // trigger `needs_space_between` against the embedded `\n` and
2330        // insert the policy separator at column 0 of the new line.
2331        let trimmed = value.trim_end_matches(['\n', '\r']);
2332        let trailing_newlines = value.len() - trimmed.len();
2333        if trailing_newlines > 0 && !trimmed.is_empty() {
2334            if self.policy.indent_close.iter().any(|t| t == trimmed) {
2335                self.tokens.push(Token::IndentClose);
2336            }
2337            self.tokens.push(Token::Lit(trimmed.to_owned()));
2338            if self.policy.indent_open.iter().any(|t| t == trimmed) {
2339                self.tokens.push(Token::IndentOpen);
2340            } else if self.policy.line_break_after.iter().any(|t| t == trimmed) {
2341                // already emitting a LineBreak below for the trailing \n
2342            }
2343            self.tokens.push(Token::LineBreak);
2344            return;
2345        }
2346
2347        if self.policy.indent_close.iter().any(|t| t == value) {
2348            self.tokens.push(Token::IndentClose);
2349        }
2350
2351        self.tokens.push(Token::Lit(value.to_owned()));
2352
2353        if self.policy.indent_open.iter().any(|t| t == value) {
2354            self.tokens.push(Token::IndentOpen);
2355            self.tokens.push(Token::LineBreak);
2356        } else if self.policy.line_break_after.iter().any(|t| t == value) {
2357            self.tokens.push(Token::LineBreak);
2358        }
2359    }
2360
2361    fn newline(&mut self) {
2362        self.tokens.push(Token::LineBreak);
2363    }
2364
2365    /// Open an indent scope: subsequent `LineBreak`s render at the
2366    /// new depth until a matching `indent_close` pops it. Used by the
2367    /// external-token fallback to render indent-based grammars'
2368    /// `_indent` scanner outputs.
2369    fn indent_open(&mut self) {
2370        self.tokens.push(Token::IndentOpen);
2371        self.tokens.push(Token::LineBreak);
2372    }
2373
2374    /// Close one indent scope opened by `indent_open`.
2375    fn indent_close(&mut self) {
2376        self.tokens.push(Token::IndentClose);
2377    }
2378
2379    fn snapshot(&self) -> OutputSnapshot {
2380        OutputSnapshot {
2381            tokens_len: self.tokens.len(),
2382        }
2383    }
2384
2385    fn restore(&mut self, snap: OutputSnapshot) {
2386        self.tokens.truncate(snap.tokens_len);
2387    }
2388
2389    /// True iff at least one `Token::Lit` was pushed since `snap`.
2390    /// Control-only emissions (`LineBreak`, `IndentOpen` / `IndentClose`,
2391    /// `NoSpace`) do not count as content. Used by the REPEAT walker
2392    /// to detect that a "separator slot" CHOICE picked its BLANK
2393    /// alternative, so the next iteration's content can be marked
2394    /// tight against the previous iteration's content.
2395    fn lit_emitted_since(&self, snap: OutputSnapshot) -> bool {
2396        self.tokens[snap.tokens_len..]
2397            .iter()
2398            .any(|t| matches!(t, Token::Lit(_)))
2399    }
2400
2401    /// Push a marker that suppresses the next inter-Lit separator the
2402    /// layout pass would otherwise insert. Used to encode "no source-
2403    /// level separator was emitted between these two Lits" without
2404    /// having to make per-grammar adjacency decisions in the layout.
2405    fn no_space(&mut self) {
2406        self.tokens.push(Token::NoSpace);
2407    }
2408
2409    fn finish(self) -> Vec<u8> {
2410        layout(&self.tokens, self.policy)
2411    }
2412}
2413
2414/// Fold a token list into bytes. The algebra:
2415/// * adjacent `Lit`s get a single space iff `needs_space_between(a, b)`,
2416/// * `IndentOpen` / `IndentClose` adjust a depth counter,
2417/// * `LineBreak` writes `\n` if not already at line start, then the
2418///   next `Lit` writes `indent * indent_width` spaces of indent.
2419fn layout(tokens: &[Token], policy: &FormatPolicy) -> Vec<u8> {
2420    let mut bytes = Vec::new();
2421    let mut indent: usize = 0;
2422    let mut at_line_start = true;
2423    let mut last_lit: Option<&str> = None;
2424    // True iff, at the moment `last_lit` was emitted, the cursor was at a
2425    // position where the grammar expects an operand: start of stream / line,
2426    // just after an open paren / bracket / brace, just after a separator like
2427    // `,` or `;`, or just after a binary / assignment operator. Used by
2428    // `needs_space_between` to recognise `last_lit` as a tight unary prefix
2429    // (`f(-1.0)`) rather than a spaced binary operator (`a - b`).
2430    let mut last_was_in_operand_position = true;
2431    let mut expecting_operand = true;
2432    // Set when a `Token::NoSpace` marker is seen; cleared when the next
2433    // Lit consumes it. While set, suppress the policy separator that
2434    // would otherwise be inserted before the next Lit.
2435    let mut suppress_next_separator = false;
2436    let newline = policy.newline.as_bytes();
2437    let separator = policy.separator.as_bytes();
2438
2439    for tok in tokens {
2440        match tok {
2441            Token::IndentOpen => indent += 1,
2442            Token::IndentClose => {
2443                indent = indent.saturating_sub(1);
2444                if !at_line_start {
2445                    bytes.extend_from_slice(newline);
2446                    at_line_start = true;
2447                    expecting_operand = true;
2448                }
2449            }
2450            Token::LineBreak => {
2451                if !at_line_start {
2452                    bytes.extend_from_slice(newline);
2453                    at_line_start = true;
2454                    expecting_operand = true;
2455                }
2456            }
2457            Token::NoSpace => {
2458                suppress_next_separator = true;
2459            }
2460            Token::Lit(value) => {
2461                if at_line_start {
2462                    bytes.extend(std::iter::repeat_n(b' ', indent * policy.indent_width));
2463                } else if let Some(prev) = last_lit {
2464                    if !suppress_next_separator
2465                        && needs_space_between(prev, value, last_was_in_operand_position)
2466                    {
2467                        bytes.extend_from_slice(separator);
2468                    }
2469                }
2470                suppress_next_separator = false;
2471                bytes.extend_from_slice(value.as_bytes());
2472                at_line_start = false;
2473                last_was_in_operand_position = expecting_operand;
2474                expecting_operand = leaves_operand_position(value);
2475                last_lit = Some(value.as_str());
2476            }
2477        }
2478    }
2479
2480    if !at_line_start {
2481        bytes.extend_from_slice(newline);
2482    }
2483    bytes
2484}
2485
2486/// True iff emitting `tok` leaves the cursor in a position where the
2487/// grammar expects an operand next. Operand-introducing tokens are open
2488/// punctuation, separators, and operator-like strings; operand-terminating
2489/// tokens are identifiers, literals, and closing punctuation.
2490fn leaves_operand_position(tok: &str) -> bool {
2491    if tok.is_empty() {
2492        return true;
2493    }
2494    if is_punct_open(tok) {
2495        return true;
2496    }
2497    if matches!(tok, "," | ";") {
2498        return true;
2499    }
2500    if is_punct_close(tok) {
2501        return false;
2502    }
2503    if first_is_alnum_or_underscore(tok) || last_ends_with_alnum(tok) {
2504        return false;
2505    }
2506    // Pure punctuation/operator runs (`=`, `+`, `-`, `<=`, `>>`, …) leave
2507    // the cursor expecting another operand.
2508    true
2509}
2510
2511fn needs_space_between(last: &str, next: &str, expecting_operand: bool) -> bool {
2512    if last.is_empty() || next.is_empty() {
2513        return false;
2514    }
2515    if is_punct_open(last) || is_punct_open(next) {
2516        return false;
2517    }
2518    if is_punct_close(next) {
2519        return false;
2520    }
2521    if is_punct_close(last) && is_punct_punctuation(next) {
2522        return false;
2523    }
2524    if last == "." || next == "." {
2525        return false;
2526    }
2527    // Tight unary prefix: `last` is a sign/logical-not operator emitted
2528    // where the grammar expected an operand, so it glues to `next`.
2529    // `expecting_operand` here means: just before `last` was emitted,
2530    // the cursor expected an operand, which makes `last` a unary prefix.
2531    // Examples: `f(-1.0)`, `[ -2, 3 ]`, `return -x`, `a = !flag`.
2532    if expecting_operand && is_unary_prefix_operator(last) && first_is_operand_start(next) {
2533        return false;
2534    }
2535    if last_is_word_like(last) && first_is_word_like(next) {
2536        return true;
2537    }
2538    if last_ends_with_alnum(last) && first_is_alnum_or_underscore(next) {
2539        return true;
2540    }
2541    // Adjacent operator runs: keep them apart so the lexer doesn't glue
2542    // `>` and `=` into `>=` unintentionally.
2543    true
2544}
2545
2546fn is_unary_prefix_operator(s: &str) -> bool {
2547    matches!(s, "-" | "+" | "!" | "~")
2548}
2549
2550fn first_is_operand_start(s: &str) -> bool {
2551    s.chars()
2552        .next()
2553        .map(|c| c.is_alphanumeric() || c == '_' || c == '.' || c == '(')
2554        .unwrap_or(false)
2555}
2556
2557fn is_punct_open(s: &str) -> bool {
2558    matches!(s, "(" | "[" | "{" | "\"" | "'" | "`" | "@" | "#")
2559}
2560
2561fn is_punct_close(s: &str) -> bool {
2562    matches!(s, ")" | "]" | "}" | "," | ";" | ":" | "\"" | "'" | "`")
2563}
2564
2565fn is_punct_punctuation(s: &str) -> bool {
2566    matches!(s, "," | ";" | ":" | "." | ")" | "]" | "}")
2567}
2568
2569fn last_is_word_like(s: &str) -> bool {
2570    s.chars()
2571        .next_back()
2572        .map(|c| c.is_alphanumeric() || c == '_')
2573        .unwrap_or(false)
2574}
2575
2576fn first_is_word_like(s: &str) -> bool {
2577    s.chars()
2578        .next()
2579        .map(|c| c.is_alphanumeric() || c == '_')
2580        .unwrap_or(false)
2581}
2582
2583fn last_ends_with_alnum(s: &str) -> bool {
2584    s.chars()
2585        .next_back()
2586        .map(char::is_alphanumeric)
2587        .unwrap_or(false)
2588}
2589
2590fn first_is_alnum_or_underscore(s: &str) -> bool {
2591    s.chars()
2592        .next()
2593        .map(|c| c.is_alphanumeric() || c == '_')
2594        .unwrap_or(false)
2595}
2596
2597#[cfg(test)]
2598#[allow(clippy::unwrap_used)]
2599mod tests {
2600    use super::*;
2601
2602    #[test]
2603    fn parses_simple_grammar_json() {
2604        let bytes = br#"{
2605            "name": "tiny",
2606            "rules": {
2607                "program": {
2608                    "type": "SEQ",
2609                    "members": [
2610                        {"type": "STRING", "value": "hello"},
2611                        {"type": "STRING", "value": ";"}
2612                    ]
2613                }
2614            }
2615        }"#;
2616        let g = Grammar::from_bytes("tiny", bytes).expect("valid tiny grammar");
2617        assert!(g.rules.contains_key("program"));
2618    }
2619
2620    #[test]
2621    fn output_emits_punctuation_without_leading_space() {
2622        let policy = FormatPolicy::default();
2623        let mut out = Output::new(&policy);
2624        out.token("foo");
2625        out.token("(");
2626        out.token(")");
2627        out.token(";");
2628        let bytes = out.finish();
2629        let s = std::str::from_utf8(&bytes).expect("ascii output");
2630        assert!(s.starts_with("foo();"), "got {s:?}");
2631    }
2632
2633    #[test]
2634    fn grammar_from_bytes_rejects_malformed_input() {
2635        let result = Grammar::from_bytes("malformed", b"not json");
2636        let err = result.expect_err("malformed bytes must yield Err");
2637        let msg = err.to_string();
2638        assert!(
2639            msg.contains("malformed"),
2640            "error message should name the protocol: {msg:?}"
2641        );
2642    }
2643
2644    #[test]
2645    fn output_indents_after_open_brace() {
2646        let policy = FormatPolicy::default();
2647        let mut out = Output::new(&policy);
2648        out.token("fn");
2649        out.token("foo");
2650        out.token("(");
2651        out.token(")");
2652        out.token("{");
2653        out.token("body");
2654        out.token("}");
2655        let bytes = out.finish();
2656        let s = std::str::from_utf8(&bytes).expect("ascii output");
2657        assert!(s.contains("{\n"), "newline after opening brace: {s:?}");
2658        assert!(s.contains("body"), "body inside block: {s:?}");
2659        assert!(s.ends_with("}\n"), "newline after closing brace: {s:?}");
2660    }
2661
2662    #[test]
2663    fn output_no_space_between_word_and_dot() {
2664        let policy = FormatPolicy::default();
2665        let mut out = Output::new(&policy);
2666        out.token("foo");
2667        out.token(".");
2668        out.token("bar");
2669        let bytes = out.finish();
2670        let s = std::str::from_utf8(&bytes).expect("ascii output");
2671        assert!(s.starts_with("foo.bar"), "no space around dot: {s:?}");
2672    }
2673
2674    #[test]
2675    fn output_snapshot_restore_truncates_bytes() {
2676        let policy = FormatPolicy::default();
2677        let mut out = Output::new(&policy);
2678        out.token("keep");
2679        let snap = out.snapshot();
2680        out.token("drop");
2681        out.token("more");
2682        out.restore(snap);
2683        out.token("after");
2684        let bytes = out.finish();
2685        let s = std::str::from_utf8(&bytes).expect("ascii output");
2686        assert!(s.contains("keep"), "kept token survives: {s:?}");
2687        assert!(s.contains("after"), "post-restore token visible: {s:?}");
2688        assert!(!s.contains("drop"), "rolled-back token removed: {s:?}");
2689        assert!(!s.contains("more"), "rolled-back token removed: {s:?}");
2690    }
2691
2692    #[test]
2693    fn child_cursor_take_field_consumes_once() {
2694        let edges_owned: Vec<Edge> = vec![Edge {
2695            src: panproto_gat::Name::from("p"),
2696            tgt: panproto_gat::Name::from("c"),
2697            kind: panproto_gat::Name::from("name"),
2698            name: None,
2699        }];
2700        let edges: Vec<&Edge> = edges_owned.iter().collect();
2701        let mut cursor = ChildCursor::new(&edges);
2702        let first = cursor.take_field("name");
2703        let second = cursor.take_field("name");
2704        assert!(first.is_some(), "first take returns the edge");
2705        assert!(
2706            second.is_none(),
2707            "second take returns None (already consumed)"
2708        );
2709    }
2710
2711    #[test]
2712    fn child_cursor_take_matching_predicate() {
2713        let edges_owned: Vec<Edge> = vec![
2714            Edge {
2715                src: "p".into(),
2716                tgt: "c1".into(),
2717                kind: "child_of".into(),
2718                name: None,
2719            },
2720            Edge {
2721                src: "p".into(),
2722                tgt: "c2".into(),
2723                kind: "key".into(),
2724                name: None,
2725            },
2726        ];
2727        let edges: Vec<&Edge> = edges_owned.iter().collect();
2728        let mut cursor = ChildCursor::new(&edges);
2729        assert!(cursor.has_matching(|e| e.kind.as_ref() == "key"));
2730        let taken = cursor.take_matching(|e| e.kind.as_ref() == "key");
2731        assert!(taken.is_some());
2732        assert!(
2733            !cursor.has_matching(|e| e.kind.as_ref() == "key"),
2734            "consumed edge no longer matches"
2735        );
2736        assert!(
2737            cursor.has_matching(|e| e.kind.as_ref() == "child_of"),
2738            "the other edge is still available"
2739        );
2740    }
2741
2742    #[test]
2743    fn kind_satisfies_symbol_direct_match() {
2744        let bytes = br#"{
2745            "name": "tiny",
2746            "rules": {
2747                "x": {"type": "STRING", "value": "x"}
2748            }
2749        }"#;
2750        let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
2751        assert!(kind_satisfies_symbol(&g, Some("x"), "x"));
2752        assert!(!kind_satisfies_symbol(&g, Some("y"), "x"));
2753        assert!(!kind_satisfies_symbol(&g, None, "x"));
2754    }
2755
2756    #[test]
2757    fn kind_satisfies_symbol_through_hidden_rule() {
2758        let bytes = br#"{
2759            "name": "tiny",
2760            "rules": {
2761                "_value": {
2762                    "type": "CHOICE",
2763                    "members": [
2764                        {"type": "SYMBOL", "name": "object"},
2765                        {"type": "SYMBOL", "name": "number"}
2766                    ]
2767                },
2768                "object": {"type": "STRING", "value": "{}"},
2769                "number": {"type": "PATTERN", "value": "[0-9]+"}
2770            }
2771        }"#;
2772        let g = Grammar::from_bytes("tiny", bytes).expect("valid grammar");
2773        assert!(
2774            kind_satisfies_symbol(&g, Some("number"), "_value"),
2775            "number is reachable from _value via CHOICE"
2776        );
2777        assert!(
2778            kind_satisfies_symbol(&g, Some("object"), "_value"),
2779            "object is reachable from _value via CHOICE"
2780        );
2781        assert!(
2782            !kind_satisfies_symbol(&g, Some("string"), "_value"),
2783            "string is NOT among the alternatives"
2784        );
2785    }
2786
2787    #[test]
2788    fn first_symbol_skips_string_terminals() {
2789        let prod: Production = serde_json::from_str(
2790            r#"{
2791                "type": "SEQ",
2792                "members": [
2793                    {"type": "STRING", "value": "{"},
2794                    {"type": "SYMBOL", "name": "body"},
2795                    {"type": "STRING", "value": "}"}
2796                ]
2797            }"#,
2798        )
2799        .expect("valid SEQ");
2800        assert_eq!(first_symbol(&prod), Some("body"));
2801    }
2802
2803    #[test]
2804    fn placeholder_for_pattern_routes_by_regex_class() {
2805        assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
2806        assert_eq!(placeholder_for_pattern("[a-zA-Z_]\\w*"), "_x");
2807        assert_eq!(placeholder_for_pattern("\"[^\"]*\""), "\"\"");
2808        assert_eq!(placeholder_for_pattern("\\d+\\.\\d+"), "0");
2809    }
2810
2811    #[test]
2812    fn format_policy_default_breaks_after_semicolon() {
2813        let policy = FormatPolicy::default();
2814        assert!(policy.line_break_after.iter().any(|t| t == ";"));
2815        assert!(policy.indent_open.iter().any(|t| t == "{"));
2816        assert!(policy.indent_close.iter().any(|t| t == "}"));
2817        assert_eq!(policy.indent_width, 2);
2818    }
2819
2820    #[test]
2821    fn placeholder_decodes_literal_pattern_separators() {
2822        // PATTERN regexes that match a single literal byte sequence
2823        // (newline, semicolon, comma) emit the bytes verbatim instead
2824        // of falling through to the `_` catch-all.
2825        assert_eq!(placeholder_for_pattern("\\n"), "\n");
2826        assert_eq!(placeholder_for_pattern("\\r\\n"), "\r\n");
2827        assert_eq!(placeholder_for_pattern(";"), ";");
2828        // Patterns with character classes / alternation still route
2829        // through the heuristic.
2830        assert_eq!(placeholder_for_pattern("[0-9]+"), "0");
2831        assert_eq!(placeholder_for_pattern("a|b"), "_");
2832    }
2833
2834    #[test]
2835    fn supertypes_decode_from_grammar_json_strings() {
2836        // Tree-sitter older grammars list supertypes as bare strings.
2837        let bytes = br#"{
2838            "name": "tiny",
2839            "supertypes": ["expression"],
2840            "rules": {
2841                "expression": {
2842                    "type": "CHOICE",
2843                    "members": [
2844                        {"type": "SYMBOL", "name": "binary_expression"},
2845                        {"type": "SYMBOL", "name": "identifier"}
2846                    ]
2847                },
2848                "binary_expression": {"type": "STRING", "value": "x"},
2849                "identifier": {"type": "PATTERN", "value": "[a-z]+"}
2850            }
2851        }"#;
2852        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
2853        assert!(g.supertypes.contains("expression"));
2854        // identifier matches the supertype `expression`.
2855        assert!(kind_satisfies_symbol(&g, Some("identifier"), "expression"));
2856        // unrelated kinds do not.
2857        assert!(!kind_satisfies_symbol(&g, Some("string"), "expression"));
2858    }
2859
2860    #[test]
2861    fn supertypes_decode_from_grammar_json_objects() {
2862        // Recent grammars list supertypes as `{type: SYMBOL, name: ...}`
2863        // entries instead of bare strings.
2864        let bytes = br#"{
2865            "name": "tiny",
2866            "supertypes": [{"type": "SYMBOL", "name": "stmt"}],
2867            "rules": {
2868                "stmt": {
2869                    "type": "CHOICE",
2870                    "members": [
2871                        {"type": "SYMBOL", "name": "while_stmt"},
2872                        {"type": "SYMBOL", "name": "if_stmt"}
2873                    ]
2874                },
2875                "while_stmt": {"type": "STRING", "value": "while"},
2876                "if_stmt": {"type": "STRING", "value": "if"}
2877            }
2878        }"#;
2879        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
2880        assert!(g.supertypes.contains("stmt"));
2881        assert!(kind_satisfies_symbol(&g, Some("while_stmt"), "stmt"));
2882    }
2883
2884    #[test]
2885    fn alias_value_matches_kind() {
2886        // A named ALIAS rewrites the parser-visible kind to `value`;
2887        // `kind_satisfies_symbol` should accept that rewritten kind
2888        // when looking up the original SYMBOL.
2889        let bytes = br#"{
2890            "name": "tiny",
2891            "rules": {
2892                "_package_identifier": {
2893                    "type": "ALIAS",
2894                    "named": true,
2895                    "value": "package_identifier",
2896                    "content": {"type": "SYMBOL", "name": "identifier"}
2897                },
2898                "identifier": {"type": "PATTERN", "value": "[a-z]+"}
2899            }
2900        }"#;
2901        let g = Grammar::from_bytes("tiny", bytes).expect("parse");
2902        assert!(kind_satisfies_symbol(
2903            &g,
2904            Some("package_identifier"),
2905            "_package_identifier"
2906        ));
2907    }
2908
2909    #[test]
2910    fn referenced_symbols_walks_nested_seq() {
2911        let prod: Production = serde_json::from_str(
2912            r#"{
2913                "type": "SEQ",
2914                "members": [
2915                    {"type": "CHOICE", "members": [
2916                        {"type": "SYMBOL", "name": "attribute_item"},
2917                        {"type": "BLANK"}
2918                    ]},
2919                    {"type": "SYMBOL", "name": "parameter"},
2920                    {"type": "REPEAT", "content": {
2921                        "type": "SEQ",
2922                        "members": [
2923                            {"type": "STRING", "value": ","},
2924                            {"type": "SYMBOL", "name": "parameter"}
2925                        ]
2926                    }}
2927                ]
2928            }"#,
2929        )
2930        .expect("seq");
2931        let symbols = referenced_symbols(&prod);
2932        assert!(symbols.contains(&"attribute_item"));
2933        assert!(symbols.contains(&"parameter"));
2934    }
2935
2936    #[test]
2937    fn literal_strings_collects_choice_members() {
2938        let prod: Production = serde_json::from_str(
2939            r#"{
2940                "type": "CHOICE",
2941                "members": [
2942                    {"type": "STRING", "value": "+"},
2943                    {"type": "STRING", "value": "-"},
2944                    {"type": "STRING", "value": "*"}
2945                ]
2946            }"#,
2947        )
2948        .expect("choice");
2949        let strings = literal_strings(&prod);
2950        assert_eq!(strings, vec!["+", "-", "*"]);
2951    }
2952
2953    /// The ocaml and javascript grammars (tree-sitter ≥ 0.25) emit a
2954    /// `RESERVED` rule kind that an earlier deserialiser rejected
2955    /// with `unknown variant "RESERVED"`. Verify both that the bare
2956    /// variant deserialises and that a `RESERVED`-wrapped grammar is
2957    /// loadable end-to-end via [`Grammar::from_bytes`].
2958    #[test]
2959    fn reserved_variant_deserialises() {
2960        let prod: Production = serde_json::from_str(
2961            r#"{
2962                "type": "RESERVED",
2963                "content": {"type": "SYMBOL", "name": "_lowercase_identifier"},
2964                "context_name": "attribute_id"
2965            }"#,
2966        )
2967        .expect("RESERVED parses");
2968        match prod {
2969            Production::Reserved { content, .. } => match *content {
2970                Production::Symbol { name } => assert_eq!(name, "_lowercase_identifier"),
2971                other => panic!("expected inner SYMBOL, got {other:?}"),
2972            },
2973            other => panic!("expected RESERVED, got {other:?}"),
2974        }
2975    }
2976
2977    #[test]
2978    fn reserved_grammar_loads_end_to_end() {
2979        let bytes = br#"{
2980            "name": "tiny_reserved",
2981            "rules": {
2982                "program": {
2983                    "type": "RESERVED",
2984                    "content": {"type": "SYMBOL", "name": "ident"},
2985                    "context_name": "keywords"
2986                },
2987                "ident": {"type": "PATTERN", "value": "[a-z]+"}
2988            }
2989        }"#;
2990        let g = Grammar::from_bytes("tiny_reserved", bytes).expect("RESERVED-using grammar loads");
2991        assert!(g.rules.contains_key("program"));
2992    }
2993
2994    #[test]
2995    fn reserved_walker_helpers_recurse_into_content() {
2996        // The walker's helpers (first_symbol, has_field_in,
2997        // referenced_symbols, ...) all need to descend through
2998        // RESERVED into its content. If they bail at RESERVED, the
2999        // `pick_choice_with_cursor` heuristic ranks the alt below
3000        // alts that DO recurse, which produces wrong emit output
3001        // even when the deserialiser doesn't crash.
3002        let prod: Production = serde_json::from_str(
3003            r#"{
3004                "type": "RESERVED",
3005                "content": {
3006                    "type": "FIELD",
3007                    "name": "lhs",
3008                    "content": {"type": "SYMBOL", "name": "expr"}
3009                },
3010                "context_name": "ctx"
3011            }"#,
3012        )
3013        .expect("nested RESERVED parses");
3014        assert_eq!(first_symbol(&prod), Some("expr"));
3015        assert!(has_field_in(&prod, &["lhs"]));
3016        let symbols = referenced_symbols(&prod);
3017        assert!(symbols.contains(&"expr"));
3018    }
3019
3020    // -- Yield-set tests --
3021
3022    fn yield_of(grammar: &Grammar, prod: &Production) -> std::collections::HashSet<String> {
3023        let mut visited = std::collections::HashSet::new();
3024        let mut cache = grammar.yield_sets.clone();
3025        yield_of_production(grammar, prod, &mut visited, &mut cache)
3026    }
3027
3028    #[test]
3029    fn yield_set_seq_only_first_member() {
3030        let prod: Production = serde_json::from_str(
3031            r#"{
3032                "type": "SEQ",
3033                "members": [
3034                    {"type": "SYMBOL", "name": "identifier"},
3035                    {"type": "STRING", "value": "as"},
3036                    {"type": "SYMBOL", "name": "target"}
3037                ]
3038            }"#,
3039        )
3040        .expect("valid SEQ");
3041        let g = Grammar::from_bytes("test", b"{}").unwrap_or_else(|_| {
3042            serde_json::from_str::<Grammar>(r#"{"name":"t","rules":{}}"#).unwrap()
3043        });
3044        let ys = yield_of(&g, &prod);
3045        assert!(ys.contains("identifier"), "SEQ yields first member");
3046        assert!(
3047            !ys.contains("target"),
3048            "SEQ must NOT yield non-first members"
3049        );
3050    }
3051
3052    #[test]
3053    fn yield_set_choice_union() {
3054        let prod: Production = serde_json::from_str(
3055            r#"{
3056                "type": "CHOICE",
3057                "members": [
3058                    {"type": "SYMBOL", "name": "a"},
3059                    {"type": "SYMBOL", "name": "b"}
3060                ]
3061            }"#,
3062        )
3063        .expect("valid CHOICE");
3064        let g = serde_json::from_str::<Grammar>(r#"{"name":"t","rules":{}}"#).unwrap();
3065        let ys = yield_of(&g, &prod);
3066        assert_eq!(ys.len(), 2);
3067        assert!(ys.contains("a"));
3068        assert!(ys.contains("b"));
3069    }
3070
3071    #[test]
3072    fn yield_set_hidden_expansion() {
3073        let g = serde_json::from_str::<Grammar>(
3074            r#"{"name":"t","rules":{
3075                "_value": {
3076                    "type": "CHOICE",
3077                    "members": [
3078                        {"type": "SYMBOL", "name": "number"},
3079                        {"type": "SYMBOL", "name": "object"}
3080                    ]
3081                }
3082            }}"#,
3083        )
3084        .unwrap();
3085        let mut g = g;
3086        g.subtypes = compute_subtype_closure(&g);
3087        g.yield_sets = compute_yield_sets(&g);
3088        let sym: Production =
3089            serde_json::from_str(r#"{"type": "SYMBOL", "name": "_value"}"#).unwrap();
3090        let ys = yield_of(&g, &sym);
3091        assert!(
3092            ys.contains("number"),
3093            "hidden rule expands into its CHOICE members"
3094        );
3095        assert!(ys.contains("object"));
3096        assert!(
3097            !ys.contains("_value"),
3098            "hidden rule name is not in yield set"
3099        );
3100    }
3101
3102    #[test]
3103    fn yield_set_optional_includes_epsilon() {
3104        let prod: Production = serde_json::from_str(
3105            r#"{"type": "OPTIONAL", "content": {"type": "SYMBOL", "name": "x"}}"#,
3106        )
3107        .unwrap();
3108        let g = serde_json::from_str::<Grammar>(r#"{"name":"t","rules":{}}"#).unwrap();
3109        let ys = yield_of(&g, &prod);
3110        assert!(ys.contains("x"));
3111        assert!(ys.contains(""), "OPTIONAL includes epsilon");
3112    }
3113
3114    #[test]
3115    fn yield_set_alias_uses_value() {
3116        let prod: Production = serde_json::from_str(
3117            r#"{"type": "ALIAS", "content": {"type": "SYMBOL", "name": "real"},
3118                "named": true, "value": "alias_name"}"#,
3119        )
3120        .unwrap();
3121        let g = serde_json::from_str::<Grammar>(r#"{"name":"t","rules":{}}"#).unwrap();
3122        let ys = yield_of(&g, &prod);
3123        assert_eq!(ys.len(), 1);
3124        assert!(ys.contains("alias_name"), "named ALIAS yields its value");
3125    }
3126}