plotnik_lib/bytecode/
dump.rs

1//! Human-readable bytecode dump for debugging and documentation.
2//!
3//! See `docs/binary-format/07-dump-format.md` for the output format specification.
4
5use std::collections::BTreeMap;
6use std::fmt::Write as _;
7
8use crate::colors::Colors;
9
10use super::format::{LineBuilder, Symbol, format_effect, nav_symbol, width_for_count};
11use super::ids::TypeId;
12use super::instructions::StepId;
13use super::ir::NodeTypeIR;
14use super::module::{Instruction, Module};
15use super::type_meta::{TypeData, TypeKind};
16use super::{Call, Match, Return, Trampoline};
17
18/// Generate a human-readable dump of the bytecode module.
19pub fn dump(module: &Module, colors: Colors) -> String {
20    let mut out = String::new();
21    let ctx = DumpContext::new(module, colors);
22
23    dump_header(&mut out, module, &ctx);
24    dump_strings(&mut out, module, &ctx);
25    dump_types_defs(&mut out, module, &ctx);
26    dump_types_members(&mut out, module, &ctx);
27    dump_types_names(&mut out, module, &ctx);
28    dump_entrypoints(&mut out, module, &ctx);
29    dump_code(&mut out, module, &ctx);
30
31    out
32}
33
34fn dump_header(out: &mut String, module: &Module, ctx: &DumpContext) {
35    let c = &ctx.colors;
36    let header = module.header();
37    writeln!(out, "{}[flags]{}", c.blue, c.reset).unwrap();
38    writeln!(out, "linked = {}", header.is_linked()).unwrap();
39    out.push('\n');
40}
41
42/// Context for dump formatting, precomputes lookups for O(1) access.
43struct DumpContext {
44    /// Whether the bytecode is linked (contains grammar IDs vs StringIds).
45    is_linked: bool,
46    /// Maps step ID to entrypoint name for labeling.
47    step_labels: BTreeMap<u16, String>,
48    /// Maps node type ID to name (linked mode only).
49    node_type_names: BTreeMap<u16, String>,
50    /// Maps node field ID to name (linked mode only).
51    node_field_names: BTreeMap<u16, String>,
52    /// All strings (for unlinked mode lookups).
53    all_strings: Vec<String>,
54    /// Width for string indices (S#).
55    str_width: usize,
56    /// Width for type indices (T#).
57    type_width: usize,
58    /// Width for member indices (M#).
59    member_width: usize,
60    /// Width for name indices (N#).
61    name_width: usize,
62    /// Width for step indices.
63    step_width: usize,
64    /// Color palette.
65    colors: Colors,
66}
67
68impl DumpContext {
69    fn new(module: &Module, colors: Colors) -> Self {
70        let header = module.header();
71        let is_linked = header.is_linked();
72        let strings = module.strings();
73        let entrypoints = module.entrypoints();
74        let node_types = module.node_types();
75        let node_fields = module.node_fields();
76
77        let mut step_labels = BTreeMap::new();
78        // Preamble always at step 0 (first in layout)
79        step_labels.insert(0, "_ObjWrap".to_string());
80        for i in 0..entrypoints.len() {
81            let ep = entrypoints.get(i);
82            let name = strings.get(ep.name()).to_string();
83            step_labels.insert(ep.target(), name);
84        }
85
86        let mut node_type_names = BTreeMap::new();
87        for i in 0..node_types.len() {
88            let t = node_types.get(i);
89            node_type_names.insert(t.id(), strings.get(t.name()).to_string());
90        }
91
92        let mut node_field_names = BTreeMap::new();
93        for i in 0..node_fields.len() {
94            let f = node_fields.get(i);
95            node_field_names.insert(f.id(), strings.get(f.name()).to_string());
96        }
97
98        // Collect all strings for unlinked mode lookups
99        let str_count = header.str_table_count as usize;
100        let all_strings: Vec<String> = (0..str_count)
101            .map(|i| strings.get_by_index(i).to_string())
102            .collect();
103
104        // Compute widths for index formatting
105        let types = module.types();
106        let type_count = 3 + types.defs_count(); // 3 builtins + custom types
107        let str_width = width_for_count(str_count);
108        let type_width = width_for_count(type_count);
109        let member_width = width_for_count(types.members_count());
110        let name_width = width_for_count(types.names_count());
111        let step_width = width_for_count(header.transitions_count as usize);
112
113        Self {
114            is_linked,
115            step_labels,
116            node_type_names,
117            node_field_names,
118            all_strings,
119            str_width,
120            type_width,
121            member_width,
122            name_width,
123            step_width,
124            colors,
125        }
126    }
127
128    fn label_for(&self, step: StepId) -> Option<&str> {
129        self.step_labels.get(&step.get()).map(|s| s.as_str())
130    }
131
132    /// Get the name for a node type ID.
133    ///
134    /// In linked mode, this looks up the grammar's node type symbol table.
135    /// In unlinked mode, this looks up the StringId from the strings table.
136    fn node_type_name(&self, id: u16) -> Option<&str> {
137        if self.is_linked {
138            self.node_type_names.get(&id).map(|s| s.as_str())
139        } else {
140            // In unlinked mode, id is a StringId
141            self.all_strings.get(id as usize).map(|s| s.as_str())
142        }
143    }
144
145    /// Get the name for a node field ID.
146    ///
147    /// In linked mode, this looks up the grammar's node field symbol table.
148    /// In unlinked mode, this looks up the StringId from the strings table.
149    fn node_field_name(&self, id: u16) -> Option<&str> {
150        if self.is_linked {
151            self.node_field_names.get(&id).map(|s| s.as_str())
152        } else {
153            // In unlinked mode, id is a StringId
154            self.all_strings.get(id as usize).map(|s| s.as_str())
155        }
156    }
157}
158
159fn dump_strings(out: &mut String, module: &Module, ctx: &DumpContext) {
160    let c = &ctx.colors;
161    let strings = module.strings();
162    let count = module.header().str_table_count as usize;
163    let w = ctx.str_width;
164
165    writeln!(out, "{}[strings]{}", c.blue, c.reset).unwrap();
166    for i in 0..count {
167        let s = strings.get_by_index(i);
168        writeln!(out, "S{i:0w$} {}{s:?}{}", c.green, c.reset).unwrap();
169    }
170    out.push('\n');
171}
172
173fn dump_types_defs(out: &mut String, module: &Module, ctx: &DumpContext) {
174    let c = &ctx.colors;
175    let types = module.types();
176    let strings = module.strings();
177    let tw = ctx.type_width;
178    let mw = ctx.member_width;
179
180    writeln!(out, "{}[type_defs]{}", c.blue, c.reset).unwrap();
181
182    // All types are now in type_defs, including builtins
183    for i in 0..types.defs_count() {
184        let def = types.get_def(i);
185
186        let (formatted, comment) = match def.classify() {
187            TypeData::Primitive(kind) => {
188                let name = match kind {
189                    TypeKind::Void => "<Void>",
190                    TypeKind::Node => "<Node>",
191                    TypeKind::String => "<String>",
192                    _ => unreachable!(),
193                };
194                (name.to_string(), String::new())
195            }
196            TypeData::Wrapper { kind, inner } => {
197                let formatted = match kind {
198                    TypeKind::Optional => format!("Optional(T{:0tw$})", inner.0),
199                    TypeKind::ArrayZeroOrMore => format!("ArrayStar(T{:0tw$})", inner.0),
200                    TypeKind::ArrayOneOrMore => format!("ArrayPlus(T{:0tw$})", inner.0),
201                    TypeKind::Alias => format!("Alias(T{:0tw$})", inner.0),
202                    _ => unreachable!(),
203                };
204                let comment = match kind {
205                    TypeKind::Optional => {
206                        let inner_name = format_type_name(inner, module, ctx);
207                        format!("{}  ; {}?{}", c.dim, inner_name, c.reset)
208                    }
209                    TypeKind::ArrayZeroOrMore => {
210                        let inner_name = format_type_name(inner, module, ctx);
211                        format!("{}  ; {}*{}", c.dim, inner_name, c.reset)
212                    }
213                    TypeKind::ArrayOneOrMore => {
214                        let inner_name = format_type_name(inner, module, ctx);
215                        format!("{}  ; {}+{}", c.dim, inner_name, c.reset)
216                    }
217                    TypeKind::Alias => String::new(),
218                    _ => unreachable!(),
219                };
220                (formatted, comment)
221            }
222            TypeData::Composite {
223                kind,
224                member_start,
225                member_count,
226            } => {
227                let formatted = match kind {
228                    TypeKind::Struct => {
229                        format!("Struct  M{:0mw$}:{}", member_start, member_count)
230                    }
231                    TypeKind::Enum => format!("Enum    M{:0mw$}:{}", member_start, member_count),
232                    _ => unreachable!(),
233                };
234                let comment = match kind {
235                    TypeKind::Struct => {
236                        let fields: Vec<_> = types
237                            .members_of(&def)
238                            .map(|m| strings.get(m.name()).to_string())
239                            .collect();
240                        format!("{}  ; {{ {} }}{}", c.dim, fields.join(", "), c.reset)
241                    }
242                    TypeKind::Enum => {
243                        let variants: Vec<_> = types
244                            .members_of(&def)
245                            .map(|m| strings.get(m.name()).to_string())
246                            .collect();
247                        format!("{}  ; {}{}", c.dim, variants.join(" | "), c.reset)
248                    }
249                    _ => unreachable!(),
250                };
251                (formatted, comment)
252            }
253        };
254
255        writeln!(out, "T{i:0tw$} = {formatted}{comment}").unwrap();
256    }
257    out.push('\n');
258}
259
260fn dump_types_members(out: &mut String, module: &Module, ctx: &DumpContext) {
261    let c = &ctx.colors;
262    let types = module.types();
263    let strings = module.strings();
264    let mw = ctx.member_width;
265    let sw = ctx.str_width;
266    let tw = ctx.type_width;
267
268    writeln!(out, "{}[type_members]{}", c.blue, c.reset).unwrap();
269    for i in 0..types.members_count() {
270        let member = types.get_member(i);
271        let name = strings.get(member.name);
272        let type_name = format_type_name(member.type_id, module, ctx);
273        writeln!(
274            out,
275            "M{i:0mw$}: S{:0sw$} → T{:0tw$}  {}; {name}: {type_name}{}",
276            member.name.0, member.type_id.0, c.dim, c.reset
277        )
278        .unwrap();
279    }
280    out.push('\n');
281}
282
283fn dump_types_names(out: &mut String, module: &Module, ctx: &DumpContext) {
284    let c = &ctx.colors;
285    let types = module.types();
286    let strings = module.strings();
287    let nw = ctx.name_width;
288    let sw = ctx.str_width;
289    let tw = ctx.type_width;
290
291    writeln!(out, "{}[type_names]{}", c.blue, c.reset).unwrap();
292    for i in 0..types.names_count() {
293        let entry = types.get_name(i);
294        let name = strings.get(entry.name);
295        writeln!(
296            out,
297            "N{i:0nw$}: S{:0sw$} → T{:0tw$}  {}; {}{name}{}",
298            entry.name.0, entry.type_id.0, c.dim, c.blue, c.reset
299        )
300        .unwrap();
301    }
302    out.push('\n');
303}
304
305/// Format a type ID as a human-readable name.
306fn format_type_name(type_id: TypeId, module: &Module, ctx: &DumpContext) -> String {
307    let types = module.types();
308    let strings = module.strings();
309
310    // Check if it's a primitive type
311    if let Some(def) = types.get(type_id)
312        && let TypeData::Primitive(kind) = def.classify()
313        && let Some(name) = kind.primitive_name()
314    {
315        return format!("<{}>", name);
316    }
317
318    // Try to find a name in types.names
319    for i in 0..types.names_count() {
320        let entry = types.get_name(i);
321        if entry.type_id() == type_id {
322            return strings.get(entry.name()).to_string();
323        }
324    }
325
326    // Fall back to T# format
327    let tw = ctx.type_width;
328    format!("T{:0tw$}", type_id.0)
329}
330
331fn dump_entrypoints(out: &mut String, module: &Module, ctx: &DumpContext) {
332    let c = &ctx.colors;
333    let strings = module.strings();
334    let entrypoints = module.entrypoints();
335    let stw = ctx.step_width;
336    let tw = ctx.type_width;
337
338    writeln!(out, "{}[entrypoints]{}", c.blue, c.reset).unwrap();
339
340    // Collect and sort by name for display
341    let mut entries: Vec<_> = (0..entrypoints.len())
342        .map(|i| {
343            let ep = entrypoints.get(i);
344            let name = strings.get(ep.name());
345            (name, ep.target(), ep.result_type().0)
346        })
347        .collect();
348    entries.sort_by_key(|(name, _, _)| *name);
349
350    // Find max name length for alignment
351    let max_len = entries.iter().map(|(n, _, _)| n.len()).max().unwrap_or(0);
352
353    for (name, target, type_id) in entries {
354        writeln!(
355            out,
356            "{}{name:width$}{} = {:0stw$} :: T{type_id:0tw$}",
357            c.blue,
358            c.reset,
359            target,
360            width = max_len
361        )
362        .unwrap();
363    }
364    out.push('\n');
365}
366
367fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) {
368    let c = &ctx.colors;
369    let header = module.header();
370    let transitions_count = header.transitions_count as usize;
371    let step_width = ctx.step_width;
372
373    writeln!(out, "{}[transitions]{}", c.blue, c.reset).unwrap();
374
375    let mut step = 0u16;
376    let mut first_label = true;
377    while (step as usize) < transitions_count {
378        // Check if this step has a label (using raw u16)
379        if let Some(label) = ctx.step_labels.get(&step) {
380            if first_label {
381                writeln!(out, "{}{label}{}:", c.blue, c.reset).unwrap();
382                first_label = false;
383            } else {
384                writeln!(out, "\n{}{label}{}:", c.blue, c.reset).unwrap();
385            }
386        }
387
388        let instr = module.decode_step(step);
389        let line = format_instruction(step, &instr, module, ctx, step_width);
390        out.push_str(&line);
391        out.push('\n');
392
393        // Advance by instruction size
394        let size = instruction_step_count(&instr);
395        step += size;
396    }
397}
398
399fn instruction_step_count(instr: &Instruction) -> u16 {
400    match instr {
401        Instruction::Match(m) => {
402            let pre = m.pre_effects().count();
403            let neg = m.neg_fields().count();
404            let post = m.post_effects().count();
405            let succ = m.succ_count();
406            let slots = pre + neg + post + succ;
407
408            if pre == 0 && neg == 0 && post == 0 && succ <= 1 {
409                1 // Match8
410            } else if slots <= 4 {
411                2 // Match16
412            } else if slots <= 8 {
413                3 // Match24
414            } else if slots <= 12 {
415                4 // Match32
416            } else if slots <= 20 {
417                6 // Match48
418            } else {
419                8 // Match64
420            }
421        }
422        Instruction::Call(_) | Instruction::Return(_) | Instruction::Trampoline(_) => 1,
423    }
424}
425
426fn format_instruction(
427    step: u16,
428    instr: &Instruction,
429    module: &Module,
430    ctx: &DumpContext,
431    step_width: usize,
432) -> String {
433    match instr {
434        Instruction::Match(m) => format_match(step, m, module, ctx, step_width),
435        Instruction::Call(c) => format_call(step, c, module, ctx, step_width),
436        Instruction::Return(r) => format_return(step, r, module, ctx, step_width),
437        Instruction::Trampoline(t) => format_trampoline(step, t, ctx, step_width),
438    }
439}
440
441fn format_match(
442    step: u16,
443    m: &Match,
444    _module: &Module,
445    ctx: &DumpContext,
446    step_width: usize,
447) -> String {
448    let builder = LineBuilder::new(step_width);
449    let symbol = nav_symbol(m.nav);
450    let prefix = format!("  {:0sw$} {} ", step, symbol.format(), sw = step_width);
451
452    let content = format_match_content(m, ctx);
453    let successors = format_match_successors(m, ctx, step_width);
454
455    let base = format!("{prefix}{content}");
456    builder.pad_successors(base, &successors)
457}
458
459fn format_match_content(m: &Match, ctx: &DumpContext) -> String {
460    let mut parts = Vec::new();
461
462    let pre: Vec<_> = m.pre_effects().map(|e| format_effect(&e)).collect();
463    if !pre.is_empty() {
464        parts.push(format!("[{}]", pre.join(" ")));
465    }
466
467    // Skip neg_fields and node pattern for epsilon (no node interaction)
468    if !m.is_epsilon() {
469        for field_id in m.neg_fields() {
470            let name = ctx
471                .node_field_name(field_id)
472                .map(String::from)
473                .unwrap_or_else(|| format!("field#{field_id}"));
474            parts.push(format!("-{name}"));
475        }
476
477        let node_part = format_node_pattern(m, ctx);
478        if !node_part.is_empty() {
479            parts.push(node_part);
480        }
481    }
482
483    let post: Vec<_> = m.post_effects().map(|e| format_effect(&e)).collect();
484    if !post.is_empty() {
485        parts.push(format!("[{}]", post.join(" ")));
486    }
487
488    parts.join(" ")
489}
490
491/// Format node pattern: `field: (type)` or `(type)` or `field: _` or `(_)` or `"text"`
492fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
493    let mut result = String::new();
494
495    if let Some(field_id) = m.node_field {
496        let name = ctx
497            .node_field_name(field_id.get())
498            .map(String::from)
499            .unwrap_or_else(|| format!("field#{}", field_id.get()));
500        result.push_str(&name);
501        result.push_str(": ");
502    }
503
504    match m.node_type {
505        NodeTypeIR::Any => {
506            // Any node wildcard: `_`
507            result.push('_');
508        }
509        NodeTypeIR::Named(None) => {
510            // Named wildcard: any named node
511            result.push_str("(_)");
512        }
513        NodeTypeIR::Named(Some(type_id)) => {
514            // Specific named node type
515            let name = ctx
516                .node_type_name(type_id.get())
517                .map(String::from)
518                .unwrap_or_else(|| format!("node#{}", type_id.get()));
519            result.push('(');
520            result.push_str(&name);
521            result.push(')');
522        }
523        NodeTypeIR::Anonymous(None) => {
524            // Anonymous wildcard: any anonymous node (future syntax)
525            result.push_str("\"_\"");
526        }
527        NodeTypeIR::Anonymous(Some(type_id)) => {
528            // Specific anonymous node (literal token)
529            let name = ctx
530                .node_type_name(type_id.get())
531                .map(String::from)
532                .unwrap_or_else(|| format!("anon#{}", type_id.get()));
533            result.push('"');
534            result.push_str(&name);
535            result.push('"');
536        }
537    }
538
539    result
540}
541
542fn format_match_successors(m: &Match, ctx: &DumpContext, step_width: usize) -> String {
543    if m.is_terminal() {
544        "◼".to_string()
545    } else {
546        m.successors()
547            .map(|s| format_step(s, ctx, step_width))
548            .collect::<Vec<_>>()
549            .join(", ")
550    }
551}
552
553fn format_call(
554    step: u16,
555    call: &Call,
556    _module: &Module,
557    ctx: &DumpContext,
558    step_width: usize,
559) -> String {
560    let c = &ctx.colors;
561    let builder = LineBuilder::new(step_width);
562    let symbol = nav_symbol(call.nav());
563    let prefix = format!("  {:0sw$} {} ", step, symbol.format(), sw = step_width);
564
565    // Format field constraint if present
566    let field_part = if let Some(field_id) = call.node_field {
567        let name = ctx
568            .node_field_name(field_id.get())
569            .map(String::from)
570            .unwrap_or_else(|| format!("field#{}", field_id.get()));
571        format!("{name}: ")
572    } else {
573        String::new()
574    };
575
576    let target_name = ctx
577        .label_for(call.target)
578        .map(String::from)
579        .unwrap_or_else(|| format!("@{:0w$}", call.target.0, w = step_width));
580    // Definition name in call is blue
581    let content = format!("{field_part}({}{}{})", c.blue, target_name, c.reset);
582    // Format as "target : return" with numeric IDs
583    let successors = format!(
584        "{:0w$} : {:0w$}",
585        call.target.get(),
586        call.next.get(),
587        w = step_width
588    );
589
590    let base = format!("{prefix}{content}");
591    builder.pad_successors(base, &successors)
592}
593
594fn format_return(
595    step: u16,
596    _r: &Return,
597    _module: &Module,
598    _ctx: &DumpContext,
599    step_width: usize,
600) -> String {
601    let builder = LineBuilder::new(step_width);
602    let prefix = format!(
603        "  {:0sw$} {} ",
604        step,
605        Symbol::EMPTY.format(),
606        sw = step_width
607    );
608    builder.pad_successors(prefix, "▶")
609}
610
611fn format_trampoline(step: u16, t: &Trampoline, _ctx: &DumpContext, step_width: usize) -> String {
612    let builder = LineBuilder::new(step_width);
613    let prefix = format!(
614        "  {:0sw$} {} ",
615        step,
616        Symbol::EMPTY.format(),
617        sw = step_width
618    );
619    let content = "Trampoline";
620    let successors = format!("{:0w$}", t.next.get(), w = step_width);
621    let base = format!("{prefix}{content}");
622    builder.pad_successors(base, &successors)
623}
624
625/// Format a step ID, showing entrypoint label or numeric ID.
626fn format_step(step: StepId, ctx: &DumpContext, step_width: usize) -> String {
627    let c = &ctx.colors;
628    if let Some(label) = ctx.label_for(step) {
629        format!("▶({}{}{})", c.blue, label, c.reset)
630    } else {
631        format!("{:0w$}", step.get(), w = step_width)
632    }
633}