plotnik_bytecode/bytecode/
dump.rs

1//! Human-readable bytecode dump for debugging and documentation.
2//!
3//! See `docs/binary-format/07-dump-format.md` for the output format specification.
4
5use std::collections::BTreeMap;
6use std::fmt::Write as _;
7
8use crate::predicate_op::PredicateOp;
9use plotnik_core::Colors;
10
11use super::format::{LineBuilder, Symbol, format_effect, nav_symbol, width_for_count};
12use super::ids::TypeId;
13use super::instructions::StepId;
14use super::module::{Instruction, Module};
15use super::nav::Nav;
16use super::node_type_ir::NodeTypeIR;
17use super::type_meta::{TypeData, TypeKind};
18use super::{Call, Match, Return, Trampoline};
19
20/// Generate a human-readable dump of the bytecode module.
21pub fn dump(module: &Module, colors: Colors) -> String {
22    let mut out = String::new();
23    let ctx = DumpContext::new(module, colors);
24
25    dump_strings(&mut out, module, &ctx);
26    dump_types_defs(&mut out, module, &ctx);
27    dump_types_members(&mut out, module, &ctx);
28    dump_types_names(&mut out, module, &ctx);
29    dump_entrypoints(&mut out, module, &ctx);
30    dump_code(&mut out, module, &ctx);
31
32    out
33}
34
35/// Context for dump formatting, precomputes lookups for O(1) access.
36struct DumpContext {
37    /// Maps step ID to entrypoint name for labeling.
38    step_labels: BTreeMap<u16, String>,
39    /// Maps node type ID to name.
40    node_type_names: BTreeMap<u16, String>,
41    /// Maps node field ID to name.
42    node_field_names: BTreeMap<u16, String>,
43    /// All strings (for predicate values, regex patterns, etc).
44    all_strings: Vec<String>,
45    /// Width for string indices (S#).
46    str_width: usize,
47    /// Width for type indices (T#).
48    type_width: usize,
49    /// Width for member indices (M#).
50    member_width: usize,
51    /// Width for name indices (N#).
52    name_width: usize,
53    /// Width for step indices.
54    step_width: usize,
55    /// Color palette.
56    colors: Colors,
57}
58
59impl DumpContext {
60    fn new(module: &Module, colors: Colors) -> Self {
61        let header = module.header();
62        let strings = module.strings();
63        let entrypoints = module.entrypoints();
64        let node_types = module.node_types();
65        let node_fields = module.node_fields();
66
67        let mut step_labels = BTreeMap::new();
68        // Preamble always at step 0 (first in layout)
69        step_labels.insert(0, "_ObjWrap".to_string());
70        for i in 0..entrypoints.len() {
71            let ep = entrypoints.get(i);
72            let name = strings.get(ep.name()).to_string();
73            step_labels.insert(ep.target(), name);
74        }
75
76        let mut node_type_names = BTreeMap::new();
77        for i in 0..node_types.len() {
78            let t = node_types.get(i);
79            node_type_names.insert(t.id, strings.get(t.name).to_string());
80        }
81
82        let mut node_field_names = BTreeMap::new();
83        for i in 0..node_fields.len() {
84            let f = node_fields.get(i);
85            node_field_names.insert(f.id, strings.get(f.name).to_string());
86        }
87
88        // Collect all strings for unlinked mode lookups
89        let str_count = header.str_table_count as usize;
90        let all_strings: Vec<String> = (0..str_count)
91            .map(|i| strings.get_by_index(i).to_string())
92            .collect();
93
94        // Compute widths for index formatting
95        let types = module.types();
96        let type_count = 3 + types.defs_count(); // 3 builtins + custom types
97        let str_width = width_for_count(str_count);
98        let type_width = width_for_count(type_count);
99        let member_width = width_for_count(types.members_count());
100        let name_width = width_for_count(types.names_count());
101        let step_width = width_for_count(header.transitions_count as usize);
102
103        Self {
104            step_labels,
105            node_type_names,
106            node_field_names,
107            all_strings,
108            str_width,
109            type_width,
110            member_width,
111            name_width,
112            step_width,
113            colors,
114        }
115    }
116
117    fn label_for(&self, step: StepId) -> Option<&str> {
118        self.step_labels.get(&step.get()).map(|s| s.as_str())
119    }
120
121    /// Get the name for a node type ID.
122    fn node_type_name(&self, id: u16) -> Option<&str> {
123        self.node_type_names.get(&id).map(|s| s.as_str())
124    }
125
126    /// Get the name for a node field ID.
127    fn node_field_name(&self, id: u16) -> Option<&str> {
128        self.node_field_names.get(&id).map(|s| s.as_str())
129    }
130}
131
132fn dump_strings(out: &mut String, module: &Module, ctx: &DumpContext) {
133    let c = &ctx.colors;
134    let strings = module.strings();
135    let count = module.header().str_table_count as usize;
136    let w = ctx.str_width;
137
138    writeln!(out, "{}[strings]{}", c.blue, c.reset).unwrap();
139    for i in 0..count {
140        let s = strings.get_by_index(i);
141        writeln!(out, "S{i:0w$} {}{s:?}{}", c.green, c.reset).unwrap();
142    }
143    out.push('\n');
144}
145
146fn dump_types_defs(out: &mut String, module: &Module, ctx: &DumpContext) {
147    let c = &ctx.colors;
148    let types = module.types();
149    let strings = module.strings();
150    let tw = ctx.type_width;
151    let mw = ctx.member_width;
152
153    writeln!(out, "{}[type_defs]{}", c.blue, c.reset).unwrap();
154
155    // All types are now in type_defs, including builtins
156    for i in 0..types.defs_count() {
157        let def = types.get_def(i);
158
159        let (formatted, comment) = match def.classify() {
160            TypeData::Primitive(kind) => {
161                let name = match kind {
162                    TypeKind::Void => "<Void>",
163                    TypeKind::Node => "<Node>",
164                    TypeKind::String => "<String>",
165                    _ => unreachable!(),
166                };
167                (name.to_string(), String::new())
168            }
169            TypeData::Wrapper { kind, inner } => {
170                let formatted = match kind {
171                    TypeKind::Optional => format!("Optional(T{:0tw$})", inner.0),
172                    TypeKind::ArrayZeroOrMore => format!("ArrayStar(T{:0tw$})", inner.0),
173                    TypeKind::ArrayOneOrMore => format!("ArrayPlus(T{:0tw$})", inner.0),
174                    TypeKind::Alias => format!("Alias(T{:0tw$})", inner.0),
175                    _ => unreachable!(),
176                };
177                let comment = match kind {
178                    TypeKind::Optional => {
179                        let inner_name = format_type_name(inner, module, ctx);
180                        format!("{}  ; {}?{}", c.dim, inner_name, c.reset)
181                    }
182                    TypeKind::ArrayZeroOrMore => {
183                        let inner_name = format_type_name(inner, module, ctx);
184                        format!("{}  ; {}*{}", c.dim, inner_name, c.reset)
185                    }
186                    TypeKind::ArrayOneOrMore => {
187                        let inner_name = format_type_name(inner, module, ctx);
188                        format!("{}  ; {}+{}", c.dim, inner_name, c.reset)
189                    }
190                    TypeKind::Alias => String::new(),
191                    _ => unreachable!(),
192                };
193                (formatted, comment)
194            }
195            TypeData::Composite {
196                kind,
197                member_start,
198                member_count,
199            } => {
200                let formatted = match kind {
201                    TypeKind::Struct => {
202                        format!("Struct  M{:0mw$}:{}", member_start, member_count)
203                    }
204                    TypeKind::Enum => format!("Enum    M{:0mw$}:{}", member_start, member_count),
205                    _ => unreachable!(),
206                };
207                let comment = match kind {
208                    TypeKind::Struct => {
209                        let fields: Vec<_> = types
210                            .members_of(&def)
211                            .map(|m| strings.get(m.name).to_string())
212                            .collect();
213                        format!("{}  ; {{ {} }}{}", c.dim, fields.join(", "), c.reset)
214                    }
215                    TypeKind::Enum => {
216                        let variants: Vec<_> = types
217                            .members_of(&def)
218                            .map(|m| strings.get(m.name).to_string())
219                            .collect();
220                        format!("{}  ; {}{}", c.dim, variants.join(" | "), c.reset)
221                    }
222                    _ => unreachable!(),
223                };
224                (formatted, comment)
225            }
226        };
227
228        writeln!(out, "T{i:0tw$} = {formatted}{comment}").unwrap();
229    }
230    out.push('\n');
231}
232
233fn dump_types_members(out: &mut String, module: &Module, ctx: &DumpContext) {
234    let c = &ctx.colors;
235    let types = module.types();
236    let strings = module.strings();
237    let mw = ctx.member_width;
238    let sw = ctx.str_width;
239    let tw = ctx.type_width;
240
241    writeln!(out, "{}[type_members]{}", c.blue, c.reset).unwrap();
242    for i in 0..types.members_count() {
243        let member = types.get_member(i);
244        let name = strings.get(member.name);
245        let type_name = format_type_name(member.type_id, module, ctx);
246        writeln!(
247            out,
248            "M{i:0mw$}: S{:0sw$} → T{:0tw$}  {}; {name}: {type_name}{}",
249            member.name.0, member.type_id.0, c.dim, c.reset
250        )
251        .unwrap();
252    }
253    out.push('\n');
254}
255
256fn dump_types_names(out: &mut String, module: &Module, ctx: &DumpContext) {
257    let c = &ctx.colors;
258    let types = module.types();
259    let strings = module.strings();
260    let nw = ctx.name_width;
261    let sw = ctx.str_width;
262    let tw = ctx.type_width;
263
264    writeln!(out, "{}[type_names]{}", c.blue, c.reset).unwrap();
265    for i in 0..types.names_count() {
266        let entry = types.get_name(i);
267        let name = strings.get(entry.name);
268        writeln!(
269            out,
270            "N{i:0nw$}: S{:0sw$} → T{:0tw$}  {}; {}{name}{}",
271            entry.name.0, entry.type_id.0, c.dim, c.blue, c.reset
272        )
273        .unwrap();
274    }
275    out.push('\n');
276}
277
278/// Format a type ID as a human-readable name.
279fn format_type_name(type_id: TypeId, module: &Module, ctx: &DumpContext) -> String {
280    let types = module.types();
281    let strings = module.strings();
282
283    // Check if it's a primitive type
284    if let Some(def) = types.get(type_id)
285        && let TypeData::Primitive(kind) = def.classify()
286        && let Some(name) = kind.primitive_name()
287    {
288        return format!("<{}>", name);
289    }
290
291    // Try to find a name in types.names
292    for i in 0..types.names_count() {
293        let entry = types.get_name(i);
294        if entry.type_id == type_id {
295            return strings.get(entry.name).to_string();
296        }
297    }
298
299    // Fall back to T# format
300    let tw = ctx.type_width;
301    format!("T{:0tw$}", type_id.0)
302}
303
304fn dump_entrypoints(out: &mut String, module: &Module, ctx: &DumpContext) {
305    let c = &ctx.colors;
306    let strings = module.strings();
307    let entrypoints = module.entrypoints();
308    let stw = ctx.step_width;
309    let tw = ctx.type_width;
310
311    writeln!(out, "{}[entrypoints]{}", c.blue, c.reset).unwrap();
312
313    // Collect and sort by name for display
314    let mut entries: Vec<_> = (0..entrypoints.len())
315        .map(|i| {
316            let ep = entrypoints.get(i);
317            let name = strings.get(ep.name());
318            (name, ep.target(), ep.result_type().0)
319        })
320        .collect();
321    entries.sort_by_key(|(name, _, _)| *name);
322
323    // Find max name length for alignment
324    let max_len = entries.iter().map(|(n, _, _)| n.len()).max().unwrap_or(0);
325
326    for (name, target, type_id) in entries {
327        writeln!(
328            out,
329            "{}{name:width$}{} = {:0stw$} :: T{type_id:0tw$}",
330            c.blue,
331            c.reset,
332            target,
333            width = max_len
334        )
335        .unwrap();
336    }
337    out.push('\n');
338}
339
340/// Check if an instruction is padding (all-zeros Match8).
341///
342/// Padding slots contain zero bytes which decode as terminal epsilon Match8
343/// with Any node type, no field constraint, and next=0.
344fn is_padding(instr: &Instruction) -> bool {
345    match instr {
346        Instruction::Match(m) => {
347            m.is_match8()
348                && m.nav == Nav::Epsilon
349                && matches!(m.node_type, NodeTypeIR::Any)
350                && m.node_field.is_none()
351                && m.is_terminal()
352        }
353        _ => false,
354    }
355}
356
357/// Format a single padding step line.
358///
359/// Output: `  07  ... ` (step number and " ... " in symbol column)
360fn format_padding_step(step: u16, step_width: usize) -> String {
361    LineBuilder::new(step_width).instruction_prefix(step, Symbol::PADDING)
362}
363
364fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) {
365    let c = &ctx.colors;
366    let header = module.header();
367    let transitions_count = header.transitions_count as usize;
368    let step_width = ctx.step_width;
369
370    writeln!(out, "{}[transitions]{}", c.blue, c.reset).unwrap();
371
372    let mut step = 0u16;
373    let mut first_label = true;
374    while (step as usize) < transitions_count {
375        // Check if this step has a label (using raw u16)
376        if let Some(label) = ctx.step_labels.get(&step) {
377            if first_label {
378                writeln!(out, "{}{label}{}:", c.blue, c.reset).unwrap();
379                first_label = false;
380            } else {
381                writeln!(out, "\n{}{label}{}:", c.blue, c.reset).unwrap();
382            }
383        }
384
385        let instr = module.decode_step(step);
386
387        // Check for padding (all-zeros Match8 instruction)
388        if is_padding(&instr) {
389            writeln!(out, "{}", format_padding_step(step, step_width)).unwrap();
390            step += 1;
391            continue;
392        }
393
394        let line = format_instruction(step, &instr, module, ctx, step_width);
395        out.push_str(&line);
396        out.push('\n');
397
398        // Advance by instruction size
399        let size = instruction_step_count(&instr);
400        step += size;
401    }
402}
403
404fn instruction_step_count(instr: &Instruction) -> u16 {
405    match instr {
406        Instruction::Match(m) => {
407            let pre = m.pre_effects().count();
408            let neg = m.neg_fields().count();
409            let post = m.post_effects().count();
410            let succ = m.succ_count();
411            let pred = if m.has_predicate() { 2 } else { 0 };
412            let slots = pre + neg + post + pred + succ;
413
414            if pre == 0 && neg == 0 && post == 0 && pred == 0 && succ <= 1 {
415                1 // Match8
416            } else if slots <= 4 {
417                2 // Match16
418            } else if slots <= 8 {
419                3 // Match24
420            } else if slots <= 12 {
421                4 // Match32
422            } else if slots <= 20 {
423                6 // Match48
424            } else {
425                8 // Match64
426            }
427        }
428        Instruction::Call(_) | Instruction::Return(_) | Instruction::Trampoline(_) => 1,
429    }
430}
431
432fn format_instruction(
433    step: u16,
434    instr: &Instruction,
435    module: &Module,
436    ctx: &DumpContext,
437    step_width: usize,
438) -> String {
439    match instr {
440        Instruction::Match(m) => format_match(step, m, module, ctx, step_width),
441        Instruction::Call(c) => format_call(step, c, module, ctx, step_width),
442        Instruction::Return(r) => format_return(step, r, module, ctx, step_width),
443        Instruction::Trampoline(t) => format_trampoline(step, t, ctx, step_width),
444    }
445}
446
447fn format_match(
448    step: u16,
449    m: &Match,
450    module: &Module,
451    ctx: &DumpContext,
452    step_width: usize,
453) -> String {
454    let builder = LineBuilder::new(step_width);
455    let symbol = nav_symbol(m.nav);
456    let prefix = format!("  {:0sw$} {} ", step, symbol.format(), sw = step_width);
457
458    let content = format_match_content(m, module, ctx);
459    let successors = format_match_successors(m, ctx, step_width);
460
461    let base = format!("{prefix}{content}");
462    builder.pad_successors(base, &successors)
463}
464
465fn format_match_content(m: &Match, module: &Module, ctx: &DumpContext) -> String {
466    let mut parts = Vec::new();
467
468    let pre: Vec<_> = m.pre_effects().map(|e| format_effect(&e)).collect();
469    if !pre.is_empty() {
470        parts.push(format!("[{}]", pre.join(" ")));
471    }
472
473    // Skip neg_fields and node pattern for epsilon (no node interaction)
474    if !m.is_epsilon() {
475        for field_id in m.neg_fields() {
476            let name = ctx
477                .node_field_name(field_id)
478                .map(String::from)
479                .unwrap_or_else(|| format!("field#{field_id}"));
480            parts.push(format!("-{name}"));
481        }
482
483        let node_part = format_node_pattern(m, ctx);
484        if !node_part.is_empty() {
485            parts.push(node_part);
486        }
487
488        // Format predicate if present
489        if let Some((op, is_regex, value_ref)) = m.predicate() {
490            let op = PredicateOp::from_byte(op);
491            let value = if is_regex {
492                let string_id = module.regexes().get_string_id(value_ref as usize);
493                let pattern = &ctx.all_strings[string_id.get() as usize];
494                format!("/{}/", pattern)
495            } else {
496                let s = &ctx.all_strings[value_ref as usize];
497                format!("{:?}", s)
498            };
499            parts.push(format!("{} {}", op.as_str(), value));
500        }
501    }
502
503    let post: Vec<_> = m.post_effects().map(|e| format_effect(&e)).collect();
504    if !post.is_empty() {
505        parts.push(format!("[{}]", post.join(" ")));
506    }
507
508    parts.join(" ")
509}
510
511/// Format node pattern: `field: (type)` or `(type)` or `field: _` or `(_)` or `"text"`
512fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
513    let mut result = String::new();
514
515    if let Some(field_id) = m.node_field {
516        let name = ctx
517            .node_field_name(field_id.get())
518            .map(String::from)
519            .unwrap_or_else(|| format!("field#{}", field_id.get()));
520        result.push_str(&name);
521        result.push_str(": ");
522    }
523
524    match m.node_type {
525        NodeTypeIR::Any => {
526            // Any node wildcard: `_`
527            result.push('_');
528        }
529        NodeTypeIR::Named(None) => {
530            // Named wildcard: any named node
531            result.push_str("(_)");
532        }
533        NodeTypeIR::Named(Some(type_id)) => {
534            // Specific named node type
535            let name = ctx
536                .node_type_name(type_id.get())
537                .map(String::from)
538                .unwrap_or_else(|| format!("node#{}", type_id.get()));
539            result.push('(');
540            result.push_str(&name);
541            result.push(')');
542        }
543        NodeTypeIR::Anonymous(None) => {
544            // Anonymous wildcard: any anonymous node (future syntax)
545            result.push_str("\"_\"");
546        }
547        NodeTypeIR::Anonymous(Some(type_id)) => {
548            // Specific anonymous node (literal token)
549            let name = ctx
550                .node_type_name(type_id.get())
551                .map(String::from)
552                .unwrap_or_else(|| format!("anon#{}", type_id.get()));
553            result.push('"');
554            result.push_str(&name);
555            result.push('"');
556        }
557    }
558
559    result
560}
561
562fn format_match_successors(m: &Match, ctx: &DumpContext, step_width: usize) -> String {
563    if m.is_terminal() {
564        "◼".to_string()
565    } else {
566        m.successors()
567            .map(|s| format_step(s, ctx, step_width))
568            .collect::<Vec<_>>()
569            .join(", ")
570    }
571}
572
573fn format_call(
574    step: u16,
575    call: &Call,
576    _module: &Module,
577    ctx: &DumpContext,
578    step_width: usize,
579) -> String {
580    let c = &ctx.colors;
581    let builder = LineBuilder::new(step_width);
582    let symbol = nav_symbol(call.nav());
583    let prefix = format!("  {:0sw$} {} ", step, symbol.format(), sw = step_width);
584
585    // Format field constraint if present
586    let field_part = if let Some(field_id) = call.node_field {
587        let name = ctx
588            .node_field_name(field_id.get())
589            .map(String::from)
590            .unwrap_or_else(|| format!("field#{}", field_id.get()));
591        format!("{name}: ")
592    } else {
593        String::new()
594    };
595
596    let target_name = ctx
597        .label_for(call.target)
598        .map(String::from)
599        .unwrap_or_else(|| format!("@{:0w$}", call.target.0, w = step_width));
600    // Definition name in call is blue
601    let content = format!("{field_part}({}{}{})", c.blue, target_name, c.reset);
602    // Format as "target : return" with numeric IDs
603    let successors = format!(
604        "{:0w$} : {:0w$}",
605        call.target.get(),
606        call.next.get(),
607        w = step_width
608    );
609
610    let base = format!("{prefix}{content}");
611    builder.pad_successors(base, &successors)
612}
613
614fn format_return(
615    step: u16,
616    _r: &Return,
617    _module: &Module,
618    _ctx: &DumpContext,
619    step_width: usize,
620) -> String {
621    let builder = LineBuilder::new(step_width);
622    let prefix = format!(
623        "  {:0sw$} {} ",
624        step,
625        Symbol::EMPTY.format(),
626        sw = step_width
627    );
628    builder.pad_successors(prefix, "▶")
629}
630
631fn format_trampoline(step: u16, t: &Trampoline, _ctx: &DumpContext, step_width: usize) -> String {
632    let builder = LineBuilder::new(step_width);
633    let prefix = format!(
634        "  {:0sw$} {} ",
635        step,
636        Symbol::EMPTY.format(),
637        sw = step_width
638    );
639    let content = "Trampoline";
640    let successors = format!("{:0w$}", t.next.get(), w = step_width);
641    let base = format!("{prefix}{content}");
642    builder.pad_successors(base, &successors)
643}
644
645/// Format a step ID, showing entrypoint label or numeric ID.
646fn format_step(step: StepId, ctx: &DumpContext, step_width: usize) -> String {
647    let c = &ctx.colors;
648    if let Some(label) = ctx.label_for(step) {
649        format!("▶({}{}{})", c.blue, label, c.reset)
650    } else {
651        format!("{:0w$}", step.get(), w = step_width)
652    }
653}