plotnik_bytecode/bytecode/
dump.rs

1//! Human-readable bytecode dump for debugging and documentation.
2//!
3//! See `docs/binary-format/07-dump-format.md` for the output format specification.
4
5use std::collections::BTreeMap;
6use std::fmt::Write as _;
7
8use plotnik_core::Colors;
9use crate::predicate_op::PredicateOp;
10
11use super::format::{LineBuilder, Symbol, format_effect, nav_symbol, width_for_count};
12use super::ids::TypeId;
13use super::instructions::StepId;
14use super::module::{Instruction, Module};
15use super::node_type_ir::NodeTypeIR;
16use super::nav::Nav;
17use super::type_meta::{TypeData, TypeKind};
18use super::{Call, Match, Return, Trampoline};
19
20/// Generate a human-readable dump of the bytecode module.
21pub fn dump(module: &Module, colors: Colors) -> String {
22    let mut out = String::new();
23    let ctx = DumpContext::new(module, colors);
24
25    dump_strings(&mut out, module, &ctx);
26    dump_types_defs(&mut out, module, &ctx);
27    dump_types_members(&mut out, module, &ctx);
28    dump_types_names(&mut out, module, &ctx);
29    dump_entrypoints(&mut out, module, &ctx);
30    dump_code(&mut out, module, &ctx);
31
32    out
33}
34
35
36/// Context for dump formatting, precomputes lookups for O(1) access.
37struct DumpContext {
38    /// Maps step ID to entrypoint name for labeling.
39    step_labels: BTreeMap<u16, String>,
40    /// Maps node type ID to name.
41    node_type_names: BTreeMap<u16, String>,
42    /// Maps node field ID to name.
43    node_field_names: BTreeMap<u16, String>,
44    /// All strings (for predicate values, regex patterns, etc).
45    all_strings: Vec<String>,
46    /// Width for string indices (S#).
47    str_width: usize,
48    /// Width for type indices (T#).
49    type_width: usize,
50    /// Width for member indices (M#).
51    member_width: usize,
52    /// Width for name indices (N#).
53    name_width: usize,
54    /// Width for step indices.
55    step_width: usize,
56    /// Color palette.
57    colors: Colors,
58}
59
60impl DumpContext {
61    fn new(module: &Module, colors: Colors) -> Self {
62        let header = module.header();
63        let strings = module.strings();
64        let entrypoints = module.entrypoints();
65        let node_types = module.node_types();
66        let node_fields = module.node_fields();
67
68        let mut step_labels = BTreeMap::new();
69        // Preamble always at step 0 (first in layout)
70        step_labels.insert(0, "_ObjWrap".to_string());
71        for i in 0..entrypoints.len() {
72            let ep = entrypoints.get(i);
73            let name = strings.get(ep.name()).to_string();
74            step_labels.insert(ep.target(), name);
75        }
76
77        let mut node_type_names = BTreeMap::new();
78        for i in 0..node_types.len() {
79            let t = node_types.get(i);
80            node_type_names.insert(t.id, strings.get(t.name).to_string());
81        }
82
83        let mut node_field_names = BTreeMap::new();
84        for i in 0..node_fields.len() {
85            let f = node_fields.get(i);
86            node_field_names.insert(f.id, strings.get(f.name).to_string());
87        }
88
89        // Collect all strings for unlinked mode lookups
90        let str_count = header.str_table_count as usize;
91        let all_strings: Vec<String> = (0..str_count)
92            .map(|i| strings.get_by_index(i).to_string())
93            .collect();
94
95        // Compute widths for index formatting
96        let types = module.types();
97        let type_count = 3 + types.defs_count(); // 3 builtins + custom types
98        let str_width = width_for_count(str_count);
99        let type_width = width_for_count(type_count);
100        let member_width = width_for_count(types.members_count());
101        let name_width = width_for_count(types.names_count());
102        let step_width = width_for_count(header.transitions_count as usize);
103
104        Self {
105            step_labels,
106            node_type_names,
107            node_field_names,
108            all_strings,
109            str_width,
110            type_width,
111            member_width,
112            name_width,
113            step_width,
114            colors,
115        }
116    }
117
118    fn label_for(&self, step: StepId) -> Option<&str> {
119        self.step_labels.get(&step.get()).map(|s| s.as_str())
120    }
121
122    /// Get the name for a node type ID.
123    fn node_type_name(&self, id: u16) -> Option<&str> {
124        self.node_type_names.get(&id).map(|s| s.as_str())
125    }
126
127    /// Get the name for a node field ID.
128    fn node_field_name(&self, id: u16) -> Option<&str> {
129        self.node_field_names.get(&id).map(|s| s.as_str())
130    }
131}
132
133fn dump_strings(out: &mut String, module: &Module, ctx: &DumpContext) {
134    let c = &ctx.colors;
135    let strings = module.strings();
136    let count = module.header().str_table_count as usize;
137    let w = ctx.str_width;
138
139    writeln!(out, "{}[strings]{}", c.blue, c.reset).unwrap();
140    for i in 0..count {
141        let s = strings.get_by_index(i);
142        writeln!(out, "S{i:0w$} {}{s:?}{}", c.green, c.reset).unwrap();
143    }
144    out.push('\n');
145}
146
147fn dump_types_defs(out: &mut String, module: &Module, ctx: &DumpContext) {
148    let c = &ctx.colors;
149    let types = module.types();
150    let strings = module.strings();
151    let tw = ctx.type_width;
152    let mw = ctx.member_width;
153
154    writeln!(out, "{}[type_defs]{}", c.blue, c.reset).unwrap();
155
156    // All types are now in type_defs, including builtins
157    for i in 0..types.defs_count() {
158        let def = types.get_def(i);
159
160        let (formatted, comment) = match def.classify() {
161            TypeData::Primitive(kind) => {
162                let name = match kind {
163                    TypeKind::Void => "<Void>",
164                    TypeKind::Node => "<Node>",
165                    TypeKind::String => "<String>",
166                    _ => unreachable!(),
167                };
168                (name.to_string(), String::new())
169            }
170            TypeData::Wrapper { kind, inner } => {
171                let formatted = match kind {
172                    TypeKind::Optional => format!("Optional(T{:0tw$})", inner.0),
173                    TypeKind::ArrayZeroOrMore => format!("ArrayStar(T{:0tw$})", inner.0),
174                    TypeKind::ArrayOneOrMore => format!("ArrayPlus(T{:0tw$})", inner.0),
175                    TypeKind::Alias => format!("Alias(T{:0tw$})", inner.0),
176                    _ => unreachable!(),
177                };
178                let comment = match kind {
179                    TypeKind::Optional => {
180                        let inner_name = format_type_name(inner, module, ctx);
181                        format!("{}  ; {}?{}", c.dim, inner_name, c.reset)
182                    }
183                    TypeKind::ArrayZeroOrMore => {
184                        let inner_name = format_type_name(inner, module, ctx);
185                        format!("{}  ; {}*{}", c.dim, inner_name, c.reset)
186                    }
187                    TypeKind::ArrayOneOrMore => {
188                        let inner_name = format_type_name(inner, module, ctx);
189                        format!("{}  ; {}+{}", c.dim, inner_name, c.reset)
190                    }
191                    TypeKind::Alias => String::new(),
192                    _ => unreachable!(),
193                };
194                (formatted, comment)
195            }
196            TypeData::Composite {
197                kind,
198                member_start,
199                member_count,
200            } => {
201                let formatted = match kind {
202                    TypeKind::Struct => {
203                        format!("Struct  M{:0mw$}:{}", member_start, member_count)
204                    }
205                    TypeKind::Enum => format!("Enum    M{:0mw$}:{}", member_start, member_count),
206                    _ => unreachable!(),
207                };
208                let comment = match kind {
209                    TypeKind::Struct => {
210                        let fields: Vec<_> = types
211                            .members_of(&def)
212                            .map(|m| strings.get(m.name).to_string())
213                            .collect();
214                        format!("{}  ; {{ {} }}{}", c.dim, fields.join(", "), c.reset)
215                    }
216                    TypeKind::Enum => {
217                        let variants: Vec<_> = types
218                            .members_of(&def)
219                            .map(|m| strings.get(m.name).to_string())
220                            .collect();
221                        format!("{}  ; {}{}", c.dim, variants.join(" | "), c.reset)
222                    }
223                    _ => unreachable!(),
224                };
225                (formatted, comment)
226            }
227        };
228
229        writeln!(out, "T{i:0tw$} = {formatted}{comment}").unwrap();
230    }
231    out.push('\n');
232}
233
234fn dump_types_members(out: &mut String, module: &Module, ctx: &DumpContext) {
235    let c = &ctx.colors;
236    let types = module.types();
237    let strings = module.strings();
238    let mw = ctx.member_width;
239    let sw = ctx.str_width;
240    let tw = ctx.type_width;
241
242    writeln!(out, "{}[type_members]{}", c.blue, c.reset).unwrap();
243    for i in 0..types.members_count() {
244        let member = types.get_member(i);
245        let name = strings.get(member.name);
246        let type_name = format_type_name(member.type_id, module, ctx);
247        writeln!(
248            out,
249            "M{i:0mw$}: S{:0sw$} → T{:0tw$}  {}; {name}: {type_name}{}",
250            member.name.0, member.type_id.0, c.dim, c.reset
251        )
252        .unwrap();
253    }
254    out.push('\n');
255}
256
257fn dump_types_names(out: &mut String, module: &Module, ctx: &DumpContext) {
258    let c = &ctx.colors;
259    let types = module.types();
260    let strings = module.strings();
261    let nw = ctx.name_width;
262    let sw = ctx.str_width;
263    let tw = ctx.type_width;
264
265    writeln!(out, "{}[type_names]{}", c.blue, c.reset).unwrap();
266    for i in 0..types.names_count() {
267        let entry = types.get_name(i);
268        let name = strings.get(entry.name);
269        writeln!(
270            out,
271            "N{i:0nw$}: S{:0sw$} → T{:0tw$}  {}; {}{name}{}",
272            entry.name.0, entry.type_id.0, c.dim, c.blue, c.reset
273        )
274        .unwrap();
275    }
276    out.push('\n');
277}
278
279/// Format a type ID as a human-readable name.
280fn format_type_name(type_id: TypeId, module: &Module, ctx: &DumpContext) -> String {
281    let types = module.types();
282    let strings = module.strings();
283
284    // Check if it's a primitive type
285    if let Some(def) = types.get(type_id)
286        && let TypeData::Primitive(kind) = def.classify()
287        && let Some(name) = kind.primitive_name()
288    {
289        return format!("<{}>", name);
290    }
291
292    // Try to find a name in types.names
293    for i in 0..types.names_count() {
294        let entry = types.get_name(i);
295        if entry.type_id == type_id {
296            return strings.get(entry.name).to_string();
297        }
298    }
299
300    // Fall back to T# format
301    let tw = ctx.type_width;
302    format!("T{:0tw$}", type_id.0)
303}
304
305fn dump_entrypoints(out: &mut String, module: &Module, ctx: &DumpContext) {
306    let c = &ctx.colors;
307    let strings = module.strings();
308    let entrypoints = module.entrypoints();
309    let stw = ctx.step_width;
310    let tw = ctx.type_width;
311
312    writeln!(out, "{}[entrypoints]{}", c.blue, c.reset).unwrap();
313
314    // Collect and sort by name for display
315    let mut entries: Vec<_> = (0..entrypoints.len())
316        .map(|i| {
317            let ep = entrypoints.get(i);
318            let name = strings.get(ep.name());
319            (name, ep.target(), ep.result_type().0)
320        })
321        .collect();
322    entries.sort_by_key(|(name, _, _)| *name);
323
324    // Find max name length for alignment
325    let max_len = entries.iter().map(|(n, _, _)| n.len()).max().unwrap_or(0);
326
327    for (name, target, type_id) in entries {
328        writeln!(
329            out,
330            "{}{name:width$}{} = {:0stw$} :: T{type_id:0tw$}",
331            c.blue,
332            c.reset,
333            target,
334            width = max_len
335        )
336        .unwrap();
337    }
338    out.push('\n');
339}
340
341/// Check if an instruction is padding (all-zeros Match8).
342///
343/// Padding slots contain zero bytes which decode as terminal epsilon Match8
344/// with Any node type, no field constraint, and next=0.
345fn is_padding(instr: &Instruction) -> bool {
346    match instr {
347        Instruction::Match(m) => {
348            m.is_match8()
349                && m.nav == Nav::Epsilon
350                && matches!(m.node_type, NodeTypeIR::Any)
351                && m.node_field.is_none()
352                && m.is_terminal()
353        }
354        _ => false,
355    }
356}
357
358/// Format a single padding step line.
359///
360/// Output: `  07  ... ` (step number and " ... " in symbol column)
361fn format_padding_step(step: u16, step_width: usize) -> String {
362    LineBuilder::new(step_width).instruction_prefix(step, Symbol::PADDING)
363}
364
365fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) {
366    let c = &ctx.colors;
367    let header = module.header();
368    let transitions_count = header.transitions_count as usize;
369    let step_width = ctx.step_width;
370
371    writeln!(out, "{}[transitions]{}", c.blue, c.reset).unwrap();
372
373    let mut step = 0u16;
374    let mut first_label = true;
375    while (step as usize) < transitions_count {
376        // Check if this step has a label (using raw u16)
377        if let Some(label) = ctx.step_labels.get(&step) {
378            if first_label {
379                writeln!(out, "{}{label}{}:", c.blue, c.reset).unwrap();
380                first_label = false;
381            } else {
382                writeln!(out, "\n{}{label}{}:", c.blue, c.reset).unwrap();
383            }
384        }
385
386        let instr = module.decode_step(step);
387
388        // Check for padding (all-zeros Match8 instruction)
389        if is_padding(&instr) {
390            writeln!(out, "{}", format_padding_step(step, step_width)).unwrap();
391            step += 1;
392            continue;
393        }
394
395        let line = format_instruction(step, &instr, module, ctx, step_width);
396        out.push_str(&line);
397        out.push('\n');
398
399        // Advance by instruction size
400        let size = instruction_step_count(&instr);
401        step += size;
402    }
403}
404
405fn instruction_step_count(instr: &Instruction) -> u16 {
406    match instr {
407        Instruction::Match(m) => {
408            let pre = m.pre_effects().count();
409            let neg = m.neg_fields().count();
410            let post = m.post_effects().count();
411            let succ = m.succ_count();
412            let pred = if m.has_predicate() { 2 } else { 0 };
413            let slots = pre + neg + post + pred + succ;
414
415            if pre == 0 && neg == 0 && post == 0 && pred == 0 && succ <= 1 {
416                1 // Match8
417            } else if slots <= 4 {
418                2 // Match16
419            } else if slots <= 8 {
420                3 // Match24
421            } else if slots <= 12 {
422                4 // Match32
423            } else if slots <= 20 {
424                6 // Match48
425            } else {
426                8 // Match64
427            }
428        }
429        Instruction::Call(_) | Instruction::Return(_) | Instruction::Trampoline(_) => 1,
430    }
431}
432
433fn format_instruction(
434    step: u16,
435    instr: &Instruction,
436    module: &Module,
437    ctx: &DumpContext,
438    step_width: usize,
439) -> String {
440    match instr {
441        Instruction::Match(m) => format_match(step, m, module, ctx, step_width),
442        Instruction::Call(c) => format_call(step, c, module, ctx, step_width),
443        Instruction::Return(r) => format_return(step, r, module, ctx, step_width),
444        Instruction::Trampoline(t) => format_trampoline(step, t, ctx, step_width),
445    }
446}
447
448fn format_match(
449    step: u16,
450    m: &Match,
451    module: &Module,
452    ctx: &DumpContext,
453    step_width: usize,
454) -> String {
455    let builder = LineBuilder::new(step_width);
456    let symbol = nav_symbol(m.nav);
457    let prefix = format!("  {:0sw$} {} ", step, symbol.format(), sw = step_width);
458
459    let content = format_match_content(m, module, ctx);
460    let successors = format_match_successors(m, ctx, step_width);
461
462    let base = format!("{prefix}{content}");
463    builder.pad_successors(base, &successors)
464}
465
466fn format_match_content(m: &Match, module: &Module, ctx: &DumpContext) -> String {
467    let mut parts = Vec::new();
468
469    let pre: Vec<_> = m.pre_effects().map(|e| format_effect(&e)).collect();
470    if !pre.is_empty() {
471        parts.push(format!("[{}]", pre.join(" ")));
472    }
473
474    // Skip neg_fields and node pattern for epsilon (no node interaction)
475    if !m.is_epsilon() {
476        for field_id in m.neg_fields() {
477            let name = ctx
478                .node_field_name(field_id)
479                .map(String::from)
480                .unwrap_or_else(|| format!("field#{field_id}"));
481            parts.push(format!("-{name}"));
482        }
483
484        let node_part = format_node_pattern(m, ctx);
485        if !node_part.is_empty() {
486            parts.push(node_part);
487        }
488
489        // Format predicate if present
490        if let Some((op, is_regex, value_ref)) = m.predicate() {
491            let op = PredicateOp::from_byte(op);
492            let value = if is_regex {
493                let string_id = module.regexes().get_string_id(value_ref as usize);
494                let pattern = &ctx.all_strings[string_id.get() as usize];
495                format!("/{}/", pattern)
496            } else {
497                let s = &ctx.all_strings[value_ref as usize];
498                format!("{:?}", s)
499            };
500            parts.push(format!("{} {}", op.as_str(), value));
501        }
502    }
503
504    let post: Vec<_> = m.post_effects().map(|e| format_effect(&e)).collect();
505    if !post.is_empty() {
506        parts.push(format!("[{}]", post.join(" ")));
507    }
508
509    parts.join(" ")
510}
511
512/// Format node pattern: `field: (type)` or `(type)` or `field: _` or `(_)` or `"text"`
513fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
514    let mut result = String::new();
515
516    if let Some(field_id) = m.node_field {
517        let name = ctx
518            .node_field_name(field_id.get())
519            .map(String::from)
520            .unwrap_or_else(|| format!("field#{}", field_id.get()));
521        result.push_str(&name);
522        result.push_str(": ");
523    }
524
525    match m.node_type {
526        NodeTypeIR::Any => {
527            // Any node wildcard: `_`
528            result.push('_');
529        }
530        NodeTypeIR::Named(None) => {
531            // Named wildcard: any named node
532            result.push_str("(_)");
533        }
534        NodeTypeIR::Named(Some(type_id)) => {
535            // Specific named node type
536            let name = ctx
537                .node_type_name(type_id.get())
538                .map(String::from)
539                .unwrap_or_else(|| format!("node#{}", type_id.get()));
540            result.push('(');
541            result.push_str(&name);
542            result.push(')');
543        }
544        NodeTypeIR::Anonymous(None) => {
545            // Anonymous wildcard: any anonymous node (future syntax)
546            result.push_str("\"_\"");
547        }
548        NodeTypeIR::Anonymous(Some(type_id)) => {
549            // Specific anonymous node (literal token)
550            let name = ctx
551                .node_type_name(type_id.get())
552                .map(String::from)
553                .unwrap_or_else(|| format!("anon#{}", type_id.get()));
554            result.push('"');
555            result.push_str(&name);
556            result.push('"');
557        }
558    }
559
560    result
561}
562
563fn format_match_successors(m: &Match, ctx: &DumpContext, step_width: usize) -> String {
564    if m.is_terminal() {
565        "◼".to_string()
566    } else {
567        m.successors()
568            .map(|s| format_step(s, ctx, step_width))
569            .collect::<Vec<_>>()
570            .join(", ")
571    }
572}
573
574fn format_call(
575    step: u16,
576    call: &Call,
577    _module: &Module,
578    ctx: &DumpContext,
579    step_width: usize,
580) -> String {
581    let c = &ctx.colors;
582    let builder = LineBuilder::new(step_width);
583    let symbol = nav_symbol(call.nav());
584    let prefix = format!("  {:0sw$} {} ", step, symbol.format(), sw = step_width);
585
586    // Format field constraint if present
587    let field_part = if let Some(field_id) = call.node_field {
588        let name = ctx
589            .node_field_name(field_id.get())
590            .map(String::from)
591            .unwrap_or_else(|| format!("field#{}", field_id.get()));
592        format!("{name}: ")
593    } else {
594        String::new()
595    };
596
597    let target_name = ctx
598        .label_for(call.target)
599        .map(String::from)
600        .unwrap_or_else(|| format!("@{:0w$}", call.target.0, w = step_width));
601    // Definition name in call is blue
602    let content = format!("{field_part}({}{}{})", c.blue, target_name, c.reset);
603    // Format as "target : return" with numeric IDs
604    let successors = format!(
605        "{:0w$} : {:0w$}",
606        call.target.get(),
607        call.next.get(),
608        w = step_width
609    );
610
611    let base = format!("{prefix}{content}");
612    builder.pad_successors(base, &successors)
613}
614
615fn format_return(
616    step: u16,
617    _r: &Return,
618    _module: &Module,
619    _ctx: &DumpContext,
620    step_width: usize,
621) -> String {
622    let builder = LineBuilder::new(step_width);
623    let prefix = format!(
624        "  {:0sw$} {} ",
625        step,
626        Symbol::EMPTY.format(),
627        sw = step_width
628    );
629    builder.pad_successors(prefix, "▶")
630}
631
632fn format_trampoline(step: u16, t: &Trampoline, _ctx: &DumpContext, step_width: usize) -> String {
633    let builder = LineBuilder::new(step_width);
634    let prefix = format!(
635        "  {:0sw$} {} ",
636        step,
637        Symbol::EMPTY.format(),
638        sw = step_width
639    );
640    let content = "Trampoline";
641    let successors = format!("{:0w$}", t.next.get(), w = step_width);
642    let base = format!("{prefix}{content}");
643    builder.pad_successors(base, &successors)
644}
645
646/// Format a step ID, showing entrypoint label or numeric ID.
647fn format_step(step: StepId, ctx: &DumpContext, step_width: usize) -> String {
648    let c = &ctx.colors;
649    if let Some(label) = ctx.label_for(step) {
650        format!("▶({}{}{})", c.blue, label, c.reset)
651    } else {
652        format!("{:0w$}", step.get(), w = step_width)
653    }
654}