plotnik_bytecode/bytecode/
dump.rs

1//! Human-readable bytecode dump for debugging and documentation.
2//!
3//! See `docs/binary-format/07-dump-format.md` for the output format specification.
4
5use std::collections::BTreeMap;
6use std::fmt::Write as _;
7
8use crate::predicate_op::PredicateOp;
9use plotnik_core::Colors;
10
11use super::format::{LineBuilder, Symbol, format_effect, nav_symbol, width_for_count};
12use super::ids::TypeId;
13use super::instructions::StepId;
14use super::module::{Instruction, Module};
15use super::nav::Nav;
16use super::node_type_ir::NodeTypeIR;
17use super::type_meta::{TypeData, TypeKind};
18use super::{Call, Match, Return, Trampoline};
19
20/// Generate a human-readable dump of the bytecode module.
21pub fn dump(module: &Module, colors: Colors) -> String {
22    let mut out = String::new();
23    let ctx = DumpContext::new(module, colors);
24
25    dump_strings(&mut out, module, &ctx);
26    dump_regexes(&mut out, module, &ctx);
27    dump_types_defs(&mut out, module, &ctx);
28    dump_types_members(&mut out, module, &ctx);
29    dump_types_names(&mut out, module, &ctx);
30    dump_entrypoints(&mut out, module, &ctx);
31    dump_code(&mut out, module, &ctx);
32
33    out
34}
35
/// Shared state for one dump run.
///
/// Built once by `DumpContext::new` so the section writers can reuse the same
/// name lookups (ordered maps keyed by raw `u16` IDs) and zero-padding widths
/// instead of recomputing them per line.
struct DumpContext {
    /// Maps a raw step ID to the label printed above that step
    /// (entrypoint names, plus `_ObjWrap` for step 0).
    step_labels: BTreeMap<u16, String>,
    /// Maps node type ID to name.
    node_type_names: BTreeMap<u16, String>,
    /// Maps node field ID to name.
    node_field_names: BTreeMap<u16, String>,
    /// Every string-table entry, in table order, so it can be indexed by raw
    /// string index (used for predicate values and regex patterns).
    all_strings: Vec<String>,
    /// Zero-padded width for string indices (S#).
    str_width: usize,
    /// Zero-padded width for type indices (T#).
    type_width: usize,
    /// Zero-padded width for member indices (M#).
    member_width: usize,
    /// Zero-padded width for name indices (N#).
    name_width: usize,
    /// Zero-padded width for step indices.
    step_width: usize,
    /// Color palette interleaved with the output text.
    colors: Colors,
}
59
60impl DumpContext {
61    fn new(module: &Module, colors: Colors) -> Self {
62        let header = module.header();
63        let strings = module.strings();
64        let entrypoints = module.entrypoints();
65        let node_types = module.node_types();
66        let node_fields = module.node_fields();
67
68        let mut step_labels = BTreeMap::new();
69        // Preamble always at step 0 (first in layout)
70        step_labels.insert(0, "_ObjWrap".to_string());
71        for i in 0..entrypoints.len() {
72            let ep = entrypoints.get(i);
73            let name = strings.get(ep.name()).to_string();
74            step_labels.insert(ep.target(), name);
75        }
76
77        let mut node_type_names = BTreeMap::new();
78        for i in 0..node_types.len() {
79            let t = node_types.get(i);
80            node_type_names.insert(t.id, strings.get(t.name).to_string());
81        }
82
83        let mut node_field_names = BTreeMap::new();
84        for i in 0..node_fields.len() {
85            let f = node_fields.get(i);
86            node_field_names.insert(f.id, strings.get(f.name).to_string());
87        }
88
89        // Collect all strings for unlinked mode lookups
90        let str_count = header.str_table_count as usize;
91        let all_strings: Vec<String> = (0..str_count)
92            .map(|i| strings.get_by_index(i).to_string())
93            .collect();
94
95        // Compute widths for index formatting
96        let types = module.types();
97        let type_count = 3 + types.defs_count(); // 3 builtins + custom types
98        let str_width = width_for_count(str_count);
99        let type_width = width_for_count(type_count);
100        let member_width = width_for_count(types.members_count());
101        let name_width = width_for_count(types.names_count());
102        let step_width = width_for_count(header.transitions_count as usize);
103
104        Self {
105            step_labels,
106            node_type_names,
107            node_field_names,
108            all_strings,
109            str_width,
110            type_width,
111            member_width,
112            name_width,
113            step_width,
114            colors,
115        }
116    }
117
118    fn label_for(&self, step: StepId) -> Option<&str> {
119        self.step_labels.get(&step.get()).map(|s| s.as_str())
120    }
121
122    /// Get the name for a node type ID.
123    fn node_type_name(&self, id: u16) -> Option<&str> {
124        self.node_type_names.get(&id).map(|s| s.as_str())
125    }
126
127    /// Get the name for a node field ID.
128    fn node_field_name(&self, id: u16) -> Option<&str> {
129        self.node_field_names.get(&id).map(|s| s.as_str())
130    }
131}
132
133fn dump_strings(out: &mut String, module: &Module, ctx: &DumpContext) {
134    let c = &ctx.colors;
135    let strings = module.strings();
136    let count = module.header().str_table_count as usize;
137    let w = ctx.str_width;
138
139    writeln!(out, "{}[strings]{}", c.blue, c.reset).unwrap();
140    for i in 0..count {
141        let s = strings.get_by_index(i);
142        writeln!(out, "S{i:0w$} {}{s:?}{}", c.green, c.reset).unwrap();
143    }
144    out.push('\n');
145}
146
147fn dump_regexes(out: &mut String, module: &Module, ctx: &DumpContext) {
148    let count = module.header().regex_table_count as usize;
149    // Index 0 is reserved, so only print if there are actual regexes
150    if count <= 1 {
151        return;
152    }
153
154    let c = &ctx.colors;
155    let regexes = module.regexes();
156    let w = width_for_count(count);
157
158    writeln!(out, "{}[regex]{}", c.blue, c.reset).unwrap();
159    // Skip index 0 (reserved)
160    for i in 1..count {
161        let string_id = regexes.get_string_id(i);
162        let pattern = &ctx.all_strings[string_id.get() as usize];
163        writeln!(out, "R{i:0w$} {}/{pattern}/{}", c.green, c.reset).unwrap();
164    }
165    out.push('\n');
166}
167
168fn dump_types_defs(out: &mut String, module: &Module, ctx: &DumpContext) {
169    let c = &ctx.colors;
170    let types = module.types();
171    let strings = module.strings();
172    let tw = ctx.type_width;
173    let mw = ctx.member_width;
174
175    writeln!(out, "{}[type_defs]{}", c.blue, c.reset).unwrap();
176
177    // All types are now in type_defs, including builtins
178    for i in 0..types.defs_count() {
179        let def = types.get_def(i);
180
181        let (formatted, comment) = match def.classify() {
182            TypeData::Primitive(kind) => {
183                let name = match kind {
184                    TypeKind::Void => "<Void>",
185                    TypeKind::Node => "<Node>",
186                    TypeKind::String => "<String>",
187                    _ => unreachable!(),
188                };
189                (name.to_string(), String::new())
190            }
191            TypeData::Wrapper { kind, inner } => {
192                let formatted = match kind {
193                    TypeKind::Optional => format!("Optional(T{:0tw$})", inner.0),
194                    TypeKind::ArrayZeroOrMore => format!("ArrayStar(T{:0tw$})", inner.0),
195                    TypeKind::ArrayOneOrMore => format!("ArrayPlus(T{:0tw$})", inner.0),
196                    TypeKind::Alias => format!("Alias(T{:0tw$})", inner.0),
197                    _ => unreachable!(),
198                };
199                let comment = match kind {
200                    TypeKind::Optional => {
201                        let inner_name = format_type_name(inner, module, ctx);
202                        format!("{}  ; {}?{}", c.dim, inner_name, c.reset)
203                    }
204                    TypeKind::ArrayZeroOrMore => {
205                        let inner_name = format_type_name(inner, module, ctx);
206                        format!("{}  ; {}*{}", c.dim, inner_name, c.reset)
207                    }
208                    TypeKind::ArrayOneOrMore => {
209                        let inner_name = format_type_name(inner, module, ctx);
210                        format!("{}  ; {}+{}", c.dim, inner_name, c.reset)
211                    }
212                    TypeKind::Alias => String::new(),
213                    _ => unreachable!(),
214                };
215                (formatted, comment)
216            }
217            TypeData::Composite {
218                kind,
219                member_start,
220                member_count,
221            } => {
222                let formatted = match kind {
223                    TypeKind::Struct => {
224                        format!("Struct  M{:0mw$}:{}", member_start, member_count)
225                    }
226                    TypeKind::Enum => format!("Enum    M{:0mw$}:{}", member_start, member_count),
227                    _ => unreachable!(),
228                };
229                let comment = match kind {
230                    TypeKind::Struct => {
231                        let fields: Vec<_> = types
232                            .members_of(&def)
233                            .map(|m| strings.get(m.name).to_string())
234                            .collect();
235                        format!("{}  ; {{ {} }}{}", c.dim, fields.join(", "), c.reset)
236                    }
237                    TypeKind::Enum => {
238                        let variants: Vec<_> = types
239                            .members_of(&def)
240                            .map(|m| strings.get(m.name).to_string())
241                            .collect();
242                        format!("{}  ; {}{}", c.dim, variants.join(" | "), c.reset)
243                    }
244                    _ => unreachable!(),
245                };
246                (formatted, comment)
247            }
248        };
249
250        writeln!(out, "T{i:0tw$} = {formatted}{comment}").unwrap();
251    }
252    out.push('\n');
253}
254
255fn dump_types_members(out: &mut String, module: &Module, ctx: &DumpContext) {
256    let c = &ctx.colors;
257    let types = module.types();
258    let strings = module.strings();
259    let mw = ctx.member_width;
260    let sw = ctx.str_width;
261    let tw = ctx.type_width;
262
263    writeln!(out, "{}[type_members]{}", c.blue, c.reset).unwrap();
264    for i in 0..types.members_count() {
265        let member = types.get_member(i);
266        let name = strings.get(member.name);
267        let type_name = format_type_name(member.type_id, module, ctx);
268        writeln!(
269            out,
270            "M{i:0mw$}: S{:0sw$} → T{:0tw$}  {}; {name}: {type_name}{}",
271            member.name.0, member.type_id.0, c.dim, c.reset
272        )
273        .unwrap();
274    }
275    out.push('\n');
276}
277
278fn dump_types_names(out: &mut String, module: &Module, ctx: &DumpContext) {
279    let c = &ctx.colors;
280    let types = module.types();
281    let strings = module.strings();
282    let nw = ctx.name_width;
283    let sw = ctx.str_width;
284    let tw = ctx.type_width;
285
286    writeln!(out, "{}[type_names]{}", c.blue, c.reset).unwrap();
287    for i in 0..types.names_count() {
288        let entry = types.get_name(i);
289        let name = strings.get(entry.name);
290        writeln!(
291            out,
292            "N{i:0nw$}: S{:0sw$} → T{:0tw$}  {}; {}{name}{}",
293            entry.name.0, entry.type_id.0, c.dim, c.blue, c.reset
294        )
295        .unwrap();
296    }
297    out.push('\n');
298}
299
300/// Format a type ID as a human-readable name.
301fn format_type_name(type_id: TypeId, module: &Module, ctx: &DumpContext) -> String {
302    let types = module.types();
303    let strings = module.strings();
304
305    // Check if it's a primitive type
306    if let Some(def) = types.get(type_id)
307        && let TypeData::Primitive(kind) = def.classify()
308        && let Some(name) = kind.primitive_name()
309    {
310        return format!("<{}>", name);
311    }
312
313    // Try to find a name in types.names
314    for i in 0..types.names_count() {
315        let entry = types.get_name(i);
316        if entry.type_id == type_id {
317            return strings.get(entry.name).to_string();
318        }
319    }
320
321    // Fall back to T# format
322    let tw = ctx.type_width;
323    format!("T{:0tw$}", type_id.0)
324}
325
326fn dump_entrypoints(out: &mut String, module: &Module, ctx: &DumpContext) {
327    let c = &ctx.colors;
328    let strings = module.strings();
329    let entrypoints = module.entrypoints();
330    let stw = ctx.step_width;
331    let tw = ctx.type_width;
332
333    writeln!(out, "{}[entrypoints]{}", c.blue, c.reset).unwrap();
334
335    // Collect and sort by name for display
336    let mut entries: Vec<_> = (0..entrypoints.len())
337        .map(|i| {
338            let ep = entrypoints.get(i);
339            let name = strings.get(ep.name());
340            (name, ep.target(), ep.result_type().0)
341        })
342        .collect();
343    entries.sort_by_key(|(name, _, _)| *name);
344
345    // Find max name length for alignment
346    let max_len = entries.iter().map(|(n, _, _)| n.len()).max().unwrap_or(0);
347
348    for (name, target, type_id) in entries {
349        writeln!(
350            out,
351            "{}{name:width$}{} = {:0stw$} :: T{type_id:0tw$}",
352            c.blue,
353            c.reset,
354            target,
355            width = max_len
356        )
357        .unwrap();
358    }
359    out.push('\n');
360}
361
362/// Check if an instruction is padding (all-zeros Match8).
363///
364/// Padding slots contain zero bytes which decode as terminal epsilon Match8
365/// with Any node type, no field constraint, and next=0.
366fn is_padding(instr: &Instruction) -> bool {
367    match instr {
368        Instruction::Match(m) => {
369            m.is_match8()
370                && m.nav == Nav::Epsilon
371                && matches!(m.node_type, NodeTypeIR::Any)
372                && m.node_field.is_none()
373                && m.is_terminal()
374        }
375        _ => false,
376    }
377}
378
379/// Format a single padding step line.
380///
381/// Output: `  07  ... ` (step number and " ... " in symbol column)
382fn format_padding_step(step: u16, step_width: usize) -> String {
383    LineBuilder::new(step_width).instruction_prefix(step, Symbol::PADDING)
384}
385
386fn dump_code(out: &mut String, module: &Module, ctx: &DumpContext) {
387    let c = &ctx.colors;
388    let header = module.header();
389    let transitions_count = header.transitions_count as usize;
390    let step_width = ctx.step_width;
391
392    writeln!(out, "{}[transitions]{}", c.blue, c.reset).unwrap();
393
394    let mut step = 0u16;
395    let mut first_label = true;
396    while (step as usize) < transitions_count {
397        // Check if this step has a label (using raw u16)
398        if let Some(label) = ctx.step_labels.get(&step) {
399            if first_label {
400                writeln!(out, "{}{label}{}:", c.blue, c.reset).unwrap();
401                first_label = false;
402            } else {
403                writeln!(out, "\n{}{label}{}:", c.blue, c.reset).unwrap();
404            }
405        }
406
407        let instr = module.decode_step(step);
408
409        // Check for padding (all-zeros Match8 instruction)
410        if is_padding(&instr) {
411            writeln!(out, "{}", format_padding_step(step, step_width)).unwrap();
412            step += 1;
413            continue;
414        }
415
416        let line = format_instruction(step, &instr, module, ctx, step_width);
417        out.push_str(&line);
418        out.push('\n');
419
420        // Advance by instruction size
421        let size = instruction_step_count(&instr);
422        step += size;
423    }
424}
425
426fn instruction_step_count(instr: &Instruction) -> u16 {
427    match instr {
428        Instruction::Match(m) => {
429            let pre = m.pre_effects().count();
430            let neg = m.neg_fields().count();
431            let post = m.post_effects().count();
432            let succ = m.succ_count();
433            let pred = if m.has_predicate() { 2 } else { 0 };
434            let slots = pre + neg + post + pred + succ;
435
436            if pre == 0 && neg == 0 && post == 0 && pred == 0 && succ <= 1 {
437                1 // Match8
438            } else if slots <= 4 {
439                2 // Match16
440            } else if slots <= 8 {
441                3 // Match24
442            } else if slots <= 12 {
443                4 // Match32
444            } else if slots <= 20 {
445                6 // Match48
446            } else {
447                8 // Match64
448            }
449        }
450        Instruction::Call(_) | Instruction::Return(_) | Instruction::Trampoline(_) => 1,
451    }
452}
453
454fn format_instruction(
455    step: u16,
456    instr: &Instruction,
457    module: &Module,
458    ctx: &DumpContext,
459    step_width: usize,
460) -> String {
461    match instr {
462        Instruction::Match(m) => format_match(step, m, module, ctx, step_width),
463        Instruction::Call(c) => format_call(step, c, module, ctx, step_width),
464        Instruction::Return(r) => format_return(step, r, module, ctx, step_width),
465        Instruction::Trampoline(t) => format_trampoline(step, t, ctx, step_width),
466    }
467}
468
469fn format_match(
470    step: u16,
471    m: &Match,
472    module: &Module,
473    ctx: &DumpContext,
474    step_width: usize,
475) -> String {
476    let builder = LineBuilder::new(step_width);
477    let symbol = nav_symbol(m.nav);
478    let prefix = format!("  {:0sw$} {} ", step, symbol.format(), sw = step_width);
479
480    let content = format_match_content(m, module, ctx);
481    let successors = format_match_successors(m, ctx, step_width);
482
483    let base = format!("{prefix}{content}");
484    builder.pad_successors(base, &successors)
485}
486
487fn format_match_content(m: &Match, module: &Module, ctx: &DumpContext) -> String {
488    let mut parts = Vec::new();
489
490    let pre: Vec<_> = m.pre_effects().map(|e| format_effect(&e)).collect();
491    if !pre.is_empty() {
492        parts.push(format!("[{}]", pre.join(" ")));
493    }
494
495    // Skip neg_fields and node pattern for epsilon (no node interaction)
496    if !m.is_epsilon() {
497        for field_id in m.neg_fields() {
498            let name = ctx
499                .node_field_name(field_id)
500                .map(String::from)
501                .unwrap_or_else(|| format!("field#{field_id}"));
502            parts.push(format!("-{name}"));
503        }
504
505        let node_part = format_node_pattern(m, ctx);
506        if !node_part.is_empty() {
507            parts.push(node_part);
508        }
509
510        // Format predicate if present
511        if let Some((op, is_regex, value_ref)) = m.predicate() {
512            let op = PredicateOp::from_byte(op);
513            let value = if is_regex {
514                let string_id = module.regexes().get_string_id(value_ref as usize);
515                let pattern = &ctx.all_strings[string_id.get() as usize];
516                format!("/{}/", pattern)
517            } else {
518                let s = &ctx.all_strings[value_ref as usize];
519                format!("{:?}", s)
520            };
521            parts.push(format!("{} {}", op.as_str(), value));
522        }
523    }
524
525    let post: Vec<_> = m.post_effects().map(|e| format_effect(&e)).collect();
526    if !post.is_empty() {
527        parts.push(format!("[{}]", post.join(" ")));
528    }
529
530    parts.join(" ")
531}
532
533/// Format node pattern: `field: (type)` or `(type)` or `field: _` or `(_)` or `"text"`
534fn format_node_pattern(m: &Match, ctx: &DumpContext) -> String {
535    let mut result = String::new();
536
537    if let Some(field_id) = m.node_field {
538        let name = ctx
539            .node_field_name(field_id.get())
540            .map(String::from)
541            .unwrap_or_else(|| format!("field#{}", field_id.get()));
542        result.push_str(&name);
543        result.push_str(": ");
544    }
545
546    match m.node_type {
547        NodeTypeIR::Any => {
548            // Any node wildcard: `_`
549            result.push('_');
550        }
551        NodeTypeIR::Named(None) => {
552            // Named wildcard: any named node
553            result.push_str("(_)");
554        }
555        NodeTypeIR::Named(Some(type_id)) => {
556            // Specific named node type
557            let name = ctx
558                .node_type_name(type_id.get())
559                .map(String::from)
560                .unwrap_or_else(|| format!("node#{}", type_id.get()));
561            result.push('(');
562            result.push_str(&name);
563            result.push(')');
564        }
565        NodeTypeIR::Anonymous(None) => {
566            // Anonymous wildcard: any anonymous node (future syntax)
567            result.push_str("\"_\"");
568        }
569        NodeTypeIR::Anonymous(Some(type_id)) => {
570            // Specific anonymous node (literal token)
571            let name = ctx
572                .node_type_name(type_id.get())
573                .map(String::from)
574                .unwrap_or_else(|| format!("anon#{}", type_id.get()));
575            result.push('"');
576            result.push_str(&name);
577            result.push('"');
578        }
579    }
580
581    result
582}
583
584fn format_match_successors(m: &Match, ctx: &DumpContext, step_width: usize) -> String {
585    if m.is_terminal() {
586        "◼".to_string()
587    } else {
588        m.successors()
589            .map(|s| format_step(s, ctx, step_width))
590            .collect::<Vec<_>>()
591            .join(", ")
592    }
593}
594
595fn format_call(
596    step: u16,
597    call: &Call,
598    _module: &Module,
599    ctx: &DumpContext,
600    step_width: usize,
601) -> String {
602    let c = &ctx.colors;
603    let builder = LineBuilder::new(step_width);
604    let symbol = nav_symbol(call.nav());
605    let prefix = format!("  {:0sw$} {} ", step, symbol.format(), sw = step_width);
606
607    // Format field constraint if present
608    let field_part = if let Some(field_id) = call.node_field {
609        let name = ctx
610            .node_field_name(field_id.get())
611            .map(String::from)
612            .unwrap_or_else(|| format!("field#{}", field_id.get()));
613        format!("{name}: ")
614    } else {
615        String::new()
616    };
617
618    let target_name = ctx
619        .label_for(call.target)
620        .map(String::from)
621        .unwrap_or_else(|| format!("@{:0w$}", call.target.0, w = step_width));
622    // Definition name in call is blue
623    let content = format!("{field_part}({}{}{})", c.blue, target_name, c.reset);
624    // Format as "target : return" with numeric IDs
625    let successors = format!(
626        "{:0w$} : {:0w$}",
627        call.target.get(),
628        call.next.get(),
629        w = step_width
630    );
631
632    let base = format!("{prefix}{content}");
633    builder.pad_successors(base, &successors)
634}
635
636fn format_return(
637    step: u16,
638    _r: &Return,
639    _module: &Module,
640    _ctx: &DumpContext,
641    step_width: usize,
642) -> String {
643    let builder = LineBuilder::new(step_width);
644    let prefix = format!(
645        "  {:0sw$} {} ",
646        step,
647        Symbol::EMPTY.format(),
648        sw = step_width
649    );
650    builder.pad_successors(prefix, "▶")
651}
652
653fn format_trampoline(step: u16, t: &Trampoline, _ctx: &DumpContext, step_width: usize) -> String {
654    let builder = LineBuilder::new(step_width);
655    let prefix = format!(
656        "  {:0sw$} {} ",
657        step,
658        Symbol::EMPTY.format(),
659        sw = step_width
660    );
661    let content = "Trampoline";
662    let successors = format!("{:0w$}", t.next.get(), w = step_width);
663    let base = format!("{prefix}{content}");
664    builder.pad_successors(base, &successors)
665}
666
667/// Format a step ID, showing entrypoint label or numeric ID.
668fn format_step(step: StepId, ctx: &DumpContext, step_width: usize) -> String {
669    let c = &ctx.colors;
670    if let Some(label) = ctx.label_for(step) {
671        format!("▶({}{}{})", c.blue, label, c.reset)
672    } else {
673        format!("{:0w$}", step.get(), w = step_width)
674    }
675}