// ud_ast/types.rs

//! AST types for `.ud`.

3/// A complete `.ud` file: a `@module { … }` header followed by zero
4/// or more top-level items.
5#[derive(Debug, Clone, PartialEq, Eq)]
6pub struct UdFile {
7    pub module: Module,
8    pub items: Vec<Item>,
9}
10
11/// The `@module { … }` block at the top of every file.
12///
13/// `fields` is an ordered list — order is significant for round-trip
14/// (the pretty-printer emits in this order).
15#[derive(Debug, Clone, PartialEq, Eq)]
16pub struct Module {
17    pub fields: Vec<Field>,
18}
19
20/// One `name: value` entry inside a `@module` or nested block.
21#[derive(Debug, Clone, PartialEq, Eq)]
22pub struct Field {
23    pub name: String,
24    pub value: Value,
25}
26
27/// A value that can appear on the right-hand side of a `Field`.
28#[derive(Debug, Clone, PartialEq, Eq)]
29pub enum Value {
30    /// A double-quoted string. Storage is the unescaped form.
31    String(String),
32    /// An integer literal. Always emitted in hex with the `0x` prefix
33    /// for now (decimal also accepted on parse).
34    Int(u64),
35    /// A bracketed list of values: `[v1, v2, …]`.
36    List(Vec<Value>),
37    /// A nested block: `{ name: value, … }`.
38    Block(Vec<Field>),
39}
40
41/// A `#[key=value]` annotation. Attributes live on structural
42/// elements (functions, conditionals, …) and carry metadata that is
43/// either:
44///
45/// * **Informational**: hints for the reader / downstream tooling
46///   (`#[compiler="msvc15"]`, `#[abi="stdcall"]`) — they don't change
47///   the lower-path output.
48/// * **Load-bearing**: bytes / decisions that the lower path
49///   consumes (`#[head_bytes=[…]]` on a separated cmp/jcc `if`, so
50///   the cmp bytes land at the right offset relative to the
51///   intervening statements).
52///
53/// Round-trip rule: attributes round-trip verbatim. The `@module`
54/// header's optional `defaults: { … }` block can shadow any attribute
55/// here; the emitter omits an attribute when it equals the module
56/// default, the parser supplies the default when an attribute is
57/// missing.
58#[derive(Debug, Clone, PartialEq, Eq)]
59pub struct Attribute {
60    pub key: String,
61    pub value: AttrValue,
62}
63
/// Right-hand side of an attribute. Kept small on purpose — every
/// new variant has to round-trip through emit + parse + lower.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum AttrValue {
    /// `"…"` — quoted string.
    String(String),
    /// `0x…` or decimal integer.
    Int(u64),
    /// `[0x01, 0x02, …]` — used by `head_bytes` and friends.
    ByteList(Vec<u8>),
    /// Bare flag, e.g. `#[naked]` — no `=value` part. Renders
    /// as just the key name; parses from any attribute that
    /// omits the `=` sign.
    Flag,
}

80/// An item in the file: at the top level, or nested inside an
81/// [`Item::Section`].
82#[derive(Debug, Clone, PartialEq, Eq)]
83pub enum Item {
84    /// Free-floating `// …` line. Preserved on emit so structural
85    /// notes survive parse → re-emit.
86    Comment(String),
87
88    /// A function declaration.
89    Function(FnDecl),
90
91    /// `@raw(0x…, [bytes])` — pin a slice of bytes at a virtual address.
92    /// Used by the decompiler to fill the gaps between functions
93    /// (alignment padding) and to capture the content of non-executable
94    /// sections (`.rodata`, `.data`, etc.).
95    Raw { addr: u64, bytes: Vec<u8> },
96
97    /// `@strings(0x…, ["a", "b", …])` — a packed null-terminated
98    /// string table. Lowers to each entry's UTF-8 bytes followed by
99    /// a single 0x00 terminator, in order. Used for ELF `SHT_STRTAB`
100    /// sections (`.dynstr`, `.strtab`, `.shstrtab`) and for any
101    /// well-known single-string sections like `.interp` (which is
102    /// emitted as a one-entry list).
103    Strings { addr: u64, strings: Vec<String> },
104
105    /// `@notes(0x…, [{ type: …, name: "…", desc: [bytes] }, …])` — an
106    /// ELF note section. Each entry has a 12-byte `Elf64_Nhdr`
107    /// header (name_size, desc_size, type), a name padded to a 4-byte
108    /// boundary, and a desc padded to a 4-byte boundary. Used for
109    /// `SHT_NOTE` sections (`.note.gnu.property`, `.note.ABI-tag`,
110    /// `.note.gnu.build-id`, …).
111    Notes { addr: u64, entries: Vec<NoteEntry> },
112
113    /// `@section("name", 0x…) { items… }` — group items under an ELF
114    /// section. The section's start address must equal the first
115    /// nested item's address; items are required to cover the section
116    /// contiguously (no gaps) for [`lower`](crate) to succeed.
117    Section {
118        name: String,
119        addr: u64,
120        items: Vec<Item>,
121    },
122
123    /// `@jump_table(0x…, dispatch="…") { case_0: label_<addr>; … }` —
124    /// a structured switch jump table. Each entry names a case index
125    /// and the address it dispatches to; the `dispatch` string tags
126    /// the encoding kind (e.g. `"gcc_pie_rel32"`, `"msvc_va32"`) so
127    /// lower knows whether to emit 4-byte signed offsets relative to
128    /// the table base, absolute 32-bit VAs, or some other layout.
129    /// Replaces the `@raw` byte run a jump table would otherwise
130    /// occupy in `.rodata`, recovering the symbolic intent of the
131    /// dispatch.
132    JumpTable {
133        addr: u64,
134        dispatch: String,
135        entries: Vec<JumpTableEntry>,
136    },
137}
138
/// One entry inside an [`Item::JumpTable`] block: a case index and
/// the address it dispatches to. The case ordering is the encoded
/// table order — entries are lowered in source-text order, so the
/// `i`-th entry lands at `(addr + i * entry_size)`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct JumpTableEntry {
    /// Case index — `0`, `1`, … for dense tables; sparse tables
    /// preserve gaps via case numbers that aren't strictly
    /// contiguous (rare in practice — most compilers normalise to
    /// a dense table with a `default` arm).
    pub case: u64,
    /// Target address the case dispatches to. Renders as
    /// `label_<addr:x>` in source text — the same label name a
    /// `Stmt::Goto` would produce.
    pub target: u64,
}

/// One entry inside an [`Item::Notes`] block. Mirrors the structure
/// of an ELF note (`Elf64_Nhdr` + name + desc, each padded to a
/// 4-byte boundary).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct NoteEntry {
    /// Note type (`NT_GNU_PROPERTY_TYPE_0`, `NT_GNU_BUILD_ID`, …).
    pub note_type: u32,
    /// Owner string (`"GNU"` for GNU notes, etc.). Encoded with a
    /// trailing NUL byte then padded to a 4-byte boundary.
    pub name: String,
    /// Descriptor bytes — opaque payload, padded to a 4-byte
    /// boundary on emit.
    pub desc: Vec<u8>,
}

171/// A function declaration.
172///
173/// `signature` carries typed parameters and return type when known
174/// (e.g. recovered from DWARF). When absent, the function emits as
175/// `fn name() { … }` and behaves as untyped.
176#[derive(Debug, Clone, PartialEq, Eq)]
177pub struct FnDecl {
178    /// Optional `@addr(0x…)` directive preceding `fn`. Required for
179    /// functions whose name doesn't encode the address (i.e. anything
180    /// not matching `sub_<hex>`); the decompiler emits it always for
181    /// clarity.
182    pub addr: Option<u64>,
183    pub name: String,
184    /// `#[…]` attributes attached to the `fn` keyword. Carry per-
185    /// function profile info (`abi`, `cc`, `saves`, …); module-level
186    /// `defaults` in the `@module` header can shadow these.
187    pub attrs: Vec<Attribute>,
188    /// Typed parameters and return type, when known.
189    pub signature: Option<Signature>,
190    /// Variable / register declarations at the top of the function
191    /// body. Stack slots discovered from `[ebp±N]` accesses get a
192    /// `Stack` decl; registers the function touches get a `Register`
193    /// decl. Purely informational today (the prologue's pinned bytes
194    /// already encode the actual stack allocation); future work can
195    /// use the size hints to drive lowering of a re-allocated frame.
196    pub locals: Vec<LocalDecl>,
197    pub body: Vec<Stmt>,
198}
199
200/// One `let name: type;` entry at the head of a function body.
201///
202/// Kinds:
203/// * **Stack** — `let var_4: u32;` — backed by a stack slot at
204///   `[ebp-4]` (the name carries the offset as its hex suffix).
205/// * **Register** — `let eax: u32 @reg;` — backed by a CPU
206///   register; the name is the canonical x86 register mnemonic.
207///
208/// The type captures the largest access size seen at the slot /
209/// register in the function. Multiple-width accesses (`mov al,
210/// [ebp-1]; mov dword ptr [ebp-4], …`) pick the widest.
211#[derive(Debug, Clone, PartialEq, Eq)]
212pub struct LocalDecl {
213    pub name: String,
214    pub ty: Type,
215    pub kind: LocalKind,
216}
217
/// What backs a [`LocalDecl`]: a stack slot or a CPU register.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LocalKind {
    /// Backed by a stack slot (`let var_4: u32;`).
    Stack,
    /// Backed by a CPU register (`let eax: u32 @reg;`).
    Register,
}

224/// A function signature: parameter list + return type.
225#[derive(Debug, Clone, PartialEq, Eq)]
226pub struct Signature {
227    pub params: Vec<Param>,
228    pub return_type: Type,
229}
230
231/// One typed parameter in a function signature.
232///
233/// `location` carries the calling-convention slot the value is
234/// passed in — for the 6502 backend this is a register name like
235/// `"A"` / `"X"` / `"Y"`. When `Some`, the parameter renders as
236/// `name: ty @LOC`. When `None`, just `name: ty`.
237#[derive(Debug, Clone, PartialEq, Eq)]
238pub struct Param {
239    pub name: String,
240    pub ty: Type,
241    pub location: Option<String>,
242}
243
/// A type expressible in `.ud` source.
///
/// v0 covers C-like primitives plus single-level pointer wrapping.
/// Anything we can't recover (composite types, qualifiers, function
/// pointers) lands as [`Type::Unknown`], which the parser still
/// accepts so the round-trip closes.
///
/// NOTE(review): the representation itself (`Pointer(Box<Type>)`)
/// permits nested pointers; "single-level" presumably describes what
/// v0 produces rather than a constraint enforced here — confirm.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Type {
    Void,
    I8,
    I16,
    I32,
    I64,
    U8,
    U16,
    U32,
    U64,
    F32,
    F64,
    Bool,
    Char,
    /// `ptr<T>` — pointer to `T`.
    Pointer(Box<Type>),
    /// A type the source language can't yet express. Round-trips
    /// verbatim as the literal token `unknown`.
    Unknown,
}

/// Structured breakdown of a function prologue. Lets the source
/// language carry semantic information (which registers got
/// saved, whether a frame was set up, how much stack the function
/// reserves, whether CET protection is on) instead of an opaque
/// byte blob.
///
/// Used by the emitter to render
/// `@prologue(saves: [ebx, esi, edi], frame, sub: 0x40)` style
/// directives that drop the byte list because the parser can
/// regenerate identical bytes via the arch's prologue codec.
///
/// The derived `Default` is the all-empty prologue: no saves, no
/// frame, zero stack reservation, every flag `false`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PrologueParams {
    /// Callee-saved registers pushed before the frame setup, in
    /// push order. Lowercase canonical names (`"ebx"`, `"esi"`,
    /// `"r12"`, …).
    pub saves: Vec<String>,
    /// Callee-saved registers pushed AFTER the frame setup
    /// (MSVC i386 idiom). Same naming.
    pub saves_after: Vec<String>,
    /// True when the prologue includes `push ebp; mov ebp, esp`
    /// (or the 64-bit variant).
    pub frame: bool,
    /// Stack reservation in bytes (`sub esp, IMM`). Zero when
    /// the function has no stack locals beyond saves.
    pub sub_esp: u32,
    /// True when the prologue starts with `endbr32` / `endbr64`
    /// (Intel CET indirect-branch landing pad).
    pub cf_protect: bool,
    /// Frame-setup encoding selector: `false` for the MSVC RM
    /// form (`mov ebp, esp` as `0x8b 0xec`), `true` for the GCC
    /// MR form (`0x89 0xe5`). Only meaningful when `frame` is
    /// true; the codec uses it to re-emit byte-identical
    /// instructions for either compiler.
    pub frame_alt: bool,
}

/// Structured breakdown of a function epilogue. Mirrors
/// [`PrologueParams`].
///
/// The derived `Default` is the all-empty epilogue: no saves, every
/// flag `false`, zero stack adjustment and zero `ret` immediate.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct EpilogueParams {
    /// Callee-saved registers popped, in pop order (typically
    /// the reverse of the prologue's push order).
    pub saves: Vec<String>,
    /// True when the epilogue uses `leave` (atomic
    /// `mov esp, ebp; pop ebp`).
    pub leave: bool,
    /// True when the epilogue pops the frame pointer with an
    /// explicit `pop ebp` (after the named saves).
    pub pop_frame: bool,
    /// Stack adjustment via `add esp, IMM` before `ret`. Zero
    /// when absent.
    pub add_esp: u32,
    /// Immediate operand of `ret` (callee-cleanup amount).
    /// Zero for cdecl.
    pub ret_imm: u16,
}

329/// A statement inside a function body.
330#[derive(Debug, Clone, PartialEq, Eq)]
331pub enum Stmt {
332    /// `@asm("text")` or `@asm("text", [bytes])` — an instruction.
333    ///
334    /// `text` is the human-readable assembly. `bytes` pins the exact
335    /// encoded bytes; when non-empty, it's the ground truth for
336    /// recompilation and the assembler's job is to verify that
337    /// assembling `text` produces matching bytes (with directive-pinned
338    /// encoding choices, when those land).
339    ///
340    /// `bytes` may be empty: a future assembler will then derive them
341    /// from the text alone. v0 always populates `bytes` because we
342    /// don't yet ship a text assembler that produces byte-identical
343    /// output for non-canonical encodings.
344    Asm { text: String, bytes: Vec<u8> },
345
346    /// `// …` line. Used by the decompiler to surface block boundaries
347    /// and direct-branch targets without committing to a structural
348    /// syntax for them yet.
349    Comment(String),
350
351    /// `@return(value, [bytes])` — a recognised return-with-literal
352    /// pattern at the tail of a function. Lifted from sequences like
353    /// `mov eax, N; [pop rbp;] ret` or `xor eax, eax; [pop rbp;] ret`.
354    /// `bytes` carries every encoded byte of those instructions
355    /// concatenated, so the lower path just emits the bytes.
356    Return { value: u64, bytes: Vec<u8> },
357
358    /// `@prologue("kind", [bytes])` — a recognised function prologue,
359    /// typically `endbr64; push rbp; mov rbp, rsp; sub rsp, IMM` or
360    /// a close variant. `kind` is a descriptive label
361    /// (`"std"` / `"std-no-cf"` / `"std-noframe"`); `bytes` carries
362    /// every encoded byte for round-trip.
363    ///
364    /// `params` carries the structured breakdown (saves list,
365    /// frame flag, sub_esp value, cf_protect) when the prologue's
366    /// bytes round-trip through the canonical codec. Lets the
367    /// emitter render `@prologue(saves: [ebx, esi, edi], frame,
368    /// sub: 0x40)` without the byte list. Empty for handwritten
369    /// or non-canonical prologues where bytes are the source of
370    /// truth.
371    Prologue {
372        kind: String,
373        params: Option<PrologueParams>,
374        bytes: Vec<u8>,
375    },
376
377    /// `@epilogue("kind", [bytes])` — a recognised function epilogue,
378    /// typically `leave; ret` or `pop rbp; ret`. Used at the tail of
379    /// the last block when no [`Stmt::Return`] consumed those bytes
380    /// (e.g. the return value was computed in an earlier block).
381    Epilogue {
382        kind: String,
383        params: Option<EpilogueParams>,
384        bytes: Vec<u8>,
385    },
386
387    /// `@save("REG", [bytes])` — a mid-function callee-saved register
388    /// save. Pairs LIFO with a matching [`Stmt::Restore`] elsewhere in
389    /// the body; together they bracket a region where the function
390    /// borrows an extra register the prologue didn't reserve. Bytes
391    /// are exactly the `push REG` encoding.
392    Save { reg: String, bytes: Vec<u8> },
393
394    /// `@restore("REG", [bytes])` — the matching restore for a prior
395    /// [`Stmt::Save`]. Bytes are exactly the `pop REG` encoding.
396    Restore { reg: String, bytes: Vec<u8> },
397
398    /// `@if_return("cond", "value", [bytes])` — an early-return
399    /// pattern: a `test/cmp + jcc` whose taken target is a
400    /// return-shaped block elsewhere in the function. The bytes
401    /// are the original cmp/test + jcc encoding; the actual return
402    /// happens at the target block (whose bytes remain in place).
403    /// Renders as `if (cond) return value;` to convey the intent
404    /// even though the jcc semantically transfers control to a
405    /// shared cleanup tail.
406    ///
407    /// `value` is the literal/expression the target block returns,
408    /// when statically known; empty when the target's return value
409    /// can't be folded.
410    ///
411    /// Same shape as `IfGoto`: the jcc tail re-encodes from
412    /// the target's *implicit* address (the return-block's
413    /// position, captured at decompile time via the cmp-bytes
414    /// length + jcc rel resolution). `cmp_bytes` stays pinned
415    /// until the text assembler.
416    IfReturn {
417        cond_text: String,
418        value_text: String,
419        target_addr: u64,
420        cmp_bytes: Vec<u8>,
421        cond_code: u8,
422        wide: bool,
423    },
424
425    /// `label_XXXX:` — a zero-byte marker for a jump target. The
426    /// `addr` is the run-time virtual address the label represents
427    /// (rendered as `label_<hex>`). Labels carry no bytes; they
428    /// occupy a position in the source so a [`Stmt::Goto`] or
429    /// [`Stmt::IfGoto`] elsewhere in the function can point at
430    /// them by name. Round-trip neutral.
431    Label { addr: u64 },
432
433    /// `goto label_XXXX;` (or `goto label_XXXX #[wide];`) — an
434    /// unconditional `jmp` to a label somewhere in the function
435    /// body. No pinned bytes: the lower path picks the encoding
436    /// from `target_addr`, the cursor position, and the `wide`
437    /// flag:
438    ///
439    /// * `wide=false` and the displacement fits in `i8`:
440    ///   `jmp rel8` (2 bytes).
441    /// * otherwise: `jmp rel32` (5 bytes).
442    ///
443    /// The `wide` flag captures encoding choices the compiler
444    /// made that don't follow the "always shortest" rule —
445    /// occasional, but real (some MSVC paths emit `jmp rel32`
446    /// even when `jmp rel8` would fit). Editing the function so
447    /// a label moves auto-promotes `wide=false` → `wide=true`
448    /// when the displacement no longer fits in `i8`.
449    Goto { target_addr: u64, wide: bool },
450
451    /// `if (cond) goto label_XXXX;` — a conditional jump folded
452    /// from `cmp/test …; jcc …`. The jcc tail is no longer
453    /// pinned in source: the lower path re-encodes
454    /// `jcc rel8/rel32` from `target_addr`, `cond_code`, and
455    /// `wide`. `cmp_bytes` carries the cmp/test prefix (empty
456    /// when the source is a bare flag check); it stays pinned
457    /// until the text-assembler can re-encode it from
458    /// `cond_text`.
459    ///
460    /// Editing a label so its position changes flows through to
461    /// the rebuilt binary. Editing `cmp_bytes` and `cond_text`
462    /// without keeping them consistent is the user's job until
463    /// the assembler lands.
464    IfGoto {
465        cond_text: String,
466        target_addr: u64,
467        cmp_bytes: Vec<u8>,
468        cond_code: u8,
469        wide: bool,
470    },
471
472    /// `switch (selector) #[dispatch="…", table_va=…] { case N: goto … }`
473    /// — a structured switch whose dispatch bytes are *not* pinned
474    /// to the source. The lower path regenerates `cmp REG,MAX; ja
475    /// DEFAULT; jmp dword ptr [REG*4+TABLE_VA]` from the structured
476    /// fields, validating that the case/default/selector data
477    /// re-encodes to a correct dispatch sequence.
478    ///
479    /// `dispatch` names the encoding shape (currently only
480    /// `"msvc-jmp-table"` is recognised). `table_va` is the
481    /// absolute address of the jump-table data the indirect jmp
482    /// reads — the table contents themselves still ride in a
483    /// `@raw` block under the appropriate data section.
484    ///
485    /// Editing the source is the whole point: adding a case here,
486    /// changing `default_addr`, or renaming the selector all flow
487    /// through to the rebuilt binary via the lower-side encoder,
488    /// without any pinned bytes to silently invalidate.
489    Switch {
490        selector: String,
491        cases: Vec<u64>,
492        default_addr: u64,
493        dispatch: String,
494        table_va: u64,
495    },
496
497    /// `@seh_install([bytes])` — MSVC's Structured Exception
498    /// Handling frame install: `mov fs:[0], esp` after pushing
499    /// the handler-frame fields. Bytes are exactly the
500    /// `mov fs:[0], esp` encoding (7 bytes on x86-32).
501    SehInstall { bytes: Vec<u8> },
502
503    /// `@seh_restore([bytes])` — pops the SEH chain back to the
504    /// previously installed handler. Bytes encode
505    /// `mov reg, [ebp-N]; mov fs:[0], reg` (or similar pop
506    /// sequence). Pairs LIFO with a prior `Stmt::SehInstall`.
507    SehRestore { bytes: Vec<u8> },
508
509    /// `@return_expr("text", [bytes])` — a recognised
510    /// "compute-a-value-and-fall-through-to-the-epilogue" block whose
511    /// contents have been lifted into a single human-readable
512    /// expression. The expression text is informational; the pinned
513    /// bytes are the lower path's source of truth, so the original
514    /// instruction stream re-emits exactly even if the expression is
515    /// edited.
516    ReturnExpr { text: String, bytes: Vec<u8> },
517
518    /// `@arg_spill(N, [bytes])` — a recognised SysV-x64 argument
519    /// spill: `mov [rbp+disp], REG_N` where `REG_N` is the integer or
520    /// XMM register holding argument `N` at function entry. The slot
521    /// displacement is recoverable from the pinned bytes, so it
522    /// doesn't appear in the directive shape.
523    ArgSpill { arg_index: u32, bytes: Vec<u8> },
524
525    /// `@call("name", [args], [bytes])` — a recognised direct-call
526    /// site whose preceding `mov reg, …` / `lea reg, …` instructions
527    /// have been folded into the args list. Each arg is a
528    /// human-readable rendering (string literal, integer constant,
529    /// global address, `&function` reference, or `result` for a
530    /// previous call's return value); the pinned bytes cover both
531    /// `name(args)` — a function call (direct or indirect).
532    ///
533    /// `bytes` pins the arg-setup prefix (pushes, movs, etc.).
534    /// For **indirect** calls (`call dword ptr [imm]` etc.) the
535    /// call instruction itself rides at the end of `bytes`
536    /// because we don't yet re-encode arbitrary memory operands.
537    ///
538    /// For **direct** calls (`call rel32`) the trailing 5 bytes
539    /// are stripped from `bytes` and `direct_target` carries the
540    /// callee's IP. The lower path encodes `call rel32` against
541    /// the current cursor + `direct_target`, so editing a
542    /// function's position automatically re-resolves every
543    /// caller's relative offset.
544    Call {
545        name: String,
546        args: Vec<String>,
547        bytes: Vec<u8>,
548        direct_target: Option<u64>,
549    },
550
551    /// A structured `cmp/test + jcc` head plus its branches:
552    ///
553    /// ```text
554    /// @if_branch("cond text", [cond bytes]) {
555    ///     @then { …fallthrough body… }
556    ///     @else { …taken body… }   // optional
557    /// }
558    /// ```
559    ///
560    /// `else_body == None` means the source-language `if` has no
561    /// `else` clause — the jcc-taken side jumps directly to whatever
562    /// code follows the `@if_branch` in source order. With `Some`,
563    /// both arms are real branches that converge somewhere later.
564    ///
565    /// Bytes layout, exactly preserved on lower (in source order):
566    ///
567    /// * `attrs["head_bytes"]` if present (the cmp/test bytes that
568    ///   live *before* the intervening insns the compiler reordered
569    ///   between the comparison and the conditional branch),
570    /// * `pre_body` statement bytes (the "intervening" insns
571    ///   between cmp and jcc — empty for the adjacent-cmp case),
572    /// * `cond_bytes` (the jcc when there's `head_bytes`; the full
573    ///   cmp+jcc when there isn't),
574    /// * `then_body` statement bytes,
575    /// * `else_body` statement bytes if present.
576    IfBranch {
577        cond_text: String,
578        cond_bytes: Vec<u8>,
579        /// Free-form metadata. Recognised keys today: `head_bytes`
580        /// (load-bearing — see byte layout above).
581        attrs: Vec<Attribute>,
582        /// Statements that fall between the cmp/test and the jcc in
583        /// the original instruction stream. Empty for adjacent cmp +
584        /// jcc (the common case) — the field exists for the
585        /// separated-by-flag-preserving-insns case.
586        pre_body: Vec<Stmt>,
587        then_body: Vec<Stmt>,
588        else_body: Option<Vec<Stmt>>,
589    },
590
591    /// `@local_set(slot, value, [bytes])` — a recognised
592    /// `mov dword/qword ptr [rbp+disp], IMM` (or analogous on i386
593    /// `[ebp+disp]`) where the destination is a stack-frame local.
594    /// Lifts the common "initialise a local with a literal" pattern.
595    /// `slot` is the signed displacement from the frame pointer
596    /// (e.g. `-8` for `[rbp-8]`); `value` is the immediate, signed.
597    LocalSet {
598        slot: i64,
599        value: i64,
600        bytes: Vec<u8>,
601    },
602
603    /// `@local_arith(slot, op, value, [bytes])` — a recognised
604    /// `add/sub dword/qword ptr [rbp+disp], IMM` pattern. Lifts
605    /// the loop-counter / accumulator-update idiom.
606    /// `op` is the arithmetic operation (`"+="` or `"-="`); `value`
607    /// is the immediate, signed.
608    LocalArith {
609        slot: i64,
610        op: String,
611        value: i64,
612        bytes: Vec<u8>,
613    },
614
615    /// `@local_compound(dst, op, src, [bytes])` — a multi-instruction
616    /// pattern of the shape `[rbp+dst] op= [rbp+src]`. Either:
617    ///
618    /// * 2-insn form: `mov reg, [rbp+src]; <op> [rbp+dst], reg`
619    ///   for ops with a memory-destination form (add, sub, and, or, xor),
620    /// * 3-insn form: `mov reg, [rbp+dst]; <op> reg, [rbp+src];
621    ///   mov [rbp+dst], reg` for ops without one (imul).
622    ///
623    /// The pinned `bytes` cover the whole sequence; the lower path
624    /// re-emits them verbatim.
625    LocalCompound {
626        dst: i64,
627        op: String,
628        src: i64,
629        bytes: Vec<u8>,
630    },
631
632    /// `@move("dst", "src", [bytes])` — an arch-agnostic
633    /// "dst := src" data move whose lowering is pinned by `bytes`.
634    /// The 6502 decompiler emits this for `LDA src; STA dst` pairs;
635    /// the `dst` and `src` strings are operand text from the
636    /// instruction stream (e.g. `"IN,Y"` and `"KBD"`).
637    ///
638    /// Round-trip: the source-language text is purely informational,
639    /// `bytes` is what the lower path emits.
640    Move {
641        dst: String,
642        src: String,
643        bytes: Vec<u8>,
644    },
645
646    /// `@inc16("lo", "hi", [bytes])` — a 16-bit increment composed
647    /// of `INC lo; BNE +2; INC hi` (with the `BNE` skipping the
648    /// high-byte INC unless the low byte just rolled over). The
649    /// canonical 6502 idiom for advancing a 16-bit pointer.
650    Inc16 {
651        lo: String,
652        hi: String,
653        bytes: Vec<u8>,
654    },
655
656    /// A structured loop with the test at the bottom. Canonical
657    /// gcc -O0 shape:
658    ///
659    /// ```text
660    /// @loop(entry_jmp=[bytes], "cond text", [tail bytes]) {
661    ///     …body stmts…
662    /// }
663    /// ```
664    ///
665    /// Lifted from a CFG triple where:
666    ///
667    /// * a body block falls through to a tail block,
668    /// * the tail block ends with a conditional branch whose
669    ///   `taken` target is the body block (i.e. a back-edge),
670    ///
671    /// `entry_jmp_bytes` is the pre-header `jmp` that enters the
672    /// loop at the tail (gcc's "skip body on first iteration" idiom).
673    /// When detected, those bytes are folded into the directive so
674    /// no `@asm` line is left behind for them.
675    ///
676    /// Lower-path byte order: `entry_jmp_bytes` (if any) → `body`
677    /// bytes → `tail_bytes`. The `@loop` itself contributes nothing
678    /// before `entry_jmp_bytes` — its placement in the function body
679    /// determines where the bytes land.
680    Loop {
681        cond_text: String,
682        entry_jmp_bytes: Option<Vec<u8>>,
683        tail_bytes: Vec<u8>,
684        body: Vec<Stmt>,
685    },
686}
687
688impl Stmt {
689    /// Construct an [`Stmt::Asm`] with both text and pinned bytes.
690    #[must_use]
691    pub fn asm(text: impl Into<String>, bytes: Vec<u8>) -> Self {
692        Self::Asm {
693            text: text.into(),
694            bytes,
695        }
696    }
697
698    /// Construct an [`Stmt::Asm`] with text only (no bytes pinned).
699    /// Useful in tests; not used by the v0 decompiler.
700    #[must_use]
701    pub fn asm_text(text: impl Into<String>) -> Self {
702        Self::Asm {
703            text: text.into(),
704            bytes: Vec::new(),
705        }
706    }
707}